public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Alfredo Tupone" <tupone@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/, sci-libs/datasets/files/
Date: Thu, 22 Feb 2024 07:27:04 +0000 (UTC)	[thread overview]
Message-ID: <1708586800.05e867ed4f4ffc6bcbf57160225dcce103062057.tupone@gentoo> (raw)

commit:     05e867ed4f4ffc6bcbf57160225dcce103062057
Author:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Thu Feb 22 07:26:13 2024 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Thu Feb 22 07:26:40 2024 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=05e867ed

sci-libs/datasets: add 2.17.1, drop 2.16.0

Bug: https://bugs.gentoo.org/921090
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 sci-libs/datasets/Manifest                         |  2 +-
 ...tasets-2.16.0.ebuild => datasets-2.17.1.ebuild} |  8 ++++--
 .../datasets/files/datasets-2.14.4-tests.patch     | 10 --------
 ...6.0-tests.patch => datasets-2.17.1-tests.patch} | 29 ++++++++--------------
 4 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/sci-libs/datasets/Manifest b/sci-libs/datasets/Manifest
index 0880ec7cb629..1b5292db6bf4 100644
--- a/sci-libs/datasets/Manifest
+++ b/sci-libs/datasets/Manifest
@@ -1 +1 @@
-DIST datasets-2.16.0.gh.tar.gz 2163874 BLAKE2B baec91a0e39fac3e07f11e352a286c0940cbc672e7233267e70d1abb64dd31bae18c55213a20fafaeaf2f60268104f294c77c9b73ddc1b289175904288a7c440 SHA512 f2a17ffab192163cfc196cc2bad0adb2ca657b5cf911f74f299b6e29eb4fcfacc377505b1857974a6b55252eedf8775a8706f9e991450c55e5d613020dc03735
+DIST datasets-2.17.1.gh.tar.gz 2168860 BLAKE2B ad7e9be7e60125d53b19b6277b6be6ae6050321e4210293a37737a345a4806d4901e9507fbf7a51c5e00a91912656d68a94e76cf70e070433beccc6e1ad54643 SHA512 43617c3d98cc3ad17fb577d6e917d164c8b6ec24740604ca281adaa2f0e5a6538633721792c9fa6621b7f1980161d8acf62dcdcdacca56e1739a8f28e3c71cdf

diff --git a/sci-libs/datasets/datasets-2.16.0.ebuild b/sci-libs/datasets/datasets-2.17.1.ebuild
similarity index 91%
rename from sci-libs/datasets/datasets-2.16.0.ebuild
rename to sci-libs/datasets/datasets-2.17.1.ebuild
index a34fcaa2f89c..9b6295db1a0e 100644
--- a/sci-libs/datasets/datasets-2.16.0.ebuild
+++ b/sci-libs/datasets/datasets-2.17.1.ebuild
@@ -27,12 +27,16 @@ RDEPEND="
 	$(python_gen_cond_dep '
 		dev-python/absl-py[${PYTHON_USEDEP}]
 		dev-python/aiohttp[${PYTHON_USEDEP}]
+		dev-python/dill[${PYTHON_USEDEP}]
+		dev-python/filelock[${PYTHON_USEDEP}]
 		<=dev-python/fsspec-2023.10.0[${PYTHON_USEDEP}]
 		dev-python/multiprocess[${PYTHON_USEDEP}]
+		dev-python/numpy[${PYTHON_USEDEP}]
 		dev-python/packaging[${PYTHON_USEDEP}]
 		dev-python/pandas[${PYTHON_USEDEP}]
 		dev-python/pyarrow[${PYTHON_USEDEP},parquet,snappy]
 		dev-python/pyyaml[${PYTHON_USEDEP}]
+		dev-python/requests[${PYTHON_USEDEP}]
 		dev-python/tqdm[${PYTHON_USEDEP}]
 		dev-python/xxhash[${PYTHON_USEDEP}]
 		dev-python/zstandard[${PYTHON_USEDEP}]
@@ -46,7 +50,7 @@ BDEPEND="test? (
 		dev-python/absl-py[${PYTHON_USEDEP}]
 		dev-python/pytest-datadir[${PYTHON_USEDEP}]
 		dev-python/decorator[${PYTHON_USEDEP}]
-		=dev-python/sqlalchemy-1*[${PYTHON_USEDEP}]
+		dev-python/sqlalchemy[${PYTHON_USEDEP}]
 		sci-libs/jiwer[${PYTHON_USEDEP}]
 		sci-libs/seqeval[${PYTHON_USEDEP}]
 	')
@@ -79,5 +83,5 @@ src_prepare() {
 		tests/test_streaming_download_manager.py \
 		tests/commands/test_test.py \
 		tests/packaged_modules/test_cache.py \
-		die
+		|| die
 }

diff --git a/sci-libs/datasets/files/datasets-2.14.4-tests.patch b/sci-libs/datasets/files/datasets-2.14.4-tests.patch
index 5dd322309b20..b9791c04e8e0 100644
--- a/sci-libs/datasets/files/datasets-2.14.4-tests.patch
+++ b/sci-libs/datasets/files/datasets-2.14.4-tests.patch
@@ -8,16 +8,6 @@
      @pytest.mark.filterwarnings("ignore:metric_module_factory is deprecated:FutureWarning")
      @pytest.mark.filterwarnings("ignore:load_metric is deprecated:FutureWarning")
      def test_load_metric(self, metric_name):
---- a/tests/test_hf_gcp.py	2023-05-04 19:33:31.150825303 +0200
-+++ b/tests/test_hf_gcp.py	2023-05-04 19:40:08.401759538 +0200
-@@ -75,6 +75,7 @@
-             self.assertTrue(os.path.exists(datset_info_path))
- 
- 
-+@pytest.mark.skip(reason="require apache_beam")
- @pytest.mark.integration
- def test_as_dataset_from_hf_gcs(tmp_path_factory):
-     tmp_dir = tmp_path_factory.mktemp("test_hf_gcp") / "test_wikipedia_simple"
 --- a/tests/test_distributed.py	2023-05-04 19:43:09.861275030 +0200
 +++ b/tests/test_distributed.py	2023-05-04 19:44:17.608326722 +0200
 @@ -74,6 +74,7 @@

diff --git a/sci-libs/datasets/files/datasets-2.16.0-tests.patch b/sci-libs/datasets/files/datasets-2.17.1-tests.patch
similarity index 90%
rename from sci-libs/datasets/files/datasets-2.16.0-tests.patch
rename to sci-libs/datasets/files/datasets-2.17.1-tests.patch
index 8cb89e824b3b..14ae50602d10 100644
--- a/sci-libs/datasets/files/datasets-2.16.0-tests.patch
+++ b/sci-libs/datasets/files/datasets-2.17.1-tests.patch
@@ -1,6 +1,6 @@
 --- a/tests/test_arrow_dataset.py	2024-02-20 21:53:24.248470991 +0100
 +++ b/tests/test_arrow_dataset.py	2024-02-20 21:53:29.441804737 +0100
-@@ -3982,7 +3982,6 @@
+@@ -4016,7 +4016,6 @@
      [
          "relative/path",
          "/absolute/path",
@@ -10,15 +10,15 @@
      ],
 --- a/tests/test_load.py	2024-02-20 22:12:13.699209107 +0100
 +++ b/tests/test_load.py	2024-02-20 22:13:10.862626708 +0100
-@@ -386,6 +386,7 @@
+@@ -388,6 +388,7 @@
              hf_modules_cache=self.hf_modules_cache,
          )
  
 +    @pytest.mark.skip(reason="")
      def test_HubDatasetModuleFactoryWithScript_dont_trust_remote_code(self):
-         # "squad" has a dataset script
+         # "lhoestq/test" has a dataset script
          factory = HubDatasetModuleFactoryWithScript(
-@@ -402,6 +402,7 @@
+@@ -403,6 +404,7 @@
          )
          self.assertRaises(ValueError, factory.get_module)
  
@@ -26,7 +26,7 @@
      def test_HubDatasetModuleFactoryWithScript_with_github_dataset(self):
          # "wmt_t2t" has additional imports (internal)
          factory = HubDatasetModuleFactoryWithScript(
-@@ -411,6 +412,7 @@
+@@ -412,6 +414,7 @@
          assert importlib.import_module(module_factory_result.module_path) is not None
          assert module_factory_result.builder_kwargs["base_path"].startswith(config.HF_ENDPOINT)
  
@@ -34,7 +34,7 @@
      def test_GithubMetricModuleFactory_with_internal_import(self):
          # "squad_v2" requires additional imports (internal)
          factory = GithubMetricModuleFactory(
-@@ -419,6 +421,7 @@
+@@ -420,6 +423,7 @@
          module_factory_result = factory.get_module()
          assert importlib.import_module(module_factory_result.module_path) is not None
  
@@ -42,7 +42,7 @@
      @pytest.mark.filterwarnings("ignore:GithubMetricModuleFactory is deprecated:FutureWarning")
      def test_GithubMetricModuleFactory_with_external_import(self):
          # "bleu" requires additional imports (external from github)
-@@ -1032,6 +1035,7 @@
+@@ -1033,6 +1037,7 @@
          datasets.load_dataset_builder(SAMPLE_DATASET_TWO_CONFIG_IN_METADATA, "non-existing-config")
  
  
@@ -50,7 +50,7 @@
  @pytest.mark.parametrize("serializer", [pickle, dill])
  def test_load_dataset_builder_with_metadata_configs_pickable(serializer):
      builder = datasets.load_dataset_builder(SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA)
-@@ -1153,6 +1157,7 @@
+@@ -1154,6 +1159,7 @@
      assert len(builder.config.data_files["test"]) > 0
  
  
@@ -58,7 +58,7 @@
  def test_load_dataset_builder_fail():
      with pytest.raises(DatasetNotFoundError):
          datasets.load_dataset_builder("blabla")
-@@ -1168,6 +1173,7 @@
+@@ -1169,6 +1175,7 @@
      assert isinstance(next(iter(dataset["train"])), dict)
  
  
@@ -68,7 +68,7 @@
      assert isinstance(dataset, DatasetDict)
 --- a/tests/test_hf_gcp.py	2024-02-21 09:59:26.918397895 +0100
 +++ b/tests/test_hf_gcp.py	2024-02-21 09:59:46.335100597 +0100
-@@ -47,6 +47,7 @@
+@@ -45,6 +45,7 @@
          ]
  
  
@@ -78,15 +78,6 @@
      dataset = None
 --- a/tests/test_inspect.py	2024-02-21 10:03:32.315520016 +0100
 +++ b/tests/test_inspect.py	2024-02-21 10:03:50.345553490 +0100
-@@ -18,7 +18,7 @@
- pytestmark = pytest.mark.integration
- 
- 
--@pytest.mark.parametrize("path", ["paws", csv.__file__])
-+@pytest.mark.parametrize("path", [csv.__file__])
- def test_inspect_dataset(path, tmp_path):
-     inspect_dataset(path, tmp_path)
-     script_name = Path(path).stem + ".py"
 @@ -49,6 +49,7 @@
      assert list(info.splits.keys()) == expected_splits
  


             reply	other threads:[~2024-02-22  7:27 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-22  7:27 Alfredo Tupone [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-02-22 18:37 [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/, sci-libs/datasets/files/ Alfredo Tupone
2024-02-21 11:33 Alfredo Tupone
2023-08-24 15:29 Alfredo Tupone
2023-05-07 20:16 Alfredo Tupone

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1708586800.05e867ed4f4ffc6bcbf57160225dcce103062057.tupone@gentoo \
    --to=tupone@gentoo.org \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.