public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/
@ 2024-02-21  7:24 Alfredo Tupone
  0 siblings, 0 replies; 3+ messages in thread
From: Alfredo Tupone @ 2024-02-21  7:24 UTC (permalink / raw)
  To: gentoo-commits

commit:     7f972518eb4e40bc7f8bc670bb45015d371a9166
Author:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Wed Feb 21 07:23:55 2024 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Wed Feb 21 07:23:55 2024 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=7f972518

sci-libs/datasets: missing a patch

Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 .../datasets/files/datasets-2.15.0-tests.patch     | 46 ++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/sci-libs/datasets/files/datasets-2.15.0-tests.patch b/sci-libs/datasets/files/datasets-2.15.0-tests.patch
new file mode 100644
index 000000000000..64d8dcfdc8d8
--- /dev/null
+++ b/sci-libs/datasets/files/datasets-2.15.0-tests.patch
@@ -0,0 +1,46 @@
+--- a/tests/test_arrow_dataset.py	2024-02-20 21:53:24.248470991 +0100
++++ b/tests/test_arrow_dataset.py	2024-02-20 21:53:29.441804737 +0100
+@@ -3978,7 +3978,6 @@
+     [
+         "relative/path",
+         "/absolute/path",
+-        "s3://bucket/relative/path",
+         "hdfs://relative/path",
+         "hdfs:///absolute/path",
+     ],
+--- a/tests/test_hf_gcp.py	2024-02-20 21:55:18.821852434 +0100
++++ b/tests/test_hf_gcp.py	2024-02-20 21:55:46.525186394 +0100
+@@ -22,7 +22,6 @@
+     {"dataset": "wikipedia", "config_name": "20220301.it"},
+     {"dataset": "wikipedia", "config_name": "20220301.simple"},
+     {"dataset": "snli", "config_name": "plain_text"},
+-    {"dataset": "eli5", "config_name": "LFQA_reddit"},
+     {"dataset": "wiki40b", "config_name": "en"},
+     {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.compressed"},
+     {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.no_index"},
+--- a/tests/test_inspect.py	2024-02-20 22:01:35.148488467 +0100
++++ b/tests/test_inspect.py	2024-02-20 22:02:14.458561571 +0100
+@@ -15,7 +15,7 @@
+ pytestmark = pytest.mark.integration
+ 
+ 
+-@pytest.mark.parametrize("path", ["paws", "csv"])
++@pytest.mark.parametrize("path", ["csv"])
+ def test_inspect_dataset(path, tmp_path):
+     inspect_dataset(path, tmp_path)
+     script_name = path + ".py"
+--- a/tests/test_load.py	2024-02-20 22:12:13.699209107 +0100
++++ b/tests/test_load.py	2024-02-20 22:13:10.862626708 +0100
+@@ -1235,12 +1235,6 @@
+ 
+ 
+ @pytest.mark.integration
+-def test_load_streaming_private_dataset_with_zipped_data(hf_token, hf_private_dataset_repo_zipped_txt_data):
+-    ds = load_dataset(hf_private_dataset_repo_zipped_txt_data, streaming=True, token=hf_token)
+-    assert next(iter(ds)) is not None
+-
+-
+-@pytest.mark.integration
+ def test_load_dataset_config_kwargs_passed_as_arguments():
+     ds_default = load_dataset(SAMPLE_DATASET_IDENTIFIER4)
+     ds_custom = load_dataset(SAMPLE_DATASET_IDENTIFIER4, drop_metadata=True)


^ permalink raw reply related	[flat|nested] 3+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/
@ 2023-05-15 21:32 Alfredo Tupone
  0 siblings, 0 replies; 3+ messages in thread
From: Alfredo Tupone @ 2023-05-15 21:32 UTC (permalink / raw)
  To: gentoo-commits

commit:     48af9c1392f58d79a196a26287bc4bb362317f18
Author:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Mon May 15 21:30:47 2023 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon May 15 21:31:44 2023 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=48af9c13

sci-libs/datasets: ignore some tests not working on sandbox

Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 .../datasets/files/datasets-2.11.0-tests.patch     | 82 ++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/sci-libs/datasets/files/datasets-2.11.0-tests.patch b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
index 0bbf200acbd0..0babe8b23d58 100644
--- a/sci-libs/datasets/files/datasets-2.11.0-tests.patch
+++ b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
@@ -168,3 +168,85 @@
      def test_task_automatic_speech_recognition(self):
          # Include a dummy extra column `dummy` to test we drop it correctly
          features_before_cast = Features(
+--- a/tests/test_streaming_download_manager.py	2023-05-15 23:06:59.146379973 +0200
++++ b/tests/test_streaming_download_manager.py	2023-05-15 23:11:32.441363757 +0200
+@@ -217,6 +217,7 @@
+     assert output_path == _readd_double_slash_removed_by_path(Path(expected_path).as_posix())
+ 
+ 
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+     "input_path, exists",
+     [
+@@ -299,6 +300,7 @@
+         assert list(f) == TEST_URL_CONTENT.splitlines(keepends=True)
+ 
+ 
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+     "input_path, expected_paths",
+     [
+@@ -328,6 +330,7 @@
+         xlistdir(root_url, use_auth_token=hf_token)
+ 
+ 
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+     "input_path, isdir",
+     [
+@@ -355,6 +358,7 @@
+         xisdir(root_url, use_auth_token=hf_token)
+ 
+ 
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+     "input_path, isfile",
+     [
+@@ -378,6 +382,7 @@
+     assert xisfile(root_url + "qwertyuiop", use_auth_token=hf_token) is False
+ 
+ 
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+     "input_path, size",
+     [
+@@ -402,6 +407,7 @@
+         xgetsize(root_url + "qwertyuiop", use_auth_token=hf_token)
+ 
+ 
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+     "input_path, expected_paths",
+     [
+@@ -444,6 +450,7 @@
+     assert len(xglob("zip://qwertyuiop/*::" + root_url, use_auth_token=hf_token)) == 0
+ 
+ 
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+     "input_path, expected_outputs",
+     [
+@@ -533,6 +540,7 @@
+     def test_xpath_as_posix(self, input_path, expected_path):
+         assert xPath(input_path).as_posix() == expected_path
+ 
++    @pytest.mark.skip(reason="not working in sandbox")
+     @pytest.mark.parametrize(
+         "input_path, exists",
+         [
+@@ -548,6 +556,7 @@
+             (tmp_path / "file.txt").touch()
+         assert xexists(input_path) is exists
+ 
++    @pytest.mark.skip(reason="not working in sandbox")
+     @pytest.mark.parametrize(
+         "input_path, pattern, expected_paths",
+         [
+@@ -586,6 +595,7 @@
+         output_paths = sorted(xPath(input_path).glob(pattern))
+         assert output_paths == expected_paths
+ 
++    @pytest.mark.skip(reason="not working in sandbox")
+     @pytest.mark.parametrize(
+         "input_path, pattern, expected_paths",
+         [


^ permalink raw reply related	[flat|nested] 3+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/
@ 2023-05-08 19:31 Alfredo Tupone
  0 siblings, 0 replies; 3+ messages in thread
From: Alfredo Tupone @ 2023-05-08 19:31 UTC (permalink / raw)
  To: gentoo-commits

commit:     e2df99959623331aa3710cc48992e783cb9f5985
Author:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Mon May  8 19:27:32 2023 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon May  8 19:31:33 2023 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=e2df9995

sci-libs/datasets: fix librosa import

Closes: https://bugs.gentoo.org/905938
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 .../datasets/files/datasets-2.11.0-tests.patch     | 28 ++++++++++++----------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/sci-libs/datasets/files/datasets-2.11.0-tests.patch b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
index 01e5d9c70e7b..0bbf200acbd0 100644
--- a/sci-libs/datasets/files/datasets-2.11.0-tests.patch
+++ b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
@@ -59,7 +59,7 @@
  @pytest.mark.parametrize(
      "build_example",
      [
-@@ -82,6 +82,7 @@
+@@ -81,6 +82,7 @@
      assert decoded_example.keys() == {"path", "array", "sampling_rate"}
  
  
@@ -75,7 +75,7 @@
  @pytest.mark.parametrize("sampling_rate", [16_000, 48_000])
  def test_audio_decode_example_pcm(shared_datadir, sampling_rate):
      audio_path = str(shared_datadir / "test_audio_16000.pcm")
-@@ -416,6 +417,7 @@
+@@ -414,6 +417,7 @@
      assert column[0]["sampling_rate"] == 16000
  
  
@@ -83,7 +83,7 @@
  @pytest.mark.parametrize(
      "build_data",
      [
-@@ -440,6 +442,7 @@
+@@ -438,6 +442,7 @@
      assert item["audio"].keys() == {"path", "array", "sampling_rate"}
  
  
@@ -91,7 +91,7 @@
  def test_dataset_concatenate_audio_features(shared_datadir):
      # we use a different data structure between 1 and 2 to make sure they are compatible with each other
      audio_path = str(shared_datadir / "test_audio_44100.wav")
-@@ -453,6 +456,7 @@
+@@ -451,6 +456,7 @@
      assert concatenated_dataset[1]["audio"]["array"].shape == dset2[0]["audio"]["array"].shape
  
  
@@ -99,7 +99,7 @@
  def test_dataset_concatenate_nested_audio_features(shared_datadir):
      # we use a different data structure between 1 and 2 to make sure they are compatible with each other
      audio_path = str(shared_datadir / "test_audio_44100.wav")
-@@ -616,6 +616,7 @@
+@@ -610,6 +616,7 @@
      assert isinstance(ds, Dataset)
  
  
@@ -107,7 +107,7 @@
  def test_dataset_with_audio_feature_undecoded(shared_datadir):
      audio_path = str(shared_datadir / "test_audio_44100.wav")
      data = {"audio": [audio_path]}
-@@ -633,6 +634,7 @@
+@@ -627,6 +634,7 @@
      assert column[0] == {"path": audio_path, "bytes": None}
  
  
@@ -115,7 +115,7 @@
  def test_formatted_dataset_with_audio_feature_undecoded(shared_datadir):
      audio_path = str(shared_datadir / "test_audio_44100.wav")
      data = {"audio": [audio_path]}
-@@ -664,6 +666,7 @@
+@@ -658,6 +666,7 @@
          assert column[0] == {"path": audio_path, "bytes": None}
  
  
@@ -135,24 +135,26 @@
      wrong_scheme = "ERROR"
 --- a/tests/packaged_modules/test_audiofolder.py	2023-05-06 14:00:39.560876163 +0200
 +++ b/tests/packaged_modules/test_audiofolder.py	2023-05-06 14:01:26.005212423 +0200
-@@ -4,7 +4,6 @@
- import librosa
+@@ -1,10 +1,8 @@
+ import shutil
+ import textwrap
+ 
+-import librosa
  import numpy as np
  import pytest
 -import soundfile as sf
  
  from datasets import Audio, ClassLabel, Features, Value
  from datasets.data_files import DataFilesDict, get_data_patterns_locally
-@@ -191,9 +190,11 @@
-     assert len(data_files_with_two_splits_and_metadata["test"]) == 2
+@@ -192,8 +190,11 @@
      return data_files_with_two_splits_and_metadata
  
--
+ 
 +@pytest.mark.skip(reason="require soundfile")
  @pytest.fixture
  def data_files_with_zip_archives(tmp_path, audio_file):
 +    import soundfile as sf
-+
++    import librosa
      data_dir = tmp_path / "audiofolder_data_dir_with_zip_archives"
      data_dir.mkdir(parents=True, exist_ok=True)
      archive_dir = data_dir / "archive"


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-02-21  7:24 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-21  7:24 [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/ Alfredo Tupone
  -- strict thread matches above, loose matches on Subject: below --
2023-05-15 21:32 Alfredo Tupone
2023-05-08 19:31 Alfredo Tupone

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.