* [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/
@ 2023-05-08 19:31 Alfredo Tupone
0 siblings, 0 replies; 4+ messages in thread
From: Alfredo Tupone @ 2023-05-08 19:31 UTC (permalink / raw)
To: gentoo-commits
commit: e2df99959623331aa3710cc48992e783cb9f5985
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Mon May 8 19:27:32 2023 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon May 8 19:31:33 2023 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=e2df9995
sci-libs/datasets: fix librosa import
Closes: https://bugs.gentoo.org/905938
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
.../datasets/files/datasets-2.11.0-tests.patch | 28 ++++++++++++----------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/sci-libs/datasets/files/datasets-2.11.0-tests.patch b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
index 01e5d9c70e7b..0bbf200acbd0 100644
--- a/sci-libs/datasets/files/datasets-2.11.0-tests.patch
+++ b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
@@ -59,7 +59,7 @@
@pytest.mark.parametrize(
"build_example",
[
-@@ -82,6 +82,7 @@
+@@ -81,6 +82,7 @@
assert decoded_example.keys() == {"path", "array", "sampling_rate"}
@@ -75,7 +75,7 @@
@pytest.mark.parametrize("sampling_rate", [16_000, 48_000])
def test_audio_decode_example_pcm(shared_datadir, sampling_rate):
audio_path = str(shared_datadir / "test_audio_16000.pcm")
-@@ -416,6 +417,7 @@
+@@ -414,6 +417,7 @@
assert column[0]["sampling_rate"] == 16000
@@ -83,7 +83,7 @@
@pytest.mark.parametrize(
"build_data",
[
-@@ -440,6 +442,7 @@
+@@ -438,6 +442,7 @@
assert item["audio"].keys() == {"path", "array", "sampling_rate"}
@@ -91,7 +91,7 @@
def test_dataset_concatenate_audio_features(shared_datadir):
# we use a different data structure between 1 and 2 to make sure they are compatible with each other
audio_path = str(shared_datadir / "test_audio_44100.wav")
-@@ -453,6 +456,7 @@
+@@ -451,6 +456,7 @@
assert concatenated_dataset[1]["audio"]["array"].shape == dset2[0]["audio"]["array"].shape
@@ -99,7 +99,7 @@
def test_dataset_concatenate_nested_audio_features(shared_datadir):
# we use a different data structure between 1 and 2 to make sure they are compatible with each other
audio_path = str(shared_datadir / "test_audio_44100.wav")
-@@ -616,6 +616,7 @@
+@@ -610,6 +616,7 @@
assert isinstance(ds, Dataset)
@@ -107,7 +107,7 @@
def test_dataset_with_audio_feature_undecoded(shared_datadir):
audio_path = str(shared_datadir / "test_audio_44100.wav")
data = {"audio": [audio_path]}
-@@ -633,6 +634,7 @@
+@@ -627,6 +634,7 @@
assert column[0] == {"path": audio_path, "bytes": None}
@@ -115,7 +115,7 @@
def test_formatted_dataset_with_audio_feature_undecoded(shared_datadir):
audio_path = str(shared_datadir / "test_audio_44100.wav")
data = {"audio": [audio_path]}
-@@ -664,6 +666,7 @@
+@@ -658,6 +666,7 @@
assert column[0] == {"path": audio_path, "bytes": None}
@@ -135,24 +135,26 @@
wrong_scheme = "ERROR"
--- a/tests/packaged_modules/test_audiofolder.py 2023-05-06 14:00:39.560876163 +0200
+++ b/tests/packaged_modules/test_audiofolder.py 2023-05-06 14:01:26.005212423 +0200
-@@ -4,7 +4,6 @@
- import librosa
+@@ -1,10 +1,8 @@
+ import shutil
+ import textwrap
+
+-import librosa
import numpy as np
import pytest
-import soundfile as sf
from datasets import Audio, ClassLabel, Features, Value
from datasets.data_files import DataFilesDict, get_data_patterns_locally
-@@ -191,9 +190,11 @@
- assert len(data_files_with_two_splits_and_metadata["test"]) == 2
+@@ -192,8 +190,11 @@
return data_files_with_two_splits_and_metadata
--
+
+@pytest.mark.skip(reason="require soundfile")
@pytest.fixture
def data_files_with_zip_archives(tmp_path, audio_file):
+ import soundfile as sf
-+
++ import librosa
data_dir = tmp_path / "audiofolder_data_dir_with_zip_archives"
data_dir.mkdir(parents=True, exist_ok=True)
archive_dir = data_dir / "archive"
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/
@ 2023-05-15 21:32 Alfredo Tupone
0 siblings, 0 replies; 4+ messages in thread
From: Alfredo Tupone @ 2023-05-15 21:32 UTC (permalink / raw)
To: gentoo-commits
commit: 48af9c1392f58d79a196a26287bc4bb362317f18
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Mon May 15 21:30:47 2023 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon May 15 21:31:44 2023 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=48af9c13
sci-libs/datasets: ignore some tests not working on sandbox
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
.../datasets/files/datasets-2.11.0-tests.patch | 82 ++++++++++++++++++++++
1 file changed, 82 insertions(+)
diff --git a/sci-libs/datasets/files/datasets-2.11.0-tests.patch b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
index 0bbf200acbd0..0babe8b23d58 100644
--- a/sci-libs/datasets/files/datasets-2.11.0-tests.patch
+++ b/sci-libs/datasets/files/datasets-2.11.0-tests.patch
@@ -168,3 +168,85 @@
def test_task_automatic_speech_recognition(self):
# Include a dummy extra column `dummy` to test we drop it correctly
features_before_cast = Features(
+--- a/tests/test_streaming_download_manager.py 2023-05-15 23:06:59.146379973 +0200
++++ b/tests/test_streaming_download_manager.py 2023-05-15 23:11:32.441363757 +0200
+@@ -217,6 +217,7 @@
+ assert output_path == _readd_double_slash_removed_by_path(Path(expected_path).as_posix())
+
+
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, exists",
+ [
+@@ -299,6 +300,7 @@
+ assert list(f) == TEST_URL_CONTENT.splitlines(keepends=True)
+
+
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, expected_paths",
+ [
+@@ -328,6 +330,7 @@
+ xlistdir(root_url, use_auth_token=hf_token)
+
+
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, isdir",
+ [
+@@ -355,6 +358,7 @@
+ xisdir(root_url, use_auth_token=hf_token)
+
+
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, isfile",
+ [
+@@ -378,6 +382,7 @@
+ assert xisfile(root_url + "qwertyuiop", use_auth_token=hf_token) is False
+
+
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, size",
+ [
+@@ -402,6 +407,7 @@
+ xgetsize(root_url + "qwertyuiop", use_auth_token=hf_token)
+
+
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, expected_paths",
+ [
+@@ -444,6 +450,7 @@
+ assert len(xglob("zip://qwertyuiop/*::" + root_url, use_auth_token=hf_token)) == 0
+
+
++@pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, expected_outputs",
+ [
+@@ -533,6 +540,7 @@
+ def test_xpath_as_posix(self, input_path, expected_path):
+ assert xPath(input_path).as_posix() == expected_path
+
++ @pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, exists",
+ [
+@@ -548,6 +556,7 @@
+ (tmp_path / "file.txt").touch()
+ assert xexists(input_path) is exists
+
++ @pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, pattern, expected_paths",
+ [
+@@ -586,6 +595,7 @@
+ output_paths = sorted(xPath(input_path).glob(pattern))
+ assert output_paths == expected_paths
+
++ @pytest.mark.skip(reason="not working in sandbox")
+ @pytest.mark.parametrize(
+ "input_path, pattern, expected_paths",
+ [
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/
@ 2024-02-21 7:24 Alfredo Tupone
0 siblings, 0 replies; 4+ messages in thread
From: Alfredo Tupone @ 2024-02-21 7:24 UTC (permalink / raw)
To: gentoo-commits
commit: 7f972518eb4e40bc7f8bc670bb45015d371a9166
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Wed Feb 21 07:23:55 2024 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Wed Feb 21 07:23:55 2024 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=7f972518
sci-libs/datasets: missing a patch
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
.../datasets/files/datasets-2.15.0-tests.patch | 46 ++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/sci-libs/datasets/files/datasets-2.15.0-tests.patch b/sci-libs/datasets/files/datasets-2.15.0-tests.patch
new file mode 100644
index 000000000000..64d8dcfdc8d8
--- /dev/null
+++ b/sci-libs/datasets/files/datasets-2.15.0-tests.patch
@@ -0,0 +1,46 @@
+--- a/tests/test_arrow_dataset.py 2024-02-20 21:53:24.248470991 +0100
++++ b/tests/test_arrow_dataset.py 2024-02-20 21:53:29.441804737 +0100
+@@ -3978,7 +3978,6 @@
+ [
+ "relative/path",
+ "/absolute/path",
+- "s3://bucket/relative/path",
+ "hdfs://relative/path",
+ "hdfs:///absolute/path",
+ ],
+--- a/tests/test_hf_gcp.py 2024-02-20 21:55:18.821852434 +0100
++++ b/tests/test_hf_gcp.py 2024-02-20 21:55:46.525186394 +0100
+@@ -22,7 +22,6 @@
+ {"dataset": "wikipedia", "config_name": "20220301.it"},
+ {"dataset": "wikipedia", "config_name": "20220301.simple"},
+ {"dataset": "snli", "config_name": "plain_text"},
+- {"dataset": "eli5", "config_name": "LFQA_reddit"},
+ {"dataset": "wiki40b", "config_name": "en"},
+ {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.compressed"},
+ {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.no_index"},
+--- a/tests/test_inspect.py 2024-02-20 22:01:35.148488467 +0100
++++ b/tests/test_inspect.py 2024-02-20 22:02:14.458561571 +0100
+@@ -15,7 +15,7 @@
+ pytestmark = pytest.mark.integration
+
+
+-@pytest.mark.parametrize("path", ["paws", "csv"])
++@pytest.mark.parametrize("path", ["csv"])
+ def test_inspect_dataset(path, tmp_path):
+ inspect_dataset(path, tmp_path)
+ script_name = path + ".py"
+--- a/tests/test_load.py 2024-02-20 22:12:13.699209107 +0100
++++ b/tests/test_load.py 2024-02-20 22:13:10.862626708 +0100
+@@ -1235,12 +1235,6 @@
+
+
+ @pytest.mark.integration
+-def test_load_streaming_private_dataset_with_zipped_data(hf_token, hf_private_dataset_repo_zipped_txt_data):
+- ds = load_dataset(hf_private_dataset_repo_zipped_txt_data, streaming=True, token=hf_token)
+- assert next(iter(ds)) is not None
+-
+-
+-@pytest.mark.integration
+ def test_load_dataset_config_kwargs_passed_as_arguments():
+ ds_default = load_dataset(SAMPLE_DATASET_IDENTIFIER4)
+ ds_custom = load_dataset(SAMPLE_DATASET_IDENTIFIER4, drop_metadata=True)
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/
@ 2025-03-07 10:02 Alfredo Tupone
0 siblings, 0 replies; 4+ messages in thread
From: Alfredo Tupone @ 2025-03-07 10:02 UTC (permalink / raw)
To: gentoo-commits
commit: f2fd26cac175376fc73f24754c17f82776e378ab
Author: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Fri Mar 7 10:01:44 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Fri Mar 7 10:01:44 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=f2fd26ca
sci-libs/datasets: drop old file
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
.../datasets/files/datasets-2.19.2-tests.patch | 23 ----------------------
1 file changed, 23 deletions(-)
diff --git a/sci-libs/datasets/files/datasets-2.19.2-tests.patch b/sci-libs/datasets/files/datasets-2.19.2-tests.patch
deleted file mode 100644
index 64df833032c5..000000000000
--- a/sci-libs/datasets/files/datasets-2.19.2-tests.patch
+++ /dev/null
@@ -1,23 +0,0 @@
---- a/tests/test_arrow_dataset.py 2024-02-20 21:53:24.248470991 +0100
-+++ b/tests/test_arrow_dataset.py 2024-02-20 21:53:29.441804737 +0100
-@@ -4109,7 +4109,6 @@
- [
- "relative/path",
- "/absolute/path",
-- "s3://bucket/relative/path",
- "hdfs://relative/path",
- "hdfs:///absolute/path",
- ],
---- a/tests/packaged_modules/test_audiofolder.py 2023-05-06 14:00:39.560876163 +0200
-+++ b/tests/packaged_modules/test_audiofolder.py 2023-05-06 14:01:26.005212423 +0200
-@@ -1,10 +1,8 @@
- import shutil
- import textwrap
-
--import librosa
- import numpy as np
- import pytest
--import soundfile as sf
-
- from datasets import Audio, ClassLabel, Features, Value
- from datasets.data_files import DataFilesDict, get_data_patterns
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-03-07 10:02 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-07 10:02 [gentoo-commits] repo/gentoo:master commit in: sci-libs/datasets/files/ Alfredo Tupone
-- strict thread matches above, loose matches on Subject: below --
2024-02-21 7:24 Alfredo Tupone
2023-05-15 21:32 Alfredo Tupone
2023-05-08 19:31 Alfredo Tupone
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.