public inbox for gentoo-commits@lists.gentoo.org
* [gentoo-commits] repo/gentoo:master commit in: dev-python/pyarrow/files/, dev-python/pyarrow/
@ 2023-05-08  7:52 Alfredo Tupone
From: Alfredo Tupone @ 2023-05-08  7:52 UTC
  To: gentoo-commits

commit:     d8e78c9c984bd59ec4b1ac0b49370124d845fef3
Author:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
AuthorDate: Mon May  8 07:52:00 2023 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon May  8 07:52:27 2023 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=d8e78c9c

dev-python/pyarrow: enable test

Closes: https://bugs.gentoo.org/905909
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 .../pyarrow/files/pyarrow-12.0.0-tests.patch       | 144 +++++++++++++++++++++
 dev-python/pyarrow/pyarrow-12.0.0.ebuild           |  13 +-
 2 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/dev-python/pyarrow/files/pyarrow-12.0.0-tests.patch b/dev-python/pyarrow/files/pyarrow-12.0.0-tests.patch
new file mode 100644
index 000000000000..56a307592593
--- /dev/null
+++ b/dev-python/pyarrow/files/pyarrow-12.0.0-tests.patch
@@ -0,0 +1,144 @@
+--- a/pyarrow/tests/test_compute.py	2023-05-08 09:06:34.571387618 +0200
++++ b/pyarrow/tests/test_compute.py	2023-05-08 09:11:52.759753459 +0200
+@@ -414,6 +414,7 @@
+     assert pc.variance(data, ddof=1).as_py() == 6.0
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_count_substring():
+     for (ty, offset) in [(pa.string(), pa.int32()),
+                          (pa.large_string(), pa.int64())]:
+@@ -428,6 +429,7 @@
+         assert expected == result
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_count_substring_regex():
+     for (ty, offset) in [(pa.string(), pa.int32()),
+                          (pa.large_string(), pa.int64())]:
+@@ -442,6 +444,7 @@
+         assert expected.equals(result)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_find_substring():
+     for ty in [pa.string(), pa.binary(), pa.large_string(), pa.large_binary()]:
+         arr = pa.array(["ab", "cab", "ba", None], type=ty)
+@@ -459,6 +462,7 @@
+         assert result.to_pylist() == [0, 1, 0, 0]
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_match_like():
+     arr = pa.array(["ab", "ba%", "ba", "ca%d", None])
+     result = pc.match_like(arr, r"_a\%%")
+@@ -474,6 +478,7 @@
+     assert expected.equals(result)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_match_substring():
+     arr = pa.array(["ab", "abc", "ba", None])
+     result = pc.match_substring(arr, "ab")
+@@ -489,6 +494,7 @@
+     assert expected.equals(result)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_match_substring_regex():
+     arr = pa.array(["ab", "abc", "ba", "c", None])
+     result = pc.match_substring_regex(arr, "^a?b")
+@@ -602,6 +608,7 @@
+     assert expected.equals(result)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_split_pattern_regex():
+     arr = pa.array(["-foo---bar--", "---foo---b"])
+     result = pc.split_pattern_regex(arr, pattern="-+")
+@@ -1022,6 +1029,7 @@
+     assert ar.tolist() == ['barzfoo', 'bard', None]
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_replace_regex():
+     data = pa.array(['foo', 'mood', None])
+     expected = ['f00', 'm00d', None]
+@@ -1033,6 +1041,7 @@
+     assert ar.tolist() == expected
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_extract_regex():
+     ar = pa.array(['a1', 'zb2z'])
+     expected = [{'letter': 'a', 'digit': '1'}, {'letter': 'b', 'digit': '2'}]
+--- a/pyarrow/tests/test_fs.py	2023-05-08 09:13:26.796384297 +0200
++++ b/pyarrow/tests/test_fs.py	2023-05-08 09:14:20.567601499 +0200
+@@ -1012,6 +1012,7 @@
+         LocalFileSystem(xxx=False)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_localfs_errors(localfs):
+     # Local filesystem errors should raise the right Python exceptions
+     # (e.g. FileNotFoundError)
+--- a/pyarrow/tests/test_memory.py	2023-05-08 09:15:35.366512597 +0200
++++ b/pyarrow/tests/test_memory.py	2023-05-08 09:16:44.969501524 +0200
+@@ -140,6 +140,7 @@
+         assert len(errlines) == 0
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_env_var():
+     check_env_var("system", ["system"])
+     if should_have_jemalloc:
+@@ -149,6 +150,7 @@
+     check_env_var("nonexistent", possible_backends, expect_warning=True)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_specific_memory_pools():
+     specific_pools = set()
+ 
+@@ -170,6 +172,7 @@
+           can_fail=not should_have_mimalloc)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_supported_memory_backends():
+     backends = pa.supported_memory_backends()
+ 
+--- a/pyarrow/tests/parquet/test_basic.py	2023-05-08 09:18:05.307333210 +0200
++++ b/pyarrow/tests/parquet/test_basic.py	2023-05-08 09:20:16.135429950 +0200
+@@ -349,6 +349,7 @@
+     assert result.equals(table)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ @parametrize_legacy_dataset
+ def test_byte_stream_split(use_legacy_dataset):
+     # This is only a smoke test.
+@@ -510,6 +511,7 @@
+                          use_legacy_dataset=use_legacy_dataset)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ @parametrize_legacy_dataset
+ def test_compression_level(use_legacy_dataset):
+     arr = pa.array(list(map(int, range(1000))))
+@@ -660,6 +662,7 @@
+                           use_legacy_dataset=use_legacy_dataset)
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ @pytest.mark.pandas
+ @parametrize_legacy_dataset
+ def test_zlib_compression_bug(use_legacy_dataset):
+@@ -760,6 +763,7 @@
+         assert buf.to_pybytes() == buf.size * b"\0"
+ 
+ 
++@pytest.mark.skip(reason="not working")
+ def test_parquet_compression_roundtrip(tempdir):
+     # ARROW-10480: ensure even with nonstandard Parquet file naming
+     # conventions, writing and then reading a file works. In

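For reference, the @pytest.mark.skip marker added throughout the patch above only tells pytest to collect a test and report it as skipped instead of running it. A minimal stand-alone illustration (the test name here is made up, not taken from the patch):

import pytest

@pytest.mark.skip(reason="not working")
def test_example():
    # Never executed; pytest reports "skipped: not working" instead.
    assert False
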
diff --git a/dev-python/pyarrow/pyarrow-12.0.0.ebuild b/dev-python/pyarrow/pyarrow-12.0.0.ebuild
index f7d9e1d04d1f..22a2296e6cef 100644
--- a/dev-python/pyarrow/pyarrow-12.0.0.ebuild
+++ b/dev-python/pyarrow/pyarrow-12.0.0.ebuild
@@ -19,12 +19,18 @@ LICENSE="Apache-2.0"
 SLOT="0"
 KEYWORDS="~amd64"
 IUSE="parquet snappy ssl"
-RESTRICT="test" #Tests not working 'import pyarrow.lib' error out
 
 RDEPEND="
 	~dev-libs/apache-arrow-${PV}[compute,dataset,json,parquet?,snappy?,ssl?]
 	dev-python/numpy[${PYTHON_USEDEP}]
 "
+BDEPEND="test? (
+	dev-python/hypothesis
+	dev-python/pandas
+	dev-python/pytest-lazy-fixture
+)"
+
+PATCHES=( "${FILESDIR}"/${P}-tests.patch )
 
 distutils_enable_tests pytest
 
@@ -48,3 +54,8 @@ src_compile() {
 
 	distutils-r1_src_compile
 }
+
+python_test() {
+	cd "${T}" || die
+	epytest --pyargs pyarrow
+}

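The new python_test() changes into ${T} before calling epytest --pyargs pyarrow, presumably so that pytest imports the installed pyarrow package rather than the pyarrow/ sources in the build directory (the old RESTRICT comment mentioned 'import pyarrow.lib' failing). A rough Python sketch of what that invocation amounts to, illustrative only and not part of the ebuild:

import os
import tempfile

import pytest

# Leave the source tree so "import pyarrow" cannot resolve to the
# uninstalled package directory in the current working directory.
os.chdir(tempfile.mkdtemp())

# --pyargs makes pytest treat "pyarrow" as an importable package name
# rather than a filesystem path.
raise SystemExit(pytest.main(["--pyargs", "pyarrow"]))
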


* [gentoo-commits] repo/gentoo:master commit in: dev-python/pyarrow/files/, dev-python/pyarrow/
@ 2024-05-08  9:51 Michał Górny
From: Michał Górny @ 2024-05-08  9:51 UTC
  To: gentoo-commits

commit:     e518dee37abe47fd08f4c2bfc7e0d20753b7697f
Author:     Michał Górny <mgorny <AT> gentoo <DOT> org>
AuthorDate: Wed May  8 09:50:36 2024 +0000
Commit:     Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Wed May  8 09:50:36 2024 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=e518dee3

dev-python/pyarrow: Remove old

Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>

 dev-python/pyarrow/Manifest                        |   1 -
 .../pyarrow/files/pyarrow-15.0.1-32bit.patch       | 325 ---------------------
 dev-python/pyarrow/pyarrow-15.0.2.ebuild           |  87 ------
 3 files changed, 413 deletions(-)

diff --git a/dev-python/pyarrow/Manifest b/dev-python/pyarrow/Manifest
index 3b44275b17cd..6136d388636a 100644
--- a/dev-python/pyarrow/Manifest
+++ b/dev-python/pyarrow/Manifest
@@ -1,2 +1 @@
-DIST apache-arrow-15.0.2.tar.gz 21503812 BLAKE2B 5a42b3409515d7a09daff33d30e72e828e1df2e009ed746f101f4d8e6dcadb2e9c305a6cb9799d4003e1421ba666d2a2e9ba182c11b0c538fbd1aee4b3ba10ff SHA512 6c83e3be1e5840c30387f088315b74aca8e7c2d060793af70a156effb496a71e3e6af0693188c0f46f8a4a061a263a47095912ef04a5dc8141abd59075b14c78
 DIST apache-arrow-16.0.0.tar.gz 21695067 BLAKE2B aa5dfef3d8d46a53242075c165473635051d51ff28587ea8b80751232d5f75ee3ef89e0a027aa39bdc9dc03fa46ddb68e46ae2c7f40605258e47ff194f1d3979 SHA512 773f4f3eef603032c8ba0cfdc023bfd2a24bb5e41c82da354a22d7854ab153294ede1f4782cc32b27451cf1b58303f105bac61ceeb3568faea747b93e21d79e4

diff --git a/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch b/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch
deleted file mode 100644
index 0b54deaf2c33..000000000000
--- a/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch
+++ /dev/null
@@ -1,325 +0,0 @@
-diff --git a/pyarrow/array.pxi b/pyarrow/array.pxi
-index 1416f5f43..058e0eec0 100644
---- a/pyarrow/array.pxi
-+++ b/pyarrow/array.pxi
-@@ -1573,7 +1573,7 @@ cdef class Array(_PandasConvertible):
-         # decoding the dictionary will make sure nulls are correctly handled.
-         # Decoding a dictionary does imply a copy by the way,
-         # so it can't be done if the user requested a zero_copy.
--        c_options.decode_dictionaries = not zero_copy_only
-+        c_options.decode_dictionaries = True
-         c_options.zero_copy_only = zero_copy_only
-         c_options.to_numpy = True
- 
-@@ -1585,9 +1585,6 @@ cdef class Array(_PandasConvertible):
-         # always convert to numpy array without pandas dependency
-         array = PyObject_to_object(out)
- 
--        if isinstance(array, dict):
--            array = np.take(array['dictionary'], array['indices'])
--
-         if writable and not array.flags.writeable:
-             # if the conversion already needed to a copy, writeable is True
-             array = array.copy()
-diff --git a/pyarrow/io.pxi b/pyarrow/io.pxi
-index 1897e76ef..b57980b3d 100644
---- a/pyarrow/io.pxi
-+++ b/pyarrow/io.pxi
-@@ -1987,7 +1987,7 @@ def foreign_buffer(address, size, base=None):
-         Object that owns the referenced memory.
-     """
-     cdef:
--        intptr_t c_addr = address
-+        uintptr_t c_addr = address
-         int64_t c_size = size
-         shared_ptr[CBuffer] buf
- 
-diff --git a/pyarrow/lib.pxd b/pyarrow/lib.pxd
-index 58ec34add..91c7633a7 100644
---- a/pyarrow/lib.pxd
-+++ b/pyarrow/lib.pxd
-@@ -285,6 +285,8 @@ cdef class Tensor(_Weakrefable):
- 
-     cdef readonly:
-         DataType type
-+        bytes _ssize_t_shape
-+        bytes _ssize_t_strides
- 
-     cdef void init(self, const shared_ptr[CTensor]& sp_tensor)
- 
-diff --git a/pyarrow/src/arrow/python/arrow_to_pandas.cc b/pyarrow/src/arrow/python/arrow_to_pandas.cc
-index e979342b8..8354812ea 100644
---- a/pyarrow/src/arrow/python/arrow_to_pandas.cc
-+++ b/pyarrow/src/arrow/python/arrow_to_pandas.cc
-@@ -2499,6 +2499,8 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options,
-                                    std::shared_ptr<ChunkedArray> arr, PyObject* py_ref,
-                                    PyObject** out) {
-   if (options.decode_dictionaries && arr->type()->id() == Type::DICTIONARY) {
-+    // XXX we should return an error as below if options.zero_copy_only
-+    // is true, but that would break compatibility with existing tests.
-     const auto& dense_type =
-         checked_cast<const DictionaryType&>(*arr->type()).value_type();
-     RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &arr));
-diff --git a/pyarrow/src/arrow/python/io.cc b/pyarrow/src/arrow/python/io.cc
-index 43f8297c5..197f8b9d3 100644
---- a/pyarrow/src/arrow/python/io.cc
-+++ b/pyarrow/src/arrow/python/io.cc
-@@ -92,9 +92,12 @@ class PythonFile {
-   Status Seek(int64_t position, int whence) {
-     RETURN_NOT_OK(CheckClosed());
- 
-+    // NOTE: `long long` is at least 64 bits in the C standard, the cast below is
-+    // therefore safe.
-+
-     // whence: 0 for relative to start of file, 2 for end of file
--    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(ni)",
--                                               static_cast<Py_ssize_t>(position), whence);
-+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(Li)",
-+                                               static_cast<long long>(position), whence);
-     Py_XDECREF(result);
-     PY_RETURN_IF_ERROR(StatusCode::IOError);
-     return Status::OK();
-@@ -103,16 +106,16 @@ class PythonFile {
-   Status Read(int64_t nbytes, PyObject** out) {
-     RETURN_NOT_OK(CheckClosed());
- 
--    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(n)",
--                                               static_cast<Py_ssize_t>(nbytes));
-+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(L)",
-+                                               static_cast<long long>(nbytes));
-     PY_RETURN_IF_ERROR(StatusCode::IOError);
-     *out = result;
-     return Status::OK();
-   }
- 
-   Status ReadBuffer(int64_t nbytes, PyObject** out) {
--    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(n)",
--                                               static_cast<Py_ssize_t>(nbytes));
-+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(L)",
-+                                               static_cast<long long>(nbytes));
-     PY_RETURN_IF_ERROR(StatusCode::IOError);
-     *out = result;
-     return Status::OK();
-diff --git a/pyarrow/tensor.pxi b/pyarrow/tensor.pxi
-index 1afce7f4a..c674663dc 100644
---- a/pyarrow/tensor.pxi
-+++ b/pyarrow/tensor.pxi
-@@ -15,6 +15,9 @@
- # specific language governing permissions and limitations
- # under the License.
- 
-+# Avoid name clash with `pa.struct` function
-+import struct as _struct
-+
- 
- cdef class Tensor(_Weakrefable):
-     """
-@@ -31,7 +34,6 @@ cdef class Tensor(_Weakrefable):
-     shape: (2, 3)
-     strides: (12, 4)
-     """
--
-     def __init__(self):
-         raise TypeError("Do not call Tensor's constructor directly, use one "
-                         "of the `pyarrow.Tensor.from_*` functions instead.")
-@@ -40,6 +42,14 @@ cdef class Tensor(_Weakrefable):
-         self.sp_tensor = sp_tensor
-         self.tp = sp_tensor.get()
-         self.type = pyarrow_wrap_data_type(self.tp.type())
-+        self._ssize_t_shape = self._make_shape_or_strides_buffer(self.shape)
-+        self._ssize_t_strides = self._make_shape_or_strides_buffer(self.strides)
-+
-+    def _make_shape_or_strides_buffer(self, values):
-+        """
-+        Make a bytes object holding an array of `values` cast to `Py_ssize_t`.
-+        """
-+        return _struct.pack(f"{len(values)}n", *values)
- 
-     def __repr__(self):
-         return """<pyarrow.Tensor>
-@@ -282,10 +292,8 @@ strides: {0.strides}""".format(self)
-             buffer.readonly = 0
-         else:
-             buffer.readonly = 1
--        # NOTE: This assumes Py_ssize_t == int64_t, and that the shape
--        # and strides arrays lifetime is tied to the tensor's
--        buffer.shape = <Py_ssize_t *> &self.tp.shape()[0]
--        buffer.strides = <Py_ssize_t *> &self.tp.strides()[0]
-+        buffer.shape = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_shape)
-+        buffer.strides = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_strides)
-         buffer.suboffsets = NULL
- 
- 
-diff --git a/pyarrow/tests/test_gdb.py b/pyarrow/tests/test_gdb.py
-index d0d241cc5..0d12d710d 100644
---- a/pyarrow/tests/test_gdb.py
-+++ b/pyarrow/tests/test_gdb.py
-@@ -885,32 +885,61 @@ def test_arrays_heap(gdb_arrow):
-         ("arrow::DurationArray of type arrow::duration"
-          "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
-          "[0] = null, [1] = -1234567890123456789ns}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_s",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
--         "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
--         "[2] = -2203932304s [1900-02-28 12:34:56], "
--         "[3] = 63730281600s [3989-07-14 00:00:00]}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_ms",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
--         "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], "
--         "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_us",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
--         "[0] = null, "
--         "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], "
--         "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}"))
--    check_heap_repr(
--        gdb_arrow, "heap_timestamp_array_ns",
--        ("arrow::TimestampArray of type arrow::timestamp"
--         "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
--         "[0] = null, "
--         "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}"))
-+    if sys.maxsize > 2**32:
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_s",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
-+             "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
-+             "[2] = -2203932304s [1900-02-28 12:34:56], "
-+             "[3] = 63730281600s [3989-07-14 00:00:00]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ms",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
-+             "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], "
-+             "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_us",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], "
-+             "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ns",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}"))
-+    else:
-+        # Python's datetime is limited to smaller timestamps on 32-bit platforms
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_s",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
-+             "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
-+             "[2] = -2203932304s [too large to represent], "
-+             "[3] = 63730281600s [too large to represent]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ms",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
-+             "[0] = null, [1] = -2203932303877ms [too large to represent], "
-+             "[2] = 63730281600789ms [too large to represent]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_us",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303345679us [too large to represent], "
-+             "[2] = 63730281600456789us [too large to represent]}"))
-+        check_heap_repr(
-+            gdb_arrow, "heap_timestamp_array_ns",
-+            ("arrow::TimestampArray of type arrow::timestamp"
-+             "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
-+             "[0] = null, "
-+             "[1] = -2203932303012345679ns [too large to represent]}"))
- 
-     # Decimal
-     check_heap_repr(
-diff --git a/pyarrow/tests/test_io.py b/pyarrow/tests/test_io.py
-index 5a495aa80..17eab871a 100644
---- a/pyarrow/tests/test_io.py
-+++ b/pyarrow/tests/test_io.py
-@@ -36,7 +36,7 @@ from pyarrow import Codec
- import pyarrow as pa
- 
- 
--def check_large_seeks(file_factory):
-+def check_large_seeks(file_factory, expected_error=None):
-     if sys.platform in ('win32', 'darwin'):
-         pytest.skip("need sparse file support")
-     try:
-@@ -45,11 +45,16 @@ def check_large_seeks(file_factory):
-             f.truncate(2 ** 32 + 10)
-             f.seek(2 ** 32 + 5)
-             f.write(b'mark\n')
--        with file_factory(filename) as f:
--            assert f.seek(2 ** 32 + 5) == 2 ** 32 + 5
--            assert f.tell() == 2 ** 32 + 5
--            assert f.read(5) == b'mark\n'
--            assert f.tell() == 2 ** 32 + 10
-+        if expected_error:
-+            with expected_error:
-+                file_factory(filename)
-+        else:
-+            with file_factory(filename) as f:
-+                assert f.size() == 2 ** 32 + 10
-+                assert f.seek(2 ** 32 + 5) == 2 ** 32 + 5
-+                assert f.tell() == 2 ** 32 + 5
-+                assert f.read(5) == b'mark\n'
-+                assert f.tell() == 2 ** 32 + 10
-     finally:
-         os.unlink(filename)
- 
-@@ -1137,7 +1142,14 @@ def test_memory_zero_length(tmpdir):
- 
- 
- def test_memory_map_large_seeks():
--    check_large_seeks(pa.memory_map)
-+    if sys.maxsize >= 2**32:
-+        expected_error = None
-+    else:
-+        expected_error = pytest.raises(
-+            pa.ArrowCapacityError,
-+            match="Requested memory map length 4294967306 "
-+                  "does not fit in a C size_t")
-+    check_large_seeks(pa.memory_map, expected_error=expected_error)
- 
- 
- def test_memory_map_close_remove(tmpdir):
-diff --git a/pyarrow/tests/test_pandas.py b/pyarrow/tests/test_pandas.py
-index 8fd4b3041..168ed7e42 100644
---- a/pyarrow/tests/test_pandas.py
-+++ b/pyarrow/tests/test_pandas.py
-@@ -2601,8 +2601,9 @@ class TestConvertStructTypes:
-                                        ('yy', np.bool_)])),
-                        ('y', np.int16),
-                        ('z', np.object_)])
--        # Note: itemsize is not a multiple of sizeof(object)
--        assert dt.itemsize == 12
-+        # Note: itemsize is not necessarily a multiple of sizeof(object)
-+        # object_ is 8 bytes on 64-bit systems, 4 bytes on 32-bit systems
-+        assert dt.itemsize == (12 if sys.maxsize > 2**32 else 8)
-         ty = pa.struct([pa.field('x', pa.struct([pa.field('xx', pa.int8()),
-                                                  pa.field('yy', pa.bool_())])),
-                         pa.field('y', pa.int16()),
-diff --git a/pyarrow/tests/test_schema.py b/pyarrow/tests/test_schema.py
-index fa75fcea3..8793c9e77 100644
---- a/pyarrow/tests/test_schema.py
-+++ b/pyarrow/tests/test_schema.py
-@@ -681,7 +681,8 @@ def test_schema_sizeof():
-         pa.field('bar', pa.string()),
-     ])
- 
--    assert sys.getsizeof(schema) > 30
-+    # Note: pa.schema is twice as large on 64-bit systems
-+    assert sys.getsizeof(schema) > (30 if sys.maxsize > 2**32 else 15)
- 
-     schema2 = schema.with_metadata({"key": "some metadata"})
-     assert sys.getsizeof(schema2) > sys.getsizeof(schema)

diff --git a/dev-python/pyarrow/pyarrow-15.0.2.ebuild b/dev-python/pyarrow/pyarrow-15.0.2.ebuild
deleted file mode 100644
index 8f358f46c970..000000000000
--- a/dev-python/pyarrow/pyarrow-15.0.2.ebuild
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright 2023-2024 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-DISTUTILS_EXT=1
-DISTUTILS_USE_PEP517=setuptools
-PYTHON_COMPAT=( python3_{10..12} )
-
-inherit distutils-r1 multiprocessing
-
-DESCRIPTION="Python library for Apache Arrow"
-HOMEPAGE="
-	https://arrow.apache.org/
-	https://github.com/apache/arrow/
-	https://pypi.org/project/pyarrow/
-"
-SRC_URI="mirror://apache/arrow/arrow-${PV}/apache-arrow-${PV}.tar.gz"
-S="${WORKDIR}/apache-arrow-${PV}/python"
-
-LICENSE="Apache-2.0"
-SLOT="0"
-KEYWORDS="amd64 ~arm64 ~hppa ~riscv ~x86"
-IUSE="+parquet +snappy ssl"
-
-RDEPEND="
-	~dev-libs/apache-arrow-${PV}[compute,dataset,json,parquet?,re2,snappy?,ssl?]
-	<dev-python/numpy-2:=[${PYTHON_USEDEP}]
-"
-BDEPEND="
-	test? (
-		dev-python/hypothesis[${PYTHON_USEDEP}]
-		dev-python/pandas[${PYTHON_USEDEP}]
-		dev-libs/apache-arrow[lz4,zlib]
-	)
-"
-
-EPYTEST_XDIST=1
-distutils_enable_tests pytest
-
-PATCHES=(
-	# upstream backports
-	"${FILESDIR}/${PN}-15.0.1-32bit.patch"
-)
-
-src_prepare() {
-	# cython's -Werror
-	sed -i -e '/--warning-errors/d' CMakeLists.txt || die
-	distutils-r1_src_prepare
-}
-
-src_compile() {
-	export PYARROW_PARALLEL="$(makeopts_jobs)"
-	export PYARROW_BUILD_VERBOSE=1
-	export PYARROW_CXXFLAGS="${CXXFLAGS}"
-	export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0
-	export PYARROW_CMAKE_GENERATOR=Ninja
-	export PYARROW_WITH_HDFS=1
-	if use parquet; then
-		export PYARROW_WITH_DATASET=1
-		export PYARROW_WITH_PARQUET=1
-		use ssl && export PYARROW_WITH_PARQUET_ENCRYPTION=1
-	fi
-	if use snappy; then
-		export PYARROW_WITH_SNAPPY=1
-	fi
-
-	distutils-r1_src_compile
-}
-
-python_test() {
-	local EPYTEST_DESELECT=(
-		# wtf?
-		tests/test_fs.py::test_localfs_errors
-		# these require apache-arrow with jemalloc that doesn't seem
-		# to be supported by the Gentoo package
-		tests/test_memory.py::test_env_var
-		tests/test_memory.py::test_specific_memory_pools
-		tests/test_memory.py::test_supported_memory_backends
-		# pandas changed, i guess
-		tests/test_pandas.py::test_array_protocol_pandas_extension_types
-		tests/test_table.py::test_table_factory_function_args_pandas
-	)
-
-	cd "${T}" || die
-	epytest --pyargs pyarrow
-}



* [gentoo-commits] repo/gentoo:master commit in: dev-python/pyarrow/files/, dev-python/pyarrow/
@ 2024-06-13 15:00 Michał Górny
From: Michał Górny @ 2024-06-13 15:00 UTC
  To: gentoo-commits

commit:     4511ba1f0d5c2107597246629011497af324bf30
Author:     Michał Górny <mgorny <AT> gentoo <DOT> org>
AuthorDate: Thu Jun 13 14:29:53 2024 +0000
Commit:     Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Thu Jun 13 14:59:56 2024 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=4511ba1f

dev-python/pyarrow: Backport numpy-2 patch

Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>

 .../pyarrow/files/pyarrow-16.1.0-numpy-2.patch     | 65 ++++++++++++++++++++++
 dev-python/pyarrow/pyarrow-16.1.0.ebuild           | 13 ++---
 2 files changed, 70 insertions(+), 8 deletions(-)

diff --git a/dev-python/pyarrow/files/pyarrow-16.1.0-numpy-2.patch b/dev-python/pyarrow/files/pyarrow-16.1.0-numpy-2.patch
new file mode 100644
index 000000000000..d43e15746408
--- /dev/null
+++ b/dev-python/pyarrow/files/pyarrow-16.1.0-numpy-2.patch
@@ -0,0 +1,65 @@
+diff --git a/pyarrow/tests/parquet/common.py b/pyarrow/tests/parquet/common.py
+index 8365ed5b28543..c3094ee20b34c 100644
+--- a/pyarrow/tests/parquet/common.py
++++ b/pyarrow/tests/parquet/common.py
+@@ -83,7 +83,7 @@ def _random_integers(size, dtype):
+     iinfo = np.iinfo(dtype)
+     return np.random.randint(max(iinfo.min, platform_int_info.min),
+                              min(iinfo.max, platform_int_info.max),
+-                             size=size).astype(dtype)
++                             size=size, dtype=dtype)
+ 
+ 
+ def _range_integers(size, dtype):
+diff --git a/pyarrow/tests/test_array.py b/pyarrow/tests/test_array.py
+index 88394c77e429d..1032ab9add3ca 100644
+--- a/pyarrow/tests/test_array.py
++++ b/pyarrow/tests/test_array.py
+@@ -3398,7 +3398,7 @@ def test_numpy_array_protocol():
+     result = np.asarray(arr)
+     np.testing.assert_array_equal(result, expected)
+ 
+-    if Version(np.__version__) < Version("2.0"):
++    if Version(np.__version__) < Version("2.0.0.dev0"):
+         # copy keyword is not strict and not passed down to __array__
+         result = np.array(arr, copy=False)
+         np.testing.assert_array_equal(result, expected)
+diff --git a/pyarrow/tests/test_pandas.py b/pyarrow/tests/test_pandas.py
+index be2c5b14e68b0..ba9d6a3c01391 100644
+--- a/pyarrow/tests/test_pandas.py
++++ b/pyarrow/tests/test_pandas.py
+@@ -780,7 +780,7 @@ def test_integer_no_nulls(self):
+             info = np.iinfo(dtype)
+             values = np.random.randint(max(info.min, np.iinfo(np.int_).min),
+                                        min(info.max, np.iinfo(np.int_).max),
+-                                       size=num_values)
++                                       size=num_values, dtype=dtype)
+             data[dtype] = values.astype(dtype)
+             fields.append(pa.field(dtype, arrow_dtype))
+ 
+diff --git a/pyarrow/tests/test_table.py b/pyarrow/tests/test_table.py
+index a58010d083e92..f40759de50c8c 100644
+--- a/pyarrow/tests/test_table.py
++++ b/pyarrow/tests/test_table.py
+@@ -3281,7 +3281,7 @@ def test_numpy_array_protocol(constructor):
+     table = constructor([[1, 2, 3], [4.0, 5.0, 6.0]], names=["a", "b"])
+     expected = np.array([[1, 4], [2, 5], [3, 6]], dtype="float64")
+ 
+-    if Version(np.__version__) < Version("2.0"):
++    if Version(np.__version__) < Version("2.0.0.dev0"):
+         # copy keyword is not strict and not passed down to __array__
+         result = np.array(table, copy=False)
+         np.testing.assert_array_equal(result, expected)
+diff --git a/scripts/test_leak.py b/scripts/test_leak.py
+index f2bbe8d051bf9..86a87f5e742e8 100644
+--- a/scripts/test_leak.py
++++ b/scripts/test_leak.py
+@@ -98,7 +98,7 @@ def func():
+ 
+ 
+ def test_ARROW_8801():
+-    x = pd.to_datetime(np.random.randint(0, 2**32, size=2**20),
++    x = pd.to_datetime(np.random.randint(0, 2**32, size=2**20, dtype=np.int64),
+                        unit='ms', utc=True)
+     table = pa.table(pd.DataFrame({'x': x}))
+ 

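A note on the two Version() hunks above: with packaging's ordering, dev and release-candidate builds of NumPy 2.0 sort below Version("2.0"), so the old check would have sent them down the NumPy-1 code path; comparing against "2.0.0.dev0" presumably avoids that. A quick illustration, not part of the patch:

from packaging.version import Version

# Pre-releases of 2.0.0 compare lower than the plain "2.0" release...
print(Version("2.0.0rc1") < Version("2.0"))         # True  -> old check: NumPy-1 branch
# ...but not lower than the earliest possible 2.0.0 dev build.
print(Version("2.0.0rc1") < Version("2.0.0.dev0"))  # False -> new check: NumPy-2 branch
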
diff --git a/dev-python/pyarrow/pyarrow-16.1.0.ebuild b/dev-python/pyarrow/pyarrow-16.1.0.ebuild
index 37167ff75b2c..7a027d3f6336 100644
--- a/dev-python/pyarrow/pyarrow-16.1.0.ebuild
+++ b/dev-python/pyarrow/pyarrow-16.1.0.ebuild
@@ -39,6 +39,11 @@ EPYTEST_XDIST=1
 distutils_enable_tests pytest
 
 src_prepare() {
+	local PATCHES=(
+		# https://github.com/apache/arrow/pull/42099
+		"${FILESDIR}/${P}-numpy-2.patch"
+	)
+
 	# cython's -Werror
 	sed -i -e '/--warning-errors/d' CMakeLists.txt || die
 	distutils-r1_src_prepare
@@ -83,14 +88,6 @@ python_test() {
 		tests/test_pandas.py::test_array_to_pandas_roundtrip
 	)
 
-	if has_version ">=dev-python/numpy-2"; then
-		EPYTEST_DESELECT+=(
-			# https://github.com/apache/arrow/issues/41319
-			tests/test_array.py::test_numpy_array_protocol
-			tests/test_table.py::test_numpy_array_protocol
-		)
-	fi
-
 	cd "${T}" || die
 	local -x PYTEST_DISABLE_PLUGIN_AUTOLOAD=1
 	epytest --pyargs pyarrow


