* [gentoo-commits] repo/gentoo:master commit in: dev-python/pyarrow/, dev-python/pyarrow/files/
@ 2023-07-24 11:33 Michał Górny
0 siblings, 0 replies; 3+ messages in thread
From: Michał Górny @ 2023-07-24 11:33 UTC (permalink / raw)
To: gentoo-commits
commit: 2c55f5435c517319680a73f13b4a91bec4379e7e
Author: Michał Górny <mgorny <AT> gentoo <DOT> org>
AuthorDate: Mon Jul 24 11:19:29 2023 +0000
Commit: Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Mon Jul 24 11:33:08 2023 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=2c55f543
dev-python/pyarrow: Remove old
Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>
dev-python/pyarrow/Manifest | 1 -
.../pyarrow/files/pyarrow-12.0.0-tests.patch | 144 ---------------------
dev-python/pyarrow/pyarrow-12.0.0.ebuild | 61 ---------
3 files changed, 206 deletions(-)
diff --git a/dev-python/pyarrow/Manifest b/dev-python/pyarrow/Manifest
index fec6d859a687..61a09a5ef288 100644
--- a/dev-python/pyarrow/Manifest
+++ b/dev-python/pyarrow/Manifest
@@ -1,2 +1 @@
-DIST apache-arrow-12.0.0.tar.gz 20159048 BLAKE2B 43b4f36e1d1f84fc83c46a3627ad72cead38310f4325b8d21d17d726cd416016f0839d312c80737c0a921da6aefee537413e30309a656301c19834c2986d734e SHA512 f815be4fb20b6001ba5525270765fe239b5468708a7be34b93b60ee0ce63464727d183c9756fbc33bffd199019e1f06a7fddd306ce8388435cea7771070a2ca9
DIST apache-arrow-12.0.1.tar.gz 20172604 BLAKE2B bf66761d33ceb778c2f53c2e643e0c2bb4448f29eaa94d17e9815628665f4965fde28bd47fce4a5874717d2161c3f8adbb2e8a0e6658a51ae6d617e4f09d202b SHA512 551ae200551fcc73b7deddcc5f0b06633159ab1308506901a9086e4e2e34e4437f26d609fdbacba0ebe7d1fe83bdb8e92a268e9e41575d655d5b2d4fbef7a7ce
diff --git a/dev-python/pyarrow/files/pyarrow-12.0.0-tests.patch b/dev-python/pyarrow/files/pyarrow-12.0.0-tests.patch
deleted file mode 100644
index 56a307592593..000000000000
--- a/dev-python/pyarrow/files/pyarrow-12.0.0-tests.patch
+++ /dev/null
@@ -1,144 +0,0 @@
---- a/pyarrow/tests/test_compute.py 2023-05-08 09:06:34.571387618 +0200
-+++ b/pyarrow/tests/test_compute.py 2023-05-08 09:11:52.759753459 +0200
-@@ -414,6 +414,7 @@
- assert pc.variance(data, ddof=1).as_py() == 6.0
-
-
-+@pytest.mark.skip(reason="not working")
- def test_count_substring():
- for (ty, offset) in [(pa.string(), pa.int32()),
- (pa.large_string(), pa.int64())]:
-@@ -428,6 +429,7 @@
- assert expected == result
-
-
-+@pytest.mark.skip(reason="not working")
- def test_count_substring_regex():
- for (ty, offset) in [(pa.string(), pa.int32()),
- (pa.large_string(), pa.int64())]:
-@@ -442,6 +444,7 @@
- assert expected.equals(result)
-
-
-+@pytest.mark.skip(reason="not working")
- def test_find_substring():
- for ty in [pa.string(), pa.binary(), pa.large_string(), pa.large_binary()]:
- arr = pa.array(["ab", "cab", "ba", None], type=ty)
-@@ -459,6 +462,7 @@
- assert result.to_pylist() == [0, 1, 0, 0]
-
-
-+@pytest.mark.skip(reason="not working")
- def test_match_like():
- arr = pa.array(["ab", "ba%", "ba", "ca%d", None])
- result = pc.match_like(arr, r"_a\%%")
-@@ -474,6 +478,7 @@
- assert expected.equals(result)
-
-
-+@pytest.mark.skip(reason="not working")
- def test_match_substring():
- arr = pa.array(["ab", "abc", "ba", None])
- result = pc.match_substring(arr, "ab")
-@@ -489,6 +494,7 @@
- assert expected.equals(result)
-
-
-+@pytest.mark.skip(reason="not working")
- def test_match_substring_regex():
- arr = pa.array(["ab", "abc", "ba", "c", None])
- result = pc.match_substring_regex(arr, "^a?b")
-@@ -602,6 +608,7 @@
- assert expected.equals(result)
-
-
-+@pytest.mark.skip(reason="not working")
- def test_split_pattern_regex():
- arr = pa.array(["-foo---bar--", "---foo---b"])
- result = pc.split_pattern_regex(arr, pattern="-+")
-@@ -1022,6 +1029,7 @@
- assert ar.tolist() == ['barzfoo', 'bard', None]
-
-
-+@pytest.mark.skip(reason="not working")
- def test_replace_regex():
- data = pa.array(['foo', 'mood', None])
- expected = ['f00', 'm00d', None]
-@@ -1033,6 +1041,7 @@
- assert ar.tolist() == expected
-
-
-+@pytest.mark.skip(reason="not working")
- def test_extract_regex():
- ar = pa.array(['a1', 'zb2z'])
- expected = [{'letter': 'a', 'digit': '1'}, {'letter': 'b', 'digit': '2'}]
---- a/pyarrow/tests/test_fs.py 2023-05-08 09:13:26.796384297 +0200
-+++ b/pyarrow/tests/test_fs.py 2023-05-08 09:14:20.567601499 +0200
-@@ -1012,6 +1012,7 @@
- LocalFileSystem(xxx=False)
-
-
-+@pytest.mark.skip(reason="not working")
- def test_localfs_errors(localfs):
- # Local filesystem errors should raise the right Python exceptions
- # (e.g. FileNotFoundError)
---- a/pyarrow/tests/test_memory.py 2023-05-08 09:15:35.366512597 +0200
-+++ b/pyarrow/tests/test_memory.py 2023-05-08 09:16:44.969501524 +0200
-@@ -140,6 +140,7 @@
- assert len(errlines) == 0
-
-
-+@pytest.mark.skip(reason="not working")
- def test_env_var():
- check_env_var("system", ["system"])
- if should_have_jemalloc:
-@@ -149,6 +150,7 @@
- check_env_var("nonexistent", possible_backends, expect_warning=True)
-
-
-+@pytest.mark.skip(reason="not working")
- def test_specific_memory_pools():
- specific_pools = set()
-
-@@ -170,6 +172,7 @@
- can_fail=not should_have_mimalloc)
-
-
-+@pytest.mark.skip(reason="not working")
- def test_supported_memory_backends():
- backends = pa.supported_memory_backends()
-
---- a/pyarrow/tests/parquet/test_basic.py 2023-05-08 09:18:05.307333210 +0200
-+++ b/pyarrow/tests/parquet/test_basic.py 2023-05-08 09:20:16.135429950 +0200
-@@ -349,6 +349,7 @@
- assert result.equals(table)
-
-
-+@pytest.mark.skip(reason="not working")
- @parametrize_legacy_dataset
- def test_byte_stream_split(use_legacy_dataset):
- # This is only a smoke test.
-@@ -510,6 +511,7 @@
- use_legacy_dataset=use_legacy_dataset)
-
-
-+@pytest.mark.skip(reason="not working")
- @parametrize_legacy_dataset
- def test_compression_level(use_legacy_dataset):
- arr = pa.array(list(map(int, range(1000))))
-@@ -660,6 +662,7 @@
- use_legacy_dataset=use_legacy_dataset)
-
-
-+@pytest.mark.skip(reason="not working")
- @pytest.mark.pandas
- @parametrize_legacy_dataset
- def test_zlib_compression_bug(use_legacy_dataset):
-@@ -760,6 +763,7 @@
- assert buf.to_pybytes() == buf.size * b"\0"
-
-
-+@pytest.mark.skip(reason="not working")
- def test_parquet_compression_roundtrip(tempdir):
- # ARROW-10480: ensure even with nonstandard Parquet file naming
- # conventions, writing and then reading a file works. In
diff --git a/dev-python/pyarrow/pyarrow-12.0.0.ebuild b/dev-python/pyarrow/pyarrow-12.0.0.ebuild
deleted file mode 100644
index 21342c3f3ac8..000000000000
--- a/dev-python/pyarrow/pyarrow-12.0.0.ebuild
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2023 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-DISTUTILS_EXT=1
-DISTUTILS_USE_PEP517=setuptools
-PYTHON_COMPAT=( python3_{9..11} )
-inherit distutils-r1 multiprocessing
-
-DESCRIPTION="Python library for Apache Arrow"
-HOMEPAGE="
- https://pypi.org/project/pyarrow/
- https://arrow.apache.org/
-"
-SRC_URI="mirror://apache/arrow/arrow-${PV}/apache-arrow-${PV}.tar.gz"
-
-LICENSE="Apache-2.0"
-SLOT="0"
-KEYWORDS="~amd64"
-IUSE="parquet snappy ssl"
-
-RDEPEND="
- ~dev-libs/apache-arrow-${PV}[compute,dataset,json,parquet?,snappy?,ssl?]
- dev-python/numpy[${PYTHON_USEDEP}]
-"
-BDEPEND="test? (
- dev-python/hypothesis
- <dev-python/pandas-2
- dev-python/pytest-lazy-fixture
-)"
-
-PATCHES=( "${FILESDIR}"/${P}-tests.patch )
-
-distutils_enable_tests pytest
-
-S="${WORKDIR}/apache-arrow-${PV}/python"
-
-src_compile() {
- export PYARROW_PARALLEL="$(makeopts_jobs)"
- export PYARROW_BUILD_VERBOSE=1
- export PYARROW_CXXFLAGS="${CXXFLAGS}"
- export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0
- export PYARROW_CMAKE_GENERATOR=Ninja
- export PYARROW_WITH_HDFS=1
- if use parquet; then
- export PYARROW_WITH_DATASET=1
- export PYARROW_WITH_PARQUET=1
- use ssl && export PYARROW_WITH_PARQUET_ENCRYPTION=1
- fi
- if use snappy; then
- export PYARROW_WITH_SNAPPY=1
- fi
-
- distutils-r1_src_compile
-}
-
-python_test() {
- cd "${T}" || die
- epytest --pyargs pyarrow
-}
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: dev-python/pyarrow/, dev-python/pyarrow/files/
@ 2024-03-08 18:28 Michał Górny
0 siblings, 0 replies; 3+ messages in thread
From: Michał Górny @ 2024-03-08 18:28 UTC (permalink / raw)
To: gentoo-commits
commit: 056eb2421401c324af6946f82302e35ca6afb026
Author: Michał Górny <mgorny <AT> gentoo <DOT> org>
AuthorDate: Fri Mar 8 17:11:39 2024 +0000
Commit: Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Fri Mar 8 18:28:40 2024 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=056eb242
dev-python/pyarrow: Bump to 15.0.1
Closes: https://bugs.gentoo.org/926309
Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>
dev-python/pyarrow/Manifest | 1 +
.../pyarrow/files/pyarrow-15.0.1-32bit.patch | 325 +++++++++++++++++++++
dev-python/pyarrow/pyarrow-15.0.1.ebuild | 87 ++++++
3 files changed, 413 insertions(+)
diff --git a/dev-python/pyarrow/Manifest b/dev-python/pyarrow/Manifest
index 36dbedb282d9..809d7f359a44 100644
--- a/dev-python/pyarrow/Manifest
+++ b/dev-python/pyarrow/Manifest
@@ -1 +1,2 @@
DIST apache-arrow-15.0.0.tar.gz 21491996 BLAKE2B 55709d1d181ed5c1482e1eadc9031c692bbd39434ccad17be8c0f3f5af47e3b3d5f262903d1ce09c39442497e14c22c80d7b30215e4de830a4ac82a1b3db34fb SHA512 d5dccaa0907b0e6f2a460e32ae75091942dcb70b51db4aefe2767ee8d99882694607b723a9c06898dda3938d8eb498258d7f9aad11054665b6ea9c2fbaeafa74
+DIST apache-arrow-15.0.1.tar.gz 21499849 BLAKE2B 5f8f91932941105e753b7b7812bf132bd99501ccfac0574b8072e638764cb46694062bcdb8568a474f50de008ede9259b70f16ba7f33ada0f6ec763c21b1c25a SHA512 b426421336c6bc3757626b2743a039d3c7030ad257c3bcf3247a236462dbc140b7eff4476cb727f4d048144a90c1368740c139318f8237d6cc20e87d3efdaf74
diff --git a/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch b/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch
new file mode 100644
index 000000000000..0b54deaf2c33
--- /dev/null
+++ b/dev-python/pyarrow/files/pyarrow-15.0.1-32bit.patch
@@ -0,0 +1,325 @@
+diff --git a/pyarrow/array.pxi b/pyarrow/array.pxi
+index 1416f5f43..058e0eec0 100644
+--- a/pyarrow/array.pxi
++++ b/pyarrow/array.pxi
+@@ -1573,7 +1573,7 @@ cdef class Array(_PandasConvertible):
+ # decoding the dictionary will make sure nulls are correctly handled.
+ # Decoding a dictionary does imply a copy by the way,
+ # so it can't be done if the user requested a zero_copy.
+- c_options.decode_dictionaries = not zero_copy_only
++ c_options.decode_dictionaries = True
+ c_options.zero_copy_only = zero_copy_only
+ c_options.to_numpy = True
+
+@@ -1585,9 +1585,6 @@ cdef class Array(_PandasConvertible):
+ # always convert to numpy array without pandas dependency
+ array = PyObject_to_object(out)
+
+- if isinstance(array, dict):
+- array = np.take(array['dictionary'], array['indices'])
+-
+ if writable and not array.flags.writeable:
+ # if the conversion already needed to a copy, writeable is True
+ array = array.copy()
+diff --git a/pyarrow/io.pxi b/pyarrow/io.pxi
+index 1897e76ef..b57980b3d 100644
+--- a/pyarrow/io.pxi
++++ b/pyarrow/io.pxi
+@@ -1987,7 +1987,7 @@ def foreign_buffer(address, size, base=None):
+ Object that owns the referenced memory.
+ """
+ cdef:
+- intptr_t c_addr = address
++ uintptr_t c_addr = address
+ int64_t c_size = size
+ shared_ptr[CBuffer] buf
+
+diff --git a/pyarrow/lib.pxd b/pyarrow/lib.pxd
+index 58ec34add..91c7633a7 100644
+--- a/pyarrow/lib.pxd
++++ b/pyarrow/lib.pxd
+@@ -285,6 +285,8 @@ cdef class Tensor(_Weakrefable):
+
+ cdef readonly:
+ DataType type
++ bytes _ssize_t_shape
++ bytes _ssize_t_strides
+
+ cdef void init(self, const shared_ptr[CTensor]& sp_tensor)
+
+diff --git a/pyarrow/src/arrow/python/arrow_to_pandas.cc b/pyarrow/src/arrow/python/arrow_to_pandas.cc
+index e979342b8..8354812ea 100644
+--- a/pyarrow/src/arrow/python/arrow_to_pandas.cc
++++ b/pyarrow/src/arrow/python/arrow_to_pandas.cc
+@@ -2499,6 +2499,8 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options,
+ std::shared_ptr<ChunkedArray> arr, PyObject* py_ref,
+ PyObject** out) {
+ if (options.decode_dictionaries && arr->type()->id() == Type::DICTIONARY) {
++ // XXX we should return an error as below if options.zero_copy_only
++ // is true, but that would break compatibility with existing tests.
+ const auto& dense_type =
+ checked_cast<const DictionaryType&>(*arr->type()).value_type();
+ RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &arr));
+diff --git a/pyarrow/src/arrow/python/io.cc b/pyarrow/src/arrow/python/io.cc
+index 43f8297c5..197f8b9d3 100644
+--- a/pyarrow/src/arrow/python/io.cc
++++ b/pyarrow/src/arrow/python/io.cc
+@@ -92,9 +92,12 @@ class PythonFile {
+ Status Seek(int64_t position, int whence) {
+ RETURN_NOT_OK(CheckClosed());
+
++ // NOTE: `long long` is at least 64 bits in the C standard, the cast below is
++ // therefore safe.
++
+ // whence: 0 for relative to start of file, 2 for end of file
+- PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(ni)",
+- static_cast<Py_ssize_t>(position), whence);
++ PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(Li)",
++ static_cast<long long>(position), whence);
+ Py_XDECREF(result);
+ PY_RETURN_IF_ERROR(StatusCode::IOError);
+ return Status::OK();
+@@ -103,16 +106,16 @@ class PythonFile {
+ Status Read(int64_t nbytes, PyObject** out) {
+ RETURN_NOT_OK(CheckClosed());
+
+- PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(n)",
+- static_cast<Py_ssize_t>(nbytes));
++ PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(L)",
++ static_cast<long long>(nbytes));
+ PY_RETURN_IF_ERROR(StatusCode::IOError);
+ *out = result;
+ return Status::OK();
+ }
+
+ Status ReadBuffer(int64_t nbytes, PyObject** out) {
+- PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(n)",
+- static_cast<Py_ssize_t>(nbytes));
++ PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(L)",
++ static_cast<long long>(nbytes));
+ PY_RETURN_IF_ERROR(StatusCode::IOError);
+ *out = result;
+ return Status::OK();
+diff --git a/pyarrow/tensor.pxi b/pyarrow/tensor.pxi
+index 1afce7f4a..c674663dc 100644
+--- a/pyarrow/tensor.pxi
++++ b/pyarrow/tensor.pxi
+@@ -15,6 +15,9 @@
+ # specific language governing permissions and limitations
+ # under the License.
+
++# Avoid name clash with `pa.struct` function
++import struct as _struct
++
+
+ cdef class Tensor(_Weakrefable):
+ """
+@@ -31,7 +34,6 @@ cdef class Tensor(_Weakrefable):
+ shape: (2, 3)
+ strides: (12, 4)
+ """
+-
+ def __init__(self):
+ raise TypeError("Do not call Tensor's constructor directly, use one "
+ "of the `pyarrow.Tensor.from_*` functions instead.")
+@@ -40,6 +42,14 @@ cdef class Tensor(_Weakrefable):
+ self.sp_tensor = sp_tensor
+ self.tp = sp_tensor.get()
+ self.type = pyarrow_wrap_data_type(self.tp.type())
++ self._ssize_t_shape = self._make_shape_or_strides_buffer(self.shape)
++ self._ssize_t_strides = self._make_shape_or_strides_buffer(self.strides)
++
++ def _make_shape_or_strides_buffer(self, values):
++ """
++ Make a bytes object holding an array of `values` cast to `Py_ssize_t`.
++ """
++ return _struct.pack(f"{len(values)}n", *values)
+
+ def __repr__(self):
+ return """<pyarrow.Tensor>
+@@ -282,10 +292,8 @@ strides: {0.strides}""".format(self)
+ buffer.readonly = 0
+ else:
+ buffer.readonly = 1
+- # NOTE: This assumes Py_ssize_t == int64_t, and that the shape
+- # and strides arrays lifetime is tied to the tensor's
+- buffer.shape = <Py_ssize_t *> &self.tp.shape()[0]
+- buffer.strides = <Py_ssize_t *> &self.tp.strides()[0]
++ buffer.shape = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_shape)
++ buffer.strides = <Py_ssize_t *> cp.PyBytes_AsString(self._ssize_t_strides)
+ buffer.suboffsets = NULL
+
+
+diff --git a/pyarrow/tests/test_gdb.py b/pyarrow/tests/test_gdb.py
+index d0d241cc5..0d12d710d 100644
+--- a/pyarrow/tests/test_gdb.py
++++ b/pyarrow/tests/test_gdb.py
+@@ -885,32 +885,61 @@ def test_arrays_heap(gdb_arrow):
+ ("arrow::DurationArray of type arrow::duration"
+ "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
+ "[0] = null, [1] = -1234567890123456789ns}"))
+- check_heap_repr(
+- gdb_arrow, "heap_timestamp_array_s",
+- ("arrow::TimestampArray of type arrow::timestamp"
+- "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
+- "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
+- "[2] = -2203932304s [1900-02-28 12:34:56], "
+- "[3] = 63730281600s [3989-07-14 00:00:00]}"))
+- check_heap_repr(
+- gdb_arrow, "heap_timestamp_array_ms",
+- ("arrow::TimestampArray of type arrow::timestamp"
+- "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
+- "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], "
+- "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}"))
+- check_heap_repr(
+- gdb_arrow, "heap_timestamp_array_us",
+- ("arrow::TimestampArray of type arrow::timestamp"
+- "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
+- "[0] = null, "
+- "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], "
+- "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}"))
+- check_heap_repr(
+- gdb_arrow, "heap_timestamp_array_ns",
+- ("arrow::TimestampArray of type arrow::timestamp"
+- "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
+- "[0] = null, "
+- "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}"))
++ if sys.maxsize > 2**32:
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_s",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
++ "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
++ "[2] = -2203932304s [1900-02-28 12:34:56], "
++ "[3] = 63730281600s [3989-07-14 00:00:00]}"))
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_ms",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
++ "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], "
++ "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}"))
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_us",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
++ "[0] = null, "
++ "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], "
++ "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}"))
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_ns",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
++ "[0] = null, "
++ "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}"))
++ else:
++ # Python's datetime is limited to smaller timestamps on 32-bit platforms
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_s",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {"
++ "[0] = null, [1] = 0s [1970-01-01 00:00:00], "
++ "[2] = -2203932304s [too large to represent], "
++ "[3] = 63730281600s [too large to represent]}"))
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_ms",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {"
++ "[0] = null, [1] = -2203932303877ms [too large to represent], "
++ "[2] = 63730281600789ms [too large to represent]}"))
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_us",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {"
++ "[0] = null, "
++ "[1] = -2203932303345679us [too large to represent], "
++ "[2] = 63730281600456789us [too large to represent]}"))
++ check_heap_repr(
++ gdb_arrow, "heap_timestamp_array_ns",
++ ("arrow::TimestampArray of type arrow::timestamp"
++ "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {"
++ "[0] = null, "
++ "[1] = -2203932303012345679ns [too large to represent]}"))
+
+ # Decimal
+ check_heap_repr(
+diff --git a/pyarrow/tests/test_io.py b/pyarrow/tests/test_io.py
+index 5a495aa80..17eab871a 100644
+--- a/pyarrow/tests/test_io.py
++++ b/pyarrow/tests/test_io.py
+@@ -36,7 +36,7 @@ from pyarrow import Codec
+ import pyarrow as pa
+
+
+-def check_large_seeks(file_factory):
++def check_large_seeks(file_factory, expected_error=None):
+ if sys.platform in ('win32', 'darwin'):
+ pytest.skip("need sparse file support")
+ try:
+@@ -45,11 +45,16 @@ def check_large_seeks(file_factory):
+ f.truncate(2 ** 32 + 10)
+ f.seek(2 ** 32 + 5)
+ f.write(b'mark\n')
+- with file_factory(filename) as f:
+- assert f.seek(2 ** 32 + 5) == 2 ** 32 + 5
+- assert f.tell() == 2 ** 32 + 5
+- assert f.read(5) == b'mark\n'
+- assert f.tell() == 2 ** 32 + 10
++ if expected_error:
++ with expected_error:
++ file_factory(filename)
++ else:
++ with file_factory(filename) as f:
++ assert f.size() == 2 ** 32 + 10
++ assert f.seek(2 ** 32 + 5) == 2 ** 32 + 5
++ assert f.tell() == 2 ** 32 + 5
++ assert f.read(5) == b'mark\n'
++ assert f.tell() == 2 ** 32 + 10
+ finally:
+ os.unlink(filename)
+
+@@ -1137,7 +1142,14 @@ def test_memory_zero_length(tmpdir):
+
+
+ def test_memory_map_large_seeks():
+- check_large_seeks(pa.memory_map)
++ if sys.maxsize >= 2**32:
++ expected_error = None
++ else:
++ expected_error = pytest.raises(
++ pa.ArrowCapacityError,
++ match="Requested memory map length 4294967306 "
++ "does not fit in a C size_t")
++ check_large_seeks(pa.memory_map, expected_error=expected_error)
+
+
+ def test_memory_map_close_remove(tmpdir):
+diff --git a/pyarrow/tests/test_pandas.py b/pyarrow/tests/test_pandas.py
+index 8fd4b3041..168ed7e42 100644
+--- a/pyarrow/tests/test_pandas.py
++++ b/pyarrow/tests/test_pandas.py
+@@ -2601,8 +2601,9 @@ class TestConvertStructTypes:
+ ('yy', np.bool_)])),
+ ('y', np.int16),
+ ('z', np.object_)])
+- # Note: itemsize is not a multiple of sizeof(object)
+- assert dt.itemsize == 12
++ # Note: itemsize is not necessarily a multiple of sizeof(object)
++ # object_ is 8 bytes on 64-bit systems, 4 bytes on 32-bit systems
++ assert dt.itemsize == (12 if sys.maxsize > 2**32 else 8)
+ ty = pa.struct([pa.field('x', pa.struct([pa.field('xx', pa.int8()),
+ pa.field('yy', pa.bool_())])),
+ pa.field('y', pa.int16()),
+diff --git a/pyarrow/tests/test_schema.py b/pyarrow/tests/test_schema.py
+index fa75fcea3..8793c9e77 100644
+--- a/pyarrow/tests/test_schema.py
++++ b/pyarrow/tests/test_schema.py
+@@ -681,7 +681,8 @@ def test_schema_sizeof():
+ pa.field('bar', pa.string()),
+ ])
+
+- assert sys.getsizeof(schema) > 30
++ # Note: pa.schema is twice as large on 64-bit systems
++ assert sys.getsizeof(schema) > (30 if sys.maxsize > 2**32 else 15)
+
+ schema2 = schema.with_metadata({"key": "some metadata"})
+ assert sys.getsizeof(schema2) > sys.getsizeof(schema)
diff --git a/dev-python/pyarrow/pyarrow-15.0.1.ebuild b/dev-python/pyarrow/pyarrow-15.0.1.ebuild
new file mode 100644
index 000000000000..07163984e450
--- /dev/null
+++ b/dev-python/pyarrow/pyarrow-15.0.1.ebuild
@@ -0,0 +1,87 @@
+# Copyright 2023-2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_EXT=1
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{10..12} )
+
+inherit distutils-r1 multiprocessing
+
+DESCRIPTION="Python library for Apache Arrow"
+HOMEPAGE="
+ https://arrow.apache.org/
+ https://github.com/apache/arrow/
+ https://pypi.org/project/pyarrow/
+"
+SRC_URI="mirror://apache/arrow/arrow-${PV}/apache-arrow-${PV}.tar.gz"
+S="${WORKDIR}/apache-arrow-${PV}/python"
+
+LICENSE="Apache-2.0"
+SLOT="0"
+KEYWORDS="~amd64 ~hppa ~riscv"
+IUSE="parquet snappy ssl"
+
+RDEPEND="
+ ~dev-libs/apache-arrow-${PV}[compute,dataset,json,parquet?,re2,snappy?,ssl?]
+ dev-python/numpy[${PYTHON_USEDEP}]
+"
+BDEPEND="
+ test? (
+ dev-python/hypothesis[${PYTHON_USEDEP}]
+ dev-python/pandas[${PYTHON_USEDEP}]
+ <dev-python/pytest-8.1[${PYTHON_USEDEP}]
+ dev-libs/apache-arrow[lz4,zlib]
+ )
+"
+
+distutils_enable_tests pytest
+
+PATCHES=(
+ # upstream backports
+ "${FILESDIR}/${PN}-15.0.1-32bit.patch"
+)
+
+src_prepare() {
+ # cython's -Werror
+ sed -i -e '/--warning-errors/d' CMakeLists.txt || die
+ distutils-r1_src_prepare
+}
+
+src_compile() {
+ export PYARROW_PARALLEL="$(makeopts_jobs)"
+ export PYARROW_BUILD_VERBOSE=1
+ export PYARROW_CXXFLAGS="${CXXFLAGS}"
+ export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0
+ export PYARROW_CMAKE_GENERATOR=Ninja
+ export PYARROW_WITH_HDFS=1
+ if use parquet; then
+ export PYARROW_WITH_DATASET=1
+ export PYARROW_WITH_PARQUET=1
+ use ssl && export PYARROW_WITH_PARQUET_ENCRYPTION=1
+ fi
+ if use snappy; then
+ export PYARROW_WITH_SNAPPY=1
+ fi
+
+ distutils-r1_src_compile
+}
+
+python_test() {
+ local EPYTEST_DESELECT=(
+ # wtf?
+ tests/test_fs.py::test_localfs_errors
+ # these require apache-arrow with jemalloc that doesn't seem
+ # to be supported by the Gentoo package
+ tests/test_memory.py::test_env_var
+ tests/test_memory.py::test_specific_memory_pools
+ tests/test_memory.py::test_supported_memory_backends
+ # pandas changed, i guess
+ tests/test_pandas.py::test_array_protocol_pandas_extension_types
+ tests/test_table.py::test_table_factory_function_args_pandas
+ )
+
+ cd "${T}" || die
+ epytest --pyargs pyarrow
+}
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [gentoo-commits] repo/gentoo:master commit in: dev-python/pyarrow/, dev-python/pyarrow/files/
@ 2024-07-10 18:52 Michał Górny
0 siblings, 0 replies; 3+ messages in thread
From: Michał Górny @ 2024-07-10 18:52 UTC (permalink / raw)
To: gentoo-commits
commit: e0d064d847bcc79b85788165df0aecdc2d8f25a5
Author: Michał Górny <mgorny <AT> gentoo <DOT> org>
AuthorDate: Wed Jul 10 18:31:46 2024 +0000
Commit: Michał Górny <mgorny <AT> gentoo <DOT> org>
CommitDate: Wed Jul 10 18:52:02 2024 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=e0d064d8
dev-python/pyarrow: Enable py3.13
Signed-off-by: Michał Górny <mgorny <AT> gentoo.org>
.../pyarrow/files/pyarrow-16.1.0-py313.patch | 60 ++++++++++++++++++++++
dev-python/pyarrow/pyarrow-16.1.0.ebuild | 5 +-
2 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/dev-python/pyarrow/files/pyarrow-16.1.0-py313.patch b/dev-python/pyarrow/files/pyarrow-16.1.0-py313.patch
new file mode 100644
index 000000000000..f3e0053dd003
--- /dev/null
+++ b/dev-python/pyarrow/files/pyarrow-16.1.0-py313.patch
@@ -0,0 +1,60 @@
+diff --git a/python/pyarrow/src/arrow/python/udf.cc b/python/pyarrow/src/arrow/python/udf.cc
+index e9b72a2592738..b6a862af8ca07 100644
+--- a/pyarrow/src/arrow/python/udf.cc
++++ b/pyarrow/src/arrow/python/udf.cc
+@@ -28,6 +28,10 @@
+ #include "arrow/util/checked_cast.h"
+ #include "arrow/util/logging.h"
+
++// Py_IsFinalizing added in Python 3.13.0a4
++#if PY_VERSION_HEX < 0x030D00A4
++#define Py_IsFinalizing() _Py_IsFinalizing()
++#endif
+ namespace arrow {
+ using compute::ExecSpan;
+ using compute::Grouper;
+@@ -47,7 +51,7 @@ struct PythonUdfKernelState : public compute::KernelState {
+ // function needs to be destroyed at process exit
+ // and Python may no longer be initialized.
+ ~PythonUdfKernelState() {
+- if (_Py_IsFinalizing()) {
++ if (Py_IsFinalizing()) {
+ function->detach();
+ }
+ }
+@@ -64,7 +68,7 @@ struct PythonUdfKernelInit {
+ // function needs to be destroyed at process exit
+ // and Python may no longer be initialized.
+ ~PythonUdfKernelInit() {
+- if (_Py_IsFinalizing()) {
++ if (Py_IsFinalizing()) {
+ function->detach();
+ }
+ }
+@@ -132,7 +136,7 @@ struct PythonTableUdfKernelInit {
+ // function needs to be destroyed at process exit
+ // and Python may no longer be initialized.
+ ~PythonTableUdfKernelInit() {
+- if (_Py_IsFinalizing()) {
++ if (Py_IsFinalizing()) {
+ function_maker->detach();
+ }
+ }
+@@ -173,7 +177,7 @@ struct PythonUdfScalarAggregatorImpl : public ScalarUdfAggregator {
+ };
+
+ ~PythonUdfScalarAggregatorImpl() override {
+- if (_Py_IsFinalizing()) {
++ if (Py_IsFinalizing()) {
+ function->detach();
+ }
+ }
+@@ -270,7 +274,7 @@ struct PythonUdfHashAggregatorImpl : public HashUdfAggregator {
+ };
+
+ ~PythonUdfHashAggregatorImpl() override {
+- if (_Py_IsFinalizing()) {
++ if (Py_IsFinalizing()) {
+ function->detach();
+ }
+ }
diff --git a/dev-python/pyarrow/pyarrow-16.1.0.ebuild b/dev-python/pyarrow/pyarrow-16.1.0.ebuild
index b2a9ed0139dd..0f95bb569a64 100644
--- a/dev-python/pyarrow/pyarrow-16.1.0.ebuild
+++ b/dev-python/pyarrow/pyarrow-16.1.0.ebuild
@@ -5,7 +5,7 @@ EAPI=8
DISTUTILS_EXT=1
DISTUTILS_USE_PEP517=setuptools
-PYTHON_COMPAT=( python3_{10..12} )
+PYTHON_COMPAT=( python3_{10..13} )
inherit distutils-r1 multiprocessing
@@ -42,6 +42,8 @@ src_prepare() {
local PATCHES=(
# https://github.com/apache/arrow/pull/42099
"${FILESDIR}/${P}-numpy-2.patch"
+ # https://github.com/apache/arrow/pull/42034
+ "${FILESDIR}/${P}-py313.patch"
)
# cython's -Werror
@@ -86,6 +88,7 @@ python_test() {
tests/test_convert_builtin.py::test_array_to_pylist_roundtrip
tests/test_feather.py::test_roundtrip
tests/test_pandas.py::test_array_to_pandas_roundtrip
+ tests/test_types.py::test_hashing
)
cd "${T}" || die
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-07-10 18:52 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-03-08 18:28 [gentoo-commits] repo/gentoo:master commit in: dev-python/pyarrow/, dev-python/pyarrow/files/ Michał Górny
-- strict thread matches above, loose matches on Subject: below --
2024-07-10 18:52 Michał Górny
2023-07-24 11:33 Michał Górny
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox.