public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] repo/gentoo:master commit in: dev-python/cchardet/, dev-python/cchardet/files/
@ 2022-12-23 19:54 Arthur Zamarin
  0 siblings, 0 replies; only message in thread
From: Arthur Zamarin @ 2022-12-23 19:54 UTC (permalink / raw)
  To: gentoo-commits

commit:     7fa4f1cffe521648405368d68b27cf375212771d
Author:     Arthur Zamarin <arthurzam <AT> gentoo <DOT> org>
AuthorDate: Fri Dec 23 19:52:40 2022 +0000
Commit:     Arthur Zamarin <arthurzam <AT> gentoo <DOT> org>
CommitDate: Fri Dec 23 19:54:24 2022 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=7fa4f1cf

dev-python/cchardet: PEP517, enable py3.11, use pytest

Signed-off-by: Arthur Zamarin <arthurzam <AT> gentoo.org>

 dev-python/cchardet/cchardet-2.1.7-r1.ebuild       |  30 ++++++
 .../cchardet/files/cchardet-2.1.7-pytest.patch     | 120 +++++++++++++++++++++
 2 files changed, 150 insertions(+)

diff --git a/dev-python/cchardet/cchardet-2.1.7-r1.ebuild b/dev-python/cchardet/cchardet-2.1.7-r1.ebuild
new file mode 100644
index 000000000000..4f344e9f6e57
--- /dev/null
+++ b/dev-python/cchardet/cchardet-2.1.7-r1.ebuild
@@ -0,0 +1,30 @@
+# Copyright 2021-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{8..11} )
+inherit distutils-r1
+
+DESCRIPTION="High speed universal character encoding detector"
+HOMEPAGE="
+	https://github.com/PyYoshi/cChardet
+	https://pypi.org/project/cchardet/
+"
+SRC_URI="mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz"
+
+LICENSE="MPL-1.1"
+SLOT="0"
+KEYWORDS="~amd64 ~arm ~x86"
+
+BDEPEND="
+	dev-python/cython[${PYTHON_USEDEP}]
+"
+
+PATCHES=(
+	# https://github.com/PyYoshi/cChardet/pull/78
+	"${FILESDIR}/${P}-pytest.patch"
+)
+
+distutils_enable_tests pytest

diff --git a/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch b/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch
new file mode 100644
index 000000000000..11f38579c184
--- /dev/null
+++ b/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch
@@ -0,0 +1,120 @@
+https://github.com/PyYoshi/cChardet/pull/78
+
+From: q0w <43147888+q0w@users.noreply.github.com>
+Date: Wed, 17 Nov 2021 14:50:41 +0300
+Subject: [PATCH 02/13] Use pytest
+
+--- /dev/null
++++ b/src/tests/cchardet_test.py
+@@ -0,0 +1,111 @@
++import glob
++import os
++
++import cchardet
++
++SKIP_LIST = [
++    'src/tests/testdata/ja/utf-16le.txt',
++    'src/tests/testdata/ja/utf-16be.txt',
++    'src/tests/testdata/es/iso-8859-15.txt',
++    'src/tests/testdata/da/iso-8859-1.txt',
++    'src/tests/testdata/he/iso-8859-8.txt'
++]
++
++# Python can't decode encoding
++SKIP_LIST_02 = [
++    'src/tests/testdata/vi/viscii.txt',
++    'src/tests/testdata/zh/euc-tw.txt'
++]
++SKIP_LIST_02.extend(SKIP_LIST)
++
++
++def test_ascii():
++    detected_encoding = cchardet.detect(b'abcdefghijklmnopqrstuvwxyz')
++    assert 'ascii' == detected_encoding['encoding'].lower()
++
++
++def test_detect():
++    testfiles = glob.glob('src/tests/testdata/*/*.txt')
++    for testfile in testfiles:
++        if testfile.replace("\\", "/") in SKIP_LIST:
++            continue
++
++        base = os.path.basename(testfile)
++        expected_charset = os.path.splitext(base)[0]
++        with open(testfile, 'rb') as f:
++            msg = f.read()
++            detected_encoding = cchardet.detect(msg)
++            assert expected_charset.lower() == detected_encoding['encoding'].lower()
++
++
++def test_detector():
++    detector = cchardet.UniversalDetector()
++    with open("src/tests/samples/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt", 'rb') as f:
++        line = f.readline()
++        while line:
++            detector.feed(line)
++            if detector.done:
++                break
++            line = f.readline()
++    detector.close()
++    detected_encoding = detector.result
++    assert "shift_jis" == detected_encoding['encoding'].lower()
++
++
++def test_github_issue_20():
++    """
++    https://github.com/PyYoshi/cChardet/issues/20
++    """
++    msg = b'\x8f'
++
++    cchardet.detect(msg)
++
++    detector = cchardet.UniversalDetector()
++    detector.feed(msg)
++    detector.close()
++
++
++def test_decode():
++    testfiles = glob.glob('src/tests/testdata/*/*.txt')
++    for testfile in testfiles:
++        if testfile.replace("\\", "/") in SKIP_LIST_02:
++            continue
++
++        base = os.path.basename(testfile)
++        expected_charset = os.path.splitext(base)[0]
++        with open(testfile, 'rb') as f:
++            msg = f.read()
++            detected_encoding = cchardet.detect(msg)
++            try:
++                msg.decode(detected_encoding["encoding"])
++            except LookupError as e:
++                print("LookupError: { file=%s, encoding=%s }" % (
++                    testfile, detected_encoding["encoding"]))
++                raise e
++
++
++def test_utf8_with_bom():
++    sample = b'\xEF\xBB\xBF'
++    detected_encoding = cchardet.detect(sample)
++    assert "utf-8-sig" == detected_encoding['encoding'].lower()
++
++
++def test_null_bytes():
++    sample = b'ABC\x00\x80\x81'
++    detected_encoding = cchardet.detect(sample)
++
++    assert detected_encoding['encoding'] is None
++
++# def test_iso8859_2_csv(self):
++#     testfile = 'tests/samples/iso8859-2.csv'
++#     with open(testfile, 'rb') as f:
++#         msg = f.read()
++#         detected_encoding = cchardet.detect(msg)
++#         eq_(
++#             "iso8859-2",
++#             detected_encoding['encoding'].lower(),
++#             'Expected %s, but got %s' % (
++#                 "iso8859-2",
++#                 detected_encoding['encoding'].lower()
++#             )
++#         )


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2022-12-23 19:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-23 19:54 [gentoo-commits] repo/gentoo:master commit in: dev-python/cchardet/, dev-python/cchardet/files/ Arthur Zamarin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox