From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id B3A301381F3 for ; Wed, 14 Aug 2013 08:30:29 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 4CC7DE09A7; Wed, 14 Aug 2013 08:30:23 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 91987E09A7 for ; Wed, 14 Aug 2013 08:30:22 +0000 (UTC) Received: from hornbill.gentoo.org (hornbill.gentoo.org [94.100.119.163]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id 1803533EAB2 for ; Wed, 14 Aug 2013 08:30:21 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by hornbill.gentoo.org (Postfix) with ESMTP id 86A64E468F for ; Wed, 14 Aug 2013 08:30:19 +0000 (UTC) From: "Jauhien Piatlicki" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Jauhien Piatlicki" Message-ID: <1376468983.546f8794ff0a43c177a8ac50e7dc368c397e3a90.jauhien@gentoo> Subject: [gentoo-commits] proj/g-sorcery:pypi commit in: / X-VCS-Repository: proj/g-sorcery X-VCS-Committer: jauhien X-VCS-Committer-Name: Jauhien Piatlicki X-VCS-Revision: 546f8794ff0a43c177a8ac50e7dc368c397e3a90 X-VCS-Branch: pypi Date: Wed, 14 Aug 2013 08:30:19 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: 58a7b41a-eb96-45c3-a888-1b75180e835d X-Archives-Hash: 3e7a49ed81eb68228871a7199b87f163 Message-ID: <20130814083019.yWTgL1Gm2ZmDy7TQ0pyFYSszkELGga82duOXkXqGolk@z> commit: 546f8794ff0a43c177a8ac50e7dc368c397e3a90 Author: Jauhien Piatlicki (jauhien) gmail com> AuthorDate: Wed Aug 14 08:29:43 2013 +0000 Commit: Jauhien Piatlicki gmail com> CommitDate: Wed Aug 14 08:29:43 2013 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/g-sorcery.git;a=commit;h=546f8794 merging pypi branch gs_pypi/pypi_db.py | 132 ++++++++++++++++++++++++++++------------------------- 1 file changed, 70 insertions(+), 62 deletions(-) diff --cc gs_pypi/pypi_db.py index fca3432,9963b4e..69ad476 --- a/gs_pypi/pypi_db.py +++ b/gs_pypi/pypi_db.py @@@ -68,72 -65,73 +70,78 @@@ class PypiDBGenerator(DBGenerator) data = {} data["files"] = [] data["info"] = {} - for table in soup("table", class_ = "list")[-1:]: - if not "File" in table("th")[0].string: - continue - - for entry in table("tr")[1:-1]: - fields = entry("td") - - FILE = 0 - URL = 0 - MD5 = 1 - - TYPE = 1 - PYVERSION = 2 - UPLOADED = 3 - SIZE = 4 - - file_inf = fields[FILE]("a")[0]["href"].split("#") - file_url = file_inf[URL] - file_md5 = file_inf[MD5][4:] - - file_type = fields[TYPE].string - file_pyversion = fields[PYVERSION].string - file_uploaded = fields[UPLOADED].string - file_size = fields[SIZE].string - - data["files"].append({"url": file_url, - "md5": file_md5, - "type": file_type, - "pyversion": file_pyversion, - "uploaded": file_uploaded, - "size": file_size}) - entry.decompose() - table.decompose() - - uls = soup("ul", class_ = "nodot") - if uls: - if "Downloads (All Versions):" in uls[0]("strong")[0].string: - ul = uls[1] - else: - ul = uls[0] - - for entry in ul.contents: - if not hasattr(entry, "name") or entry.name != "li": - continue - entry_name = entry("strong")[0].string - if not entry_name: - continue - - if entry_name == "Categories": - data["info"][entry_name] = {} - for cat_entry in entry("a"): - cat_data = cat_entry.string.split(" :: ") - data["info"][entry_name][cat_data[0]] = cat_data[1:] - continue - - if entry("span"): - data["info"][entry_name] = entry("span")[0].string + try: + for table in soup("table", class_ = "list")[-1:]: + if not "File" in table("th")[0].string: continue - if entry("a"): - data["info"][entry_name] = entry("a")[0]["href"] - continue - entry.decompose() - ul.decompose() + for entry in table("tr")[1:-1]: + fields = entry("td") + + FILE = 0 + URL = 0 + MD5 = 1 + + TYPE = 1 + PYVERSION = 2 + UPLOADED = 3 + SIZE = 4 + + file_inf = fields[FILE]("a")[0]["href"].split("#") + file_url = file_inf[URL] + file_md5 = file_inf[MD5][4:] + + file_type = fields[TYPE].string + file_pyversion = fields[PYVERSION].string + file_uploaded = fields[UPLOADED].string + file_size = fields[SIZE].string + + data["files"].append({"url": file_url, + "md5": file_md5, + "type": file_type, + "pyversion": file_pyversion, + "uploaded": file_uploaded, + "size": file_size}) ++ entry.decompose() ++ table.decompose() + + uls = soup("ul", class_ = "nodot") + if uls: + if "Downloads (All Versions):" in uls[0]("strong")[0].string: + ul = uls[1] + else: + ul = uls[0] + + for entry in ul.contents: + if not hasattr(entry, "name") or entry.name != "li": + continue + entry_name = entry("strong")[0].string + if not entry_name: + continue + + if entry_name == "Categories": + data["info"][entry_name] = {} + for cat_entry in entry("a"): + cat_data = cat_entry.string.split(" :: ") + data["info"][entry_name][cat_data[0]] = cat_data[1:] + continue + + if entry("span"): + data["info"][entry_name] = entry("span")[0].string + continue + + if entry("a"): + data["info"][entry_name] = entry("a")[0]["href"] + continue ++ entry.decompose() ++ ul.decompose() + + except Exception as error: + print("There was an error during parsing: " + str(error)) + print("Ignoring this package.") + data = {} + soup.decompose() return data def process_data(self, pkg_db, data, common_config, config):