From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id 3A7261395E2 for ; Thu, 10 Nov 2016 15:44:11 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id A1E16E08B8; Thu, 10 Nov 2016 15:44:08 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 75B3CE08B8 for ; Thu, 10 Nov 2016 15:44:08 +0000 (UTC) Received: from oystercatcher.gentoo.org (oystercatcher.gentoo.org [148.251.78.52]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id 540C6335DEF for ; Thu, 10 Nov 2016 15:44:06 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by oystercatcher.gentoo.org (Postfix) with ESMTP id E56712499 for ; Thu, 10 Nov 2016 15:44:04 +0000 (UTC) From: "Mart Raudsepp" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Mart Raudsepp" Message-ID: <1478792620.d7dbfa3ba07dcd2cbc1f0be9f9575c436c9a82e3.leio@gentoo> Subject: [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/ X-VCS-Repository: proj/grumpy X-VCS-Files: backend/lib/sync.py manage.py X-VCS-Directories: / backend/lib/ X-VCS-Committer: leio X-VCS-Committer-Name: Mart Raudsepp X-VCS-Revision: d7dbfa3ba07dcd2cbc1f0be9f9575c436c9a82e3 X-VCS-Branch: master Date: Thu, 10 Nov 2016 15:44:04 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: 48ce93e2-cbd2-43f2-8866-924576c1ff02 X-Archives-Hash: 5a167ebd5036d74e25dabfd395089556 commit: d7dbfa3ba07dcd2cbc1f0be9f9575c436c9a82e3 Author: Mart Raudsepp gentoo org> AuthorDate: Thu Nov 10 15:43:16 2016 +0000 Commit: Mart Raudsepp gentoo org> CommitDate: Thu Nov 10 15:43:40 2016 +0000 URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d7dbfa3b Initial projects.xml parsing code with debug printout backend/lib/sync.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++---- manage.py | 8 +++++++- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index ce54937..7139119 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,13 +1,64 @@ +import xml.etree.ElementTree as ET from flask import json import requests from .. import app, db from .models import Category, Package, PackageVersion -url_base = "https://packages.gentoo.org/" +proj_url = "https://api.gentoo.org/metastructure/projects.xml" +pkg_url_base = "https://packages.gentoo.org/" http_session = requests.session() +def sync_projects(): + data = http_session.get(proj_url) + if not data: + print("Failed retrieving projects.xml") + return + root = ET.fromstring(data.text) + projects = [] + # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 + if root.tag.lower() != 'projects': + print("Downloaded projects.xml root tag isn't 'projects'") + return + for proj_elem in root: + if proj_elem.tag.lower() != 'project': + print("Skipping unknown subtag <%s>" % proj_elem.tag) + continue + proj = {} + for elem in proj_elem: + tag = elem.tag.lower() + if tag in ['email', 'name', 'url', 'description']: + proj[tag] = elem.text + elif tag == 'member': + member = {} + if 'is-lead' in elem.attrib and elem.attrib['is-lead'] == '1': + member['is_lead'] = True + for member_elem in elem: + member_tag = member_elem.tag.lower() + if member_tag in ['email', 'name', 'role']: + member[member_tag] = member_elem.text + if 'email' in member: + # TODO: Sync the members (it's valid as email is given) - maybe at the end, after we have synced the project data, so we can add him to the project directly + pass + elif tag == 'subproject': + if 'ref' in elem.attrib: + if 'subprojects' not in proj: + proj['subprojects'] = [] + # subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is None, 0 or 1 (if dtd is followed). TODO: Might change if sync code will want it differently + proj['subprojects'].append((elem.attrib['ref'], elem.attrib['inherit-members'] if 'inherit-members' in elem.attrib else None)) + else: + print("Invalid tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "") + else: + print("Skipping unknown subtag <%s>" % tag) + if 'email' in proj: + projects.append(proj) + else: + print("Skipping incomplete project data due to lack of required email identifier: %s" % (proj,)) + from pprint import pprint + print("Found the following projects and data:") + pprint(projects) + def sync_categories(): - url = url_base + "categories.json" + url = pkg_url_base + "categories.json" data = http_session.get(url) # TODO: Handle response error (if not data) categories = json.loads(data.text) @@ -26,7 +77,7 @@ def sync_categories(): def sync_packages(): for category in Category.query.all(): existing_packages = category.packages.all() - data = http_session.get(url_base + "categories/" + category.name + ".json") + data = http_session.get(pkg_url_base + "categories/" + category.name + ".json") if not data: print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone? continue @@ -45,7 +96,7 @@ def sync_packages(): def sync_versions(): for package in Package.query.all(): - data = http_session.get(url_base + "packages/" + package.full_name + ".json") + data = http_session.get(pkg_url_base + "packages/" + package.full_name + ".json") if not data: print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update continue diff --git a/manage.py b/manage.py index 359c63a..a31b96c 100755 --- a/manage.py +++ b/manage.py @@ -21,12 +21,18 @@ def init(): @manager.command def sync_gentoo(): - """Synchronize Gentoo data from packages.gentoo.org API""" + """Synchronize Gentoo data""" + sync.sync_projects() sync.sync_categories() sync.sync_packages() #sync_versions() @manager.command +def sync_projects(): + """Synchronize only Gentoo projects.xml data""" + sync.sync_projects() + +@manager.command def sync_categories(): """Synchronize only Gentoo categories data""" sync.sync_categories()