public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
@ 2016-09-07 20:21 Mart Raudsepp
  0 siblings, 0 replies; 3+ messages in thread
From: Mart Raudsepp @ 2016-09-07 20:21 UTC (permalink / raw)
  To: gentoo-commits

commit:     1e826829e42b0524365770dd329af5217a5f6b54
Author:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Wed Sep  7 20:20:20 2016 +0000
Commit:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Wed Sep  7 20:20:20 2016 +0000
URL:        https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=1e826829

Add syncing of packages in categories from packages.g.o (just name)

Also add manage.py commands to call the sync steps individually for testing

 backend/lib/sync.py | 28 ++++++++++++++++++++++++++--
 manage.py           | 25 ++++++++++++++++++++++---
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 3cfb746..6dcb6b9 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,15 +1,18 @@
 from flask import json
 import requests
 from .. import app, db
-from .models import Category
+from .models import Category, Package
 
+url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
 def sync_categories():
-    url = "https://packages.gentoo.org/categories.json"
+    url = url_base + "categories.json"
     data = http_session.get(url)
+    # TODO: Handle response error (if not data)
     categories = json.loads(data.text)
     existing_categories = {}
+    # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5
     for cat in Category.query.all():
         existing_categories[cat.name] = cat
     for category in categories:
@@ -19,3 +22,24 @@ def sync_categories():
             new_cat = Category(name=category['name'], description=category['description'])
             db.session.add(new_cat)
     db.session.commit()
+
+def sync_packages():
+    for category in Category.query.all():
+        existing_packages = category.packages.all()
+        print("Existing packages in DB for category %s: %s" % (category.name, existing_packages,))
+        data = http_session.get(url_base + "categories/" + category.name + ".json")
+        if not data:
+            print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
+            continue
+        packages = json.loads(data.text)['packages']
+        # TODO: Use UPSERT instead (on_conflict_do_update)
+        existing_packages = {}
+        for pkg in Package.query.all():
+            existing_packages[pkg.name] = pkg
+        for package in packages:
+            if package['name'] in existing_packages:
+                continue # TODO: Update description once we keep that in DB
+            else:
+                new_pkg = Package(category_id=category.id, name=package['name'])
+                db.session.add(new_pkg)
+    db.session.commit()

diff --git a/manage.py b/manage.py
index 4f123aa..4634518 100755
--- a/manage.py
+++ b/manage.py
@@ -4,7 +4,7 @@
 from flask_script import Manager, Shell
 
 from backend import app, db
-from backend.lib.sync import sync_categories
+from backend.lib import sync
 
 
 manager = Manager(app)
@@ -21,8 +21,27 @@ def init():
 
 @manager.command
 def sync_gentoo():
-    """Syncronize Gentoo data from packages.gentoo.org API"""
-    sync_categories()
+    """Synchronize Gentoo data from packages.gentoo.org API"""
+    sync.sync_categories()
+    sync.sync_packages()
+    #sync_versions()
+
+@manager.command
+def sync_categories():
+    """Synchronize only Gentoo categories data"""
+    sync.sync_categories()
+
+@manager.command
+def sync_packages():
+    """Synchronize only Gentoo packages base data (without details)"""
+    sync.sync_packages()
+
+'''
+@manager.command
+def sync_versions():
+    """Synchronize only Gentoo package details"""
+    sync.sync_versions()
+'''
 
 if __name__ == '__main__':
     manager.run()


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
@ 2016-11-10  9:11 Mart Raudsepp
  0 siblings, 0 replies; 3+ messages in thread
From: Mart Raudsepp @ 2016-11-10  9:11 UTC (permalink / raw)
  To: gentoo-commits

commit:     d584775a6820f23561c5b8922a46644920bbf2e6
Author:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Thu Nov 10 09:09:42 2016 +0000
Commit:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Thu Nov 10 09:09:42 2016 +0000
URL:        https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d584775a

Add dirty sync_versions debug code

This just prints the first package's versions JSON data and exits.
It is only initial debug code, committed now to get it out of the way
so that the projects.xml sync can be done first: sync_versions will need
to reference projects and maintainers, so that sync has to be finished first.

 backend/lib/sync.py | 12 +++++++++++-
 manage.py           |  2 --
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index a6aef23..ce54937 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,7 +1,7 @@
 from flask import json
 import requests
 from .. import app, db
-from .models import Category, Package
+from .models import Category, Package, PackageVersion
 
 url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
@@ -42,3 +42,13 @@ def sync_packages():
                 new_pkg = Package(category_id=category.id, name=package['name'])
                 db.session.add(new_pkg)
     db.session.commit()
+
+def sync_versions():
+    for package in Package.query.all():
+        data = http_session.get(url_base + "packages/" + package.full_name + ".json")
+        if not data:
+            print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update
+            continue
+        from pprint import pprint
+        pprint(json.loads(data.text))
+        break

diff --git a/manage.py b/manage.py
index 4634518..359c63a 100755
--- a/manage.py
+++ b/manage.py
@@ -36,12 +36,10 @@ def sync_packages():
     """Synchronize only Gentoo packages base data (without details)"""
     sync.sync_packages()
 
-'''
 @manager.command
 def sync_versions():
     """Synchronize only Gentoo package details"""
     sync.sync_versions()
-'''
 
 if __name__ == '__main__':
     manager.run()


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
@ 2016-11-10 15:44 Mart Raudsepp
  0 siblings, 0 replies; 3+ messages in thread
From: Mart Raudsepp @ 2016-11-10 15:44 UTC (permalink / raw)
  To: gentoo-commits

commit:     d7dbfa3ba07dcd2cbc1f0be9f9575c436c9a82e3
Author:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Thu Nov 10 15:43:16 2016 +0000
Commit:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Thu Nov 10 15:43:40 2016 +0000
URL:        https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d7dbfa3b

Initial projects.xml parsing code with debug printout

 backend/lib/sync.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++----
 manage.py           |  8 +++++++-
 2 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index ce54937..7139119 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,13 +1,64 @@
+import xml.etree.ElementTree as ET
 from flask import json
 import requests
 from .. import app, db
 from .models import Category, Package, PackageVersion
 
-url_base = "https://packages.gentoo.org/"
+proj_url = "https://api.gentoo.org/metastructure/projects.xml"
+pkg_url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
+def sync_projects():
+    data = http_session.get(proj_url)
+    if not data:
+        print("Failed retrieving projects.xml")
+        return
+    root = ET.fromstring(data.text)
+    projects = []
+    # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10
+    if root.tag.lower() != 'projects':
+        print("Downloaded projects.xml root tag isn't 'projects'")
+        return
+    for proj_elem in root:
+        if proj_elem.tag.lower() != 'project':
+            print("Skipping unknown <projects> subtag <%s>" % proj_elem.tag)
+            continue
+        proj = {}
+        for elem in proj_elem:
+            tag = elem.tag.lower()
+            if tag in ['email', 'name', 'url', 'description']:
+                proj[tag] = elem.text
+            elif tag == 'member':
+                member = {}
+                if 'is-lead' in elem.attrib and elem.attrib['is-lead'] == '1':
+                    member['is_lead'] = True
+                for member_elem in elem:
+                    member_tag = member_elem.tag.lower()
+                    if member_tag in ['email', 'name', 'role']:
+                        member[member_tag] = member_elem.text
+                if 'email' in member:
+                    # TODO: Sync the members (it's valid as email is given) - maybe at the end, after we have synced the project data, so we can add him to the project directly
+                    pass
+            elif tag == 'subproject':
+                if 'ref' in elem.attrib:
+                    if 'subprojects' not in proj:
+                        proj['subprojects'] = []
+                    # subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is None, 0 or 1 (if dtd is followed). TODO: Might change if sync code will want it differently
+                    proj['subprojects'].append((elem.attrib['ref'], elem.attrib['inherit-members'] if 'inherit-members' in elem.attrib else None))
+                else:
+                    print("Invalid <subproject> tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "<unknown>")
+            else:
+                print("Skipping unknown <project> subtag <%s>" % tag)
+        if 'email' in proj:
+            projects.append(proj)
+        else:
+            print("Skipping incomplete project data due to lack of required email identifier: %s" % (proj,))
+    from pprint import pprint
+    print("Found the following projects and data:")
+    pprint(projects)
+
 def sync_categories():
-    url = url_base + "categories.json"
+    url = pkg_url_base + "categories.json"
     data = http_session.get(url)
     # TODO: Handle response error (if not data)
     categories = json.loads(data.text)
@@ -26,7 +77,7 @@ def sync_categories():
 def sync_packages():
     for category in Category.query.all():
         existing_packages = category.packages.all()
-        data = http_session.get(url_base + "categories/" + category.name + ".json")
+        data = http_session.get(pkg_url_base + "categories/" + category.name + ".json")
         if not data:
             print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
             continue
@@ -45,7 +96,7 @@ def sync_packages():
 
 def sync_versions():
     for package in Package.query.all():
-        data = http_session.get(url_base + "packages/" + package.full_name + ".json")
+        data = http_session.get(pkg_url_base + "packages/" + package.full_name + ".json")
         if not data:
             print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update
             continue

diff --git a/manage.py b/manage.py
index 359c63a..a31b96c 100755
--- a/manage.py
+++ b/manage.py
@@ -21,12 +21,18 @@ def init():
 
 @manager.command
 def sync_gentoo():
-    """Synchronize Gentoo data from packages.gentoo.org API"""
+    """Synchronize Gentoo data"""
+    sync.sync_projects()
     sync.sync_categories()
     sync.sync_packages()
     #sync_versions()
 
 @manager.command
+def sync_projects():
+    """Synchronize only Gentoo projects.xml data"""
+    sync.sync_projects()
+
+@manager.command
 def sync_categories():
     """Synchronize only Gentoo categories data"""
     sync.sync_categories()


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-11-10 15:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-07 20:21 [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/ Mart Raudsepp
  -- strict thread matches above, loose matches on Subject: below --
2016-11-10  9:11 Mart Raudsepp
2016-11-10 15:44 Mart Raudsepp

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox