* [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
@ 2016-09-07 20:21 Mart Raudsepp
0 siblings, 0 replies; 3+ messages in thread
From: Mart Raudsepp @ 2016-09-07 20:21 UTC (permalink / raw)
To: gentoo-commits
commit: 1e826829e42b0524365770dd329af5217a5f6b54
Author: Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Wed Sep 7 20:20:20 2016 +0000
Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Wed Sep 7 20:20:20 2016 +0000
URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=1e826829
Add syncing of packages in categories from packages.g.o (just name)
Also add manage.py commands to call the sync steps individually for testing
backend/lib/sync.py | 28 ++++++++++++++++++++++++++--
manage.py | 25 ++++++++++++++++++++++---
2 files changed, 48 insertions(+), 5 deletions(-)
diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 3cfb746..6dcb6b9 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,15 +1,18 @@
from flask import json
import requests
from .. import app, db
-from .models import Category
+from .models import Category, Package
+url_base = "https://packages.gentoo.org/"
http_session = requests.session()
def sync_categories():
- url = "https://packages.gentoo.org/categories.json"
+ url = url_base + "categories.json"
data = http_session.get(url)
+ # TODO: Handle response error (if not data)
categories = json.loads(data.text)
existing_categories = {}
+ # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5
for cat in Category.query.all():
existing_categories[cat.name] = cat
for category in categories:
@@ -19,3 +22,24 @@ def sync_categories():
new_cat = Category(name=category['name'], description=category['description'])
db.session.add(new_cat)
db.session.commit()
+
+def sync_packages():
+ for category in Category.query.all():
+ existing_packages = category.packages.all()
+ print("Existing packages in DB for category %s: %s" % (category.name, existing_packages,))
+ data = http_session.get(url_base + "categories/" + category.name + ".json")
+ if not data:
+ print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
+ continue
+ packages = json.loads(data.text)['packages']
+ # TODO: Use UPSERT instead (on_conflict_do_update)
+ existing_packages = {}
+ for pkg in Package.query.all():
+ existing_packages[pkg.name] = pkg
+ for package in packages:
+ if package['name'] in existing_packages:
+ continue # TODO: Update description once we keep that in DB
+ else:
+ new_pkg = Package(category_id=category.id, name=package['name'])
+ db.session.add(new_pkg)
+ db.session.commit()
diff --git a/manage.py b/manage.py
index 4f123aa..4634518 100755
--- a/manage.py
+++ b/manage.py
@@ -4,7 +4,7 @@
from flask_script import Manager, Shell
from backend import app, db
-from backend.lib.sync import sync_categories
+from backend.lib import sync
manager = Manager(app)
@@ -21,8 +21,27 @@ def init():
@manager.command
def sync_gentoo():
- """Syncronize Gentoo data from packages.gentoo.org API"""
- sync_categories()
+ """Synchronize Gentoo data from packages.gentoo.org API"""
+ sync.sync_categories()
+ sync.sync_packages()
+ #sync_versions()
+
+@manager.command
+def sync_categories():
+ """Synchronize only Gentoo categories data"""
+ sync.sync_categories()
+
+@manager.command
+def sync_packages():
+ """Synchronize only Gentoo packages base data (without details)"""
+ sync.sync_packages()
+
+'''
+@manager.command
+def sync_versions():
+ """Synchronize only Gentoo package details"""
+ sync.sync_versions()
+'''
if __name__ == '__main__':
manager.run()
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
@ 2016-11-10 9:11 Mart Raudsepp
0 siblings, 0 replies; 3+ messages in thread
From: Mart Raudsepp @ 2016-11-10 9:11 UTC (permalink / raw)
To: gentoo-commits
commit: d584775a6820f23561c5b8922a46644920bbf2e6
Author: Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Thu Nov 10 09:09:42 2016 +0000
Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Thu Nov 10 09:09:42 2016 +0000
URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d584775a
Add dirty sync_versions debug code
This just prints the first package's versions JSON data and exits.
It is only initial debug code, put out of the way so that projects.xml
can be synced first: sync_versions will need to reference projects and
maintainers, so it is better to finish the projects.xml sync first.
backend/lib/sync.py | 12 +++++++++++-
manage.py | 2 --
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index a6aef23..ce54937 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,7 +1,7 @@
from flask import json
import requests
from .. import app, db
-from .models import Category, Package
+from .models import Category, Package, PackageVersion
url_base = "https://packages.gentoo.org/"
http_session = requests.session()
@@ -42,3 +42,13 @@ def sync_packages():
new_pkg = Package(category_id=category.id, name=package['name'])
db.session.add(new_pkg)
db.session.commit()
+
+def sync_versions():
+ for package in Package.query.all():
+ data = http_session.get(url_base + "packages/" + package.full_name + ".json")
+ if not data:
+ print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update
+ continue
+ from pprint import pprint
+ pprint(json.loads(data.text))
+ break
diff --git a/manage.py b/manage.py
index 4634518..359c63a 100755
--- a/manage.py
+++ b/manage.py
@@ -36,12 +36,10 @@ def sync_packages():
"""Synchronize only Gentoo packages base data (without details)"""
sync.sync_packages()
-'''
@manager.command
def sync_versions():
"""Synchronize only Gentoo package details"""
sync.sync_versions()
-'''
if __name__ == '__main__':
manager.run()
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
@ 2016-11-10 15:44 Mart Raudsepp
0 siblings, 0 replies; 3+ messages in thread
From: Mart Raudsepp @ 2016-11-10 15:44 UTC (permalink / raw)
To: gentoo-commits
commit: d7dbfa3ba07dcd2cbc1f0be9f9575c436c9a82e3
Author: Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Thu Nov 10 15:43:16 2016 +0000
Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Thu Nov 10 15:43:40 2016 +0000
URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d7dbfa3b
Initial projects.xml parsing code with debug printout
backend/lib/sync.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++----
manage.py | 8 +++++++-
2 files changed, 62 insertions(+), 5 deletions(-)
diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index ce54937..7139119 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,13 +1,64 @@
+import xml.etree.ElementTree as ET
from flask import json
import requests
from .. import app, db
from .models import Category, Package, PackageVersion
-url_base = "https://packages.gentoo.org/"
+proj_url = "https://api.gentoo.org/metastructure/projects.xml"
+pkg_url_base = "https://packages.gentoo.org/"
http_session = requests.session()
+def sync_projects():
+ data = http_session.get(proj_url)
+ if not data:
+ print("Failed retrieving projects.xml")
+ return
+ root = ET.fromstring(data.text)
+ projects = []
+ # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10
+ if root.tag.lower() != 'projects':
+ print("Downloaded projects.xml root tag isn't 'projects'")
+ return
+ for proj_elem in root:
+ if proj_elem.tag.lower() != 'project':
+ print("Skipping unknown <projects> subtag <%s>" % proj_elem.tag)
+ continue
+ proj = {}
+ for elem in proj_elem:
+ tag = elem.tag.lower()
+ if tag in ['email', 'name', 'url', 'description']:
+ proj[tag] = elem.text
+ elif tag == 'member':
+ member = {}
+ if 'is-lead' in elem.attrib and elem.attrib['is-lead'] == '1':
+ member['is_lead'] = True
+ for member_elem in elem:
+ member_tag = member_elem.tag.lower()
+ if member_tag in ['email', 'name', 'role']:
+ member[member_tag] = member_elem.text
+ if 'email' in member:
+ # TODO: Sync the members (it's valid as email is given) - maybe at the end, after we have synced the project data, so we can add him to the project directly
+ pass
+ elif tag == 'subproject':
+ if 'ref' in elem.attrib:
+ if 'subprojects' not in proj:
+ proj['subprojects'] = []
+ # subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is None, 0 or 1 (if dtd is followed). TODO: Might change if sync code will want it differently
+ proj['subprojects'].append((elem.attrib['ref'], elem.attrib['inherit-members'] if 'inherit-members' in elem.attrib else None))
+ else:
+ print("Invalid <subproject> tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "<unknown>")
+ else:
+ print("Skipping unknown <project> subtag <%s>" % tag)
+ if 'email' in proj:
+ projects.append(proj)
+ else:
+ print("Skipping incomplete project data due to lack of required email identifier: %s" % (proj,))
+ from pprint import pprint
+ print("Found the following projects and data:")
+ pprint(projects)
+
def sync_categories():
- url = url_base + "categories.json"
+ url = pkg_url_base + "categories.json"
data = http_session.get(url)
# TODO: Handle response error (if not data)
categories = json.loads(data.text)
@@ -26,7 +77,7 @@ def sync_categories():
def sync_packages():
for category in Category.query.all():
existing_packages = category.packages.all()
- data = http_session.get(url_base + "categories/" + category.name + ".json")
+ data = http_session.get(pkg_url_base + "categories/" + category.name + ".json")
if not data:
print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
continue
@@ -45,7 +96,7 @@ def sync_packages():
def sync_versions():
for package in Package.query.all():
- data = http_session.get(url_base + "packages/" + package.full_name + ".json")
+ data = http_session.get(pkg_url_base + "packages/" + package.full_name + ".json")
if not data:
print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update
continue
diff --git a/manage.py b/manage.py
index 359c63a..a31b96c 100755
--- a/manage.py
+++ b/manage.py
@@ -21,12 +21,18 @@ def init():
@manager.command
def sync_gentoo():
- """Synchronize Gentoo data from packages.gentoo.org API"""
+ """Synchronize Gentoo data"""
+ sync.sync_projects()
sync.sync_categories()
sync.sync_packages()
#sync_versions()
@manager.command
+def sync_projects():
+ """Synchronize only Gentoo projects.xml data"""
+ sync.sync_projects()
+
+@manager.command
def sync_categories():
"""Synchronize only Gentoo categories data"""
sync.sync_categories()
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-11-10 15:44 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-07 20:21 [gentoo-commits] proj/grumpy:master commit in: /, backend/lib/ Mart Raudsepp
-- strict thread matches above, loose matches on Subject: below --
2016-11-10 9:11 Mart Raudsepp
2016-11-10 15:44 Mart Raudsepp
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox