diff options
author | Mart Raudsepp <leio@gentoo.org> | 2016-09-07 23:20:20 +0300 |
---|---|---|
committer | Mart Raudsepp <leio@gentoo.org> | 2016-09-07 23:20:20 +0300 |
commit | 1e826829e42b0524365770dd329af5217a5f6b54 (patch) | |
tree | 16f9763698d1e36fd7bbf1fe3978aec3b4a2609e | |
parent | Don't double-quote debug output for full atoms from %r usage (diff) | |
download | grumpy-1e826829e42b0524365770dd329af5217a5f6b54.tar.gz grumpy-1e826829e42b0524365770dd329af5217a5f6b54.tar.bz2 grumpy-1e826829e42b0524365770dd329af5217a5f6b54.zip |
Add syncing of packages in categories from packages.g.o (just name)
Also add manage.py commands to call the sync steps individually for testing
-rw-r--r-- | backend/lib/sync.py | 28 | ||||
-rwxr-xr-x | manage.py | 25 |
2 files changed, 48 insertions, 5 deletions
```diff
diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 3cfb746..6dcb6b9 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,15 +1,18 @@
 from flask import json
 import requests
 from .. import app, db
-from .models import Category
+from .models import Category, Package
 
+url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
 def sync_categories():
-    url = "https://packages.gentoo.org/categories.json"
+    url = url_base + "categories.json"
     data = http_session.get(url)
+    # TODO: Handle response error (if not data)
     categories = json.loads(data.text)
     existing_categories = {}
+    # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5
     for cat in Category.query.all():
         existing_categories[cat.name] = cat
     for category in categories:
@@ -19,3 +22,24 @@ def sync_categories():
             new_cat = Category(name=category['name'], description=category['description'])
             db.session.add(new_cat)
     db.session.commit()
+
+def sync_packages():
+    for category in Category.query.all():
+        existing_packages = category.packages.all()
+        print("Existing packages in DB for category %s: %s" % (category.name, existing_packages,))
+        data = http_session.get(url_base + "categories/" + category.name + ".json")
+        if not data:
+            print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
+            continue
+        packages = json.loads(data.text)['packages']
+        # TODO: Use UPSERT instead (on_conflict_do_update)
+        existing_packages = {}
+        for pkg in Package.query.all():
+            existing_packages[pkg.name] = pkg
+        for package in packages:
+            if package['name'] in existing_packages:
+                continue # TODO: Update description once we keep that in DB
+            else:
+                new_pkg = Package(category_id=category.id, name=package['name'])
+                db.session.add(new_pkg)
+    db.session.commit()
diff --git a/manage.py b/manage.py
--- a/manage.py
+++ b/manage.py
@@ -4,7 +4,7 @@
 from flask_script import Manager, Shell
 
 from backend import app, db
-from backend.lib.sync import sync_categories
+from backend.lib import sync
 
 manager = Manager(app)
 
@@ -21,8 +21,27 @@ def init():
 
 @manager.command
 def sync_gentoo():
-    """Syncronize Gentoo data from packages.gentoo.org API"""
-    sync_categories()
+    """Synchronize Gentoo data from packages.gentoo.org API"""
+    sync.sync_categories()
+    sync.sync_packages()
+    #sync_versions()
+
+@manager.command
+def sync_categories():
+    """Synchronize only Gentoo categories data"""
+    sync.sync_categories()
+
+@manager.command
+def sync_packages():
+    """Synchronize only Gentoo packages base data (without details)"""
+    sync.sync_packages()
+
+'''
+@manager.command
+def sync_versions():
+    """Synchronize only Gentoo package details"""
+    sync.sync_versions()
+'''
 
 if __name__ == '__main__':
     manager.run()
```