From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id 1FE301381F3 for ; Mon, 29 Jul 2013 16:08:29 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 0D546E08E8; Mon, 29 Jul 2013 16:08:28 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 941ECE08E8 for ; Mon, 29 Jul 2013 16:08:27 +0000 (UTC) Received: from hornbill.gentoo.org (hornbill.gentoo.org [94.100.119.163]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id 5D88933E9EB for ; Mon, 29 Jul 2013 16:08:26 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by hornbill.gentoo.org (Postfix) with ESMTP id D6E99E5459 for ; Mon, 29 Jul 2013 16:08:24 +0000 (UTC) From: "Antanas Ursulis" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Antanas Ursulis" Message-ID: <1375113737.8dfea24b40c34292f20ab60975d3585094b70cb0.uranium@gentoo> Subject: [gentoo-commits] proj/log-analysis:master commit in: / X-VCS-Repository: proj/log-analysis X-VCS-Files: flask_app.py portage_processor.py simple_client.py submission.proto X-VCS-Directories: / X-VCS-Committer: uranium X-VCS-Committer-Name: Antanas Ursulis X-VCS-Revision: 8dfea24b40c34292f20ab60975d3585094b70cb0 X-VCS-Branch: master Date: Mon, 29 Jul 2013 16:08:24 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: b8de81d3-33bb-46c6-9666-9d0247835201 X-Archives-Hash: f0a2ada8527256376b4ae4ba677afee1 commit: 
8dfea24b40c34292f20ab60975d3585094b70cb0 Author: Antanas Uršulis gmail com> AuthorDate: Mon Jul 29 16:02:17 2013 +0000 Commit: Antanas Ursulis gmail com> CommitDate: Mon Jul 29 16:02:17 2013 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=8dfea24b Introduce processors and PortageProcessor. A processor is initialised with a database and storage provider. It should implement the process(request, source) method, where request is a protobuf Submission message. process() should analyse the received files, perform any required transformations and should usually store the files and create appropriate database entries. Processors are multiplexed through the 'provider' variable in the protobuf Submission message. This allows various types of logs to be processed/analysed differently. --- flask_app.py | 8 +++--- portage_processor.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ simple_client.py | 1 + submission.proto | 3 ++- 4 files changed, 82 insertions(+), 4 deletions(-) diff --git a/flask_app.py b/flask_app.py index ce55c38..832702c 100644 --- a/flask_app.py +++ b/flask_app.py @@ -7,8 +7,11 @@ import os, socket import submission_pb2, storage from flask import Flask, request +from portage_processor import PortageProcessor + app = Flask(__name__) store = storage.FilesystemStorage('logs/') +processors = {'portage' : PortageProcessor(None, store)} # TODO: initialise from config file @app.route('/') def index(): @@ -19,9 +22,8 @@ def submit(): submission = submission_pb2.Submission() submission.ParseFromString(request.data) source = socket.getfqdn(request.remote_addr) # TODO: is this ok? 
- # TODO: pass through analyser - for f in submission.files: - store.save_file(source, f.filename, f.data) + + processors[submission.provider].process(submission, source) return '' if __name__ == '__main__': diff --git a/portage_processor.py b/portage_processor.py new file mode 100644 index 0000000..2403cdf --- /dev/null +++ b/portage_processor.py @@ -0,0 +1,74 @@ +import re, StringIO + +class PortageProcessor: + _r = { + 'warnings' : re.compile(r"(Tinderbox QA Warning!|QA Notice: (Pre-stripped|file does not exist|command not found|USE flag|Files built without respecting|The following files)|linux_config_exists|will always overflow|called with bigger|maintainer mode detected|econf called in src_compile|udev rules should be installed)"), + 'testfailed' : re.compile(r"^ \* ERROR: .* failed \(test phase\):"), + 'failed' : re.compile(r"^ \* ERROR: .* failed"), + 'collision' : re.compile(r"Detected file collision"), + 'maintainer' : re.compile(r"^ \* Maintainer: ([a-zA-Z0-9.@_+-]+)(?: ([a-zA-Z0-9.@_+,-]+))?$"), + 'escapes' : re.compile(r"\x1b\[[^\x40-\x7e]*[\x40-\x7e]") + } + + def __init__(self, db, storage): + self.db = db + self.storage = storage + + def process(self, request, source): + for f in request.files: + matches = 0 + pkg_failed = False + test_failed = False + collision = False + bug_assignee = 'bug-wranglers@gentoo.org' + bug_cc = '' + + # TODO: look at proper HTML generation methods: + # (*) either XHTML via xml.etree + # (*) or Jinja2 (is it possible to parse and generate in one pass?) + output = StringIO.StringIO() + output.write('''\ + + + + + + +
    +''') + + for line in f.data.split("\n"): + match = False + + line = self._r['escapes'].sub('', line) + + if self._r['warnings'].search(line): + match = True + elif self._r['testfailed'].search(line): + test_failed = True + match = True + elif self._r['failed'].search(line): + pkg_failed = True + match = True + elif self._r['collision'].search(line): + pkg_failed = True + collision = True + match = True + else: + m = self._r['maintainer'].search(line) + if m: + bug_assignee, bug_cc = m.group(1, 2) + + if match: + matches += 1 + output.write('\t'*3 + '
  1. ' + line + '
  2. \n') + else: + output.write('\t'*3 + '
  3. ' + line + '
  4. \n') + + output.write('''\ +
+ + +''') + + self.storage.save_file(source, f.filename, output.getvalue()) diff --git a/simple_client.py b/simple_client.py index 99a4116..ab4bccf 100644 --- a/simple_client.py +++ b/simple_client.py @@ -6,6 +6,7 @@ import submission_pb2, sys, urllib2, os def send_submission(filenames): submission = submission_pb2.Submission() + submission.provider = "portage" for f in filenames: new_file = submission.files.add() diff --git a/submission.proto b/submission.proto index b06310f..42cf97c 100644 --- a/submission.proto +++ b/submission.proto @@ -4,5 +4,6 @@ message Submission { required bytes data = 2; } - repeated File files = 1; + required string provider = 1; + repeated File files = 2; }