* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-03 7:15 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-03 7:15 UTC (permalink / raw
To: gentoo-commits
commit: 2a6d3eb44476b6dcc299ceab7044c04c8e3528af
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Tue Jul 2 22:33:03 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Tue Jul 2 22:33:03 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=2a6d3eb4
Move the application to the Flask framework
---
CollectionDaemon.py | 25 -------------------------
CollectionHTTPServer.py | 33 ---------------------------------
flask_app.py | 14 ++++++++++++++
3 files changed, 14 insertions(+), 58 deletions(-)
diff --git a/CollectionDaemon.py b/CollectionDaemon.py
deleted file mode 100644
index a728e75..0000000
--- a/CollectionDaemon.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""
-Implements a daemon that hosts the HTTP server for log collection.
-
-TODO:
- daemonisation
- implement both IPv4 and IPv6 modes
- proper shutdown
-"""
-
-import CollectionHTTPServer
-
-class CollectionDaemon:
-
- def __init__(self, port=8000):
- server_class = CollectionHTTPServer.HTTPServer6
- handler_class = CollectionHTTPServer.HTTPRequestHandler
-
- self.server_address = ('::', port)
- self.httpd = server_class(self.server_address, handler_class)
-
- def start(self):
- self.httpd.serve_forever()
-
-if __name__ == '__main__':
- CollectionDaemon().start()
diff --git a/CollectionHTTPServer.py b/CollectionHTTPServer.py
deleted file mode 100644
index d2982fd..0000000
--- a/CollectionHTTPServer.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""
-Implements the HTTP handler for log collection
-
-TODO:
- decide on exact protocol
- HTTP/1.1
- retrieve client's hostname
- send to analyser
- store in filesystem (later in storage backend)
- log groups
-"""
-
-import BaseHTTPServer
-import socket
-
-class HTTPServer6(BaseHTTPServer.HTTPServer):
-
- address_family = socket.AF_INET6
-
-class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
-
- #protocol_version = "HTTP/1.1"
-
- def do_POST(self):
- print(self.client_address)
- print(self.command, self.path, self.request_version)
- print(self.headers.headers)
-
- size = int(self.headers.getheader('Content-Length'))
-
- print(self.rfile.read(size))
- self.send_response(200)
- self.end_headers()
diff --git a/flask_app.py b/flask_app.py
new file mode 100644
index 0000000..1bfd837
--- /dev/null
+++ b/flask_app.py
@@ -0,0 +1,14 @@
+"""
+The web application built on Flask is contained within this file.
+When run as a script, the Flask development server is started.
+"""
+
+from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+ pass
+
+if __name__ == '__main__':
+ app.run(host='::1', debug=True)
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-03 7:15 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-03 7:15 UTC (permalink / raw
To: gentoo-commits
commit: 8c127f7be724c7e4f3054ab7f33a379000f48461
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Tue Jul 2 23:24:49 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Tue Jul 2 23:26:19 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=8c127f7b
Add generated protobuf files and vim .swp to .gitignore
---
.gitignore | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.gitignore b/.gitignore
index 0d20b64..3ccad92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
*.pyc
+*_pb2.py
+*.swp
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-03 7:15 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-03 7:15 UTC (permalink / raw
To: gentoo-commits
commit: 1be8057a9fd24e96814065c97dacfdabf2f0386f
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Tue Jul 2 23:27:03 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Tue Jul 2 23:27:03 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=1be8057a
Initial protobuf messages
---
Makefile | 9 +++++++++
submission.proto | 4 ++++
2 files changed, 13 insertions(+)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f4b165f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+.PHONY: all protobufs clean
+
+all: protobufs
+
+protobufs: submission.proto
+ protoc --python_out=. $^
+
+clean:
+ rm -f submission_pb2.py
diff --git a/submission.proto b/submission.proto
new file mode 100644
index 0000000..70a60de
--- /dev/null
+++ b/submission.proto
@@ -0,0 +1,4 @@
+message Submission {
+ required string filename = 1;
+ required bytes data = 2;
+}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-03 7:15 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-03 7:15 UTC (permalink / raw
To: gentoo-commits
commit: be0237f31708f567150b02b8afcbd7d00fa9262b
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Tue Jul 2 23:28:10 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Tue Jul 2 23:28:10 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=be0237f3
Add simple submit POST method to flask app
---
flask_app.py | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/flask_app.py b/flask_app.py
index 1bfd837..a69f4f8 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -3,12 +3,24 @@ The web application built on Flask is contained within this file.
When run as a script, the Flask development server is started.
"""
-from flask import Flask
+import submission_pb2
+from flask import Flask, request
+
app = Flask(__name__)
@app.route('/')
def index():
pass
+@app.route('/submit', methods=['POST'])
+def submit():
+ """
+ TODO:
+ pass through all steps - input, analysis, storage
+ """
+ submission = submission_pb2.Submission()
+ submission.ParseFromString(request.data)
+ return str(submission)
+
if __name__ == '__main__':
app.run(host='::1', debug=True)
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-03 7:15 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-03 7:15 UTC (permalink / raw
To: gentoo-commits
commit: 4b161bb193dd40a6be29082d26586655c02e1cc4
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Tue Jul 2 23:28:45 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Tue Jul 2 23:28:45 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=4b161bb1
Simple file submission client, based on urllib and protobuf
---
simple_client.py | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/simple_client.py b/simple_client.py
new file mode 100644
index 0000000..bbd7835
--- /dev/null
+++ b/simple_client.py
@@ -0,0 +1,19 @@
+"""
+Simple submission client that forms a correct protobuf message and performs a POST
+"""
+
+import submission_pb2, sys, urllib
+
+def send_submission(filename):
+ submission = submission_pb2.Submission()
+ submission.filename = filename
+ submission.data = open(filename, 'rb').read()
+
+ print urllib.urlopen('http://[::1]:5000/submit', submission.SerializeToString()).read()
+
+if __name__ == '__main__':
+ if len(sys.argv) != 2:
+ sys.stderr.write('usage: ' + sys.argv[0] + ' FILENAME\n')
+ sys.exit(-1)
+
+ send_submission(sys.argv[1])
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-04 1:39 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-04 1:39 UTC (permalink / raw
To: gentoo-commits
commit: c74e38905e87acd4075e96cb3e4ad2c474e23f25
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Thu Jul 4 01:38:24 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Thu Jul 4 01:38:24 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=c74e3890
Implement simple storage in the filesystem
---
flask_app.py | 7 +++++--
storage.py | 23 +++++++++++++++++++++++
2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/flask_app.py b/flask_app.py
index a69f4f8..87697e5 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -3,10 +3,12 @@ The web application built on Flask is contained within this file.
When run as a script, the Flask development server is started.
"""
-import submission_pb2
+import os
+import submission_pb2, storage
from flask import Flask, request
app = Flask(__name__)
+store = storage.FilesystemStorage('logs/')
@app.route('/')
def index():
@@ -20,7 +22,8 @@ def submit():
"""
submission = submission_pb2.Submission()
submission.ParseFromString(request.data)
- return str(submission)
+ store.save_file(request.remote_addr, submission.filename, submission.data)
+ return ''
if __name__ == '__main__':
app.run(host='::1', debug=True)
diff --git a/storage.py b/storage.py
new file mode 100644
index 0000000..847806f
--- /dev/null
+++ b/storage.py
@@ -0,0 +1,23 @@
+"""
+Implements storage of collected log files in the local filesystem.
+"""
+
+import os, errno
+
+class FilesystemStorage:
+ def __init__(self, root):
+ self.root = root
+ try:
+ os.mkdir(root)
+ except OSError:
+ pass # TODO: proper handling
+
+ def save_file(self, source, filename, data):
+ try:
+ os.mkdir(os.path.join(self.root, source))
+ except OSError:
+ pass # TODO: proper handling
+
+ path = os.path.join(self.root, source, filename) # TODO: consider adding in date at some point
+ with open(path, 'wb') as f:
+ f.write(data)
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-05 0:00 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-05 0:00 UTC (permalink / raw
To: gentoo-commits
commit: e31eab1368bf684b9a9a085babacfe06549d7faf
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Thu Jul 4 23:17:55 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Thu Jul 4 23:17:55 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=e31eab13
'clean' target update
---
Makefile | 1 +
1 file changed, 1 insertion(+)
diff --git a/Makefile b/Makefile
index f4b165f..f87dfc1 100644
--- a/Makefile
+++ b/Makefile
@@ -7,3 +7,4 @@ protobufs: submission.proto
clean:
rm -f submission_pb2.py
+ rm -f *.pyc
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-05 0:00 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-05 0:00 UTC (permalink / raw
To: gentoo-commits
commit: adecdbc1cd849ca19fbccf2c189f676d54b51e85
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Thu Jul 4 23:18:44 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Thu Jul 4 23:18:44 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=adecdbc1
Group logs by source hostname and add multiple-file submissions
---
flask_app.py | 11 +++++------
simple_client.py | 15 +++++++++------
submission.proto | 8 ++++++--
3 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/flask_app.py b/flask_app.py
index 87697e5..ce55c38 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -3,7 +3,7 @@ The web application built on Flask is contained within this file.
When run as a script, the Flask development server is started.
"""
-import os
+import os, socket
import submission_pb2, storage
from flask import Flask, request
@@ -16,13 +16,12 @@ def index():
@app.route('/submit', methods=['POST'])
def submit():
- """
- TODO:
- pass through all steps - input, analysis, storage
- """
submission = submission_pb2.Submission()
submission.ParseFromString(request.data)
- store.save_file(request.remote_addr, submission.filename, submission.data)
+ source = socket.getfqdn(request.remote_addr) # TODO: is this ok?
+ # TODO: pass through analyser
+ for f in submission.files:
+ store.save_file(source, f.filename, f.data)
return ''
if __name__ == '__main__':
diff --git a/simple_client.py b/simple_client.py
index bbd7835..0335260 100644
--- a/simple_client.py
+++ b/simple_client.py
@@ -4,16 +4,19 @@ Simple submission client that forms a correct protobuf message and performs a PO
import submission_pb2, sys, urllib
-def send_submission(filename):
+def send_submission(filenames):
submission = submission_pb2.Submission()
- submission.filename = filename
- submission.data = open(filename, 'rb').read()
+
+ for f in filenames:
+ new_file = submission.files.add()
+ new_file.filename = f
+ new_file.data = open(f, 'rb').read()
print urllib.urlopen('http://[::1]:5000/submit', submission.SerializeToString()).read()
if __name__ == '__main__':
- if len(sys.argv) != 2:
- sys.stderr.write('usage: ' + sys.argv[0] + ' FILENAME\n')
+ if len(sys.argv) < 2:
+ sys.stderr.write('usage: ' + sys.argv[0] + ' FILENAMES\n')
sys.exit(-1)
- send_submission(sys.argv[1])
+ send_submission(sys.argv[1:])
diff --git a/submission.proto b/submission.proto
index 70a60de..b06310f 100644
--- a/submission.proto
+++ b/submission.proto
@@ -1,4 +1,8 @@
message Submission {
- required string filename = 1;
- required bytes data = 2;
+ message File {
+ required string filename = 1;
+ required bytes data = 2;
+ }
+
+ repeated File files = 1;
}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 16:08 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 16:08 UTC (permalink / raw
To: gentoo-commits
commit: 167cee2838f3b4e4c785d749e1b2366b116569ed
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Mon Jul 29 16:01:19 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Mon Jul 29 16:01:19 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=167cee28
Port simple_client to urllib2, so that we can set Content-Type
---
simple_client.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/simple_client.py b/simple_client.py
index 3abc7c9..99a4116 100644
--- a/simple_client.py
+++ b/simple_client.py
@@ -2,7 +2,7 @@
Simple submission client that forms a correct protobuf message and performs a POST
"""
-import submission_pb2, sys, urllib, os
+import submission_pb2, sys, urllib2, os
def send_submission(filenames):
submission = submission_pb2.Submission()
@@ -12,7 +12,8 @@ def send_submission(filenames):
new_file.filename = os.path.basename(f)
new_file.data = open(f, 'rb').read()
- print urllib.urlopen('http://[::1]:5000/submit', submission.SerializeToString()).read()
+ request = urllib2.Request('http://[::1]:5000/submit', submission.SerializeToString(), {"Content-Type" : "application/octet-stream"})
+ print urllib2.urlopen(request).read()
if __name__ == '__main__':
if len(sys.argv) < 2:
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 16:08 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 16:08 UTC (permalink / raw
To: gentoo-commits
commit: 8dfea24b40c34292f20ab60975d3585094b70cb0
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Mon Jul 29 16:02:17 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Mon Jul 29 16:02:17 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=8dfea24b
Introduce processors and PortageProcessor.
A processor is initialised with a database and storage provider. It
should implement the process(request, source) method, where request is a
protobuf Submission message. process() should analyse the received
files, perform any required transformations and should usually store the
files and create appropriate database entries.
Processors are multiplexed through the 'provider' variable in the
protobuf Submission message. This allows various types of logs to be
processed/analysed differently.
---
flask_app.py | 8 +++---
portage_processor.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++
simple_client.py | 1 +
submission.proto | 3 ++-
4 files changed, 82 insertions(+), 4 deletions(-)
diff --git a/flask_app.py b/flask_app.py
index ce55c38..832702c 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -7,8 +7,11 @@ import os, socket
import submission_pb2, storage
from flask import Flask, request
+from portage_processor import PortageProcessor
+
app = Flask(__name__)
store = storage.FilesystemStorage('logs/')
+processors = {'portage' : PortageProcessor(None, store)} # TODO: initialise from config file
@app.route('/')
def index():
@@ -19,9 +22,8 @@ def submit():
submission = submission_pb2.Submission()
submission.ParseFromString(request.data)
source = socket.getfqdn(request.remote_addr) # TODO: is this ok?
- # TODO: pass through analyser
- for f in submission.files:
- store.save_file(source, f.filename, f.data)
+
+ processors[submission.provider].process(submission, source)
return ''
if __name__ == '__main__':
diff --git a/portage_processor.py b/portage_processor.py
new file mode 100644
index 0000000..2403cdf
--- /dev/null
+++ b/portage_processor.py
@@ -0,0 +1,74 @@
+import re, StringIO
+
+class PortageProcessor:
+ _r = {
+ 'warnings' : re.compile(r"(Tinderbox QA Warning!|QA Notice: (Pre-stripped|file does not exist|command not found|USE flag|Files built without respecting|The following files)|linux_config_exists|will always overflow|called with bigger|maintainer mode detected|econf called in src_compile|udev rules should be installed)"),
+ 'testfailed' : re.compile(r"^ \* ERROR: .* failed \(test phase\):"),
+ 'failed' : re.compile(r"^ \* ERROR: .* failed"),
+ 'collision' : re.compile(r"Detected file collision"),
+ 'maintainer' : re.compile(r"^ \* Maintainer: ([a-zA-Z0-9.@_+-]+)(?: ([a-zA-Z0-9.@_+,-]+))?$"),
+ 'escapes' : re.compile(r"\x1b\[[^\x40-\x7e]*[\x40-\x7e]")
+ }
+
+ def __init__(self, db, storage):
+ self.db = db
+ self.storage = storage
+
+ def process(self, request, source):
+ for f in request.files:
+ matches = 0
+ pkg_failed = False
+ test_failed = False
+ collision = False
+ bug_assignee = 'bug-wranglers@gentoo.org'
+ bug_cc = ''
+
+ # TODO: look at proper HTML generation methods:
+ # (*) either XHTML via xml.etree
+ # (*) or Jinja2 (is it possible to parse and generate in one pass?)
+ output = StringIO.StringIO()
+ output.write('''\
+<!doctype html>
+<html>
+ <head>
+ <link rel="stylesheet" type="text/css" href="htmlgrep.css">
+ </head>
+ <body>
+ <ol>
+''')
+
+ for line in f.data.split("\n"):
+ match = False
+
+ line = self._r['escapes'].sub('', line)
+
+ if self._r['warnings'].search(line):
+ match = True
+ elif self._r['testfailed'].search(line):
+ test_failed = True
+ match = True
+ elif self._r['failed'].search(line):
+ pkg_failed = True
+ match = True
+ elif self._r['collision'].search(line):
+ pkg_failed = True
+ collision = True
+ match = True
+ else:
+ m = self._r['maintainer'].search(line)
+ if m:
+ bug_assignee, bug_cc = m.group(1, 2)
+
+ if match:
+ matches += 1
+ output.write('\t'*3 + '<li class="match">' + line + '</li>\n')
+ else:
+ output.write('\t'*3 + '<li>' + line + '</li>\n')
+
+ output.write('''\
+ </ol>
+ </body>
+</html>
+''')
+
+ self.storage.save_file(source, f.filename, output.getvalue())
diff --git a/simple_client.py b/simple_client.py
index 99a4116..ab4bccf 100644
--- a/simple_client.py
+++ b/simple_client.py
@@ -6,6 +6,7 @@ import submission_pb2, sys, urllib2, os
def send_submission(filenames):
submission = submission_pb2.Submission()
+ submission.provider = "portage"
for f in filenames:
new_file = submission.files.add()
diff --git a/submission.proto b/submission.proto
index b06310f..42cf97c 100644
--- a/submission.proto
+++ b/submission.proto
@@ -4,5 +4,6 @@ message Submission {
required bytes data = 2;
}
- repeated File files = 1;
+ required string provider = 1;
+ repeated File files = 2;
}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 16:08 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 16:08 UTC (permalink / raw
To: gentoo-commits
commit: 5640153330f6ff8935009f1570563c48a088aef5
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Wed Jul 17 22:03:19 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Wed Jul 17 22:03:19 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=56401533
Send only filename, not full path
---
simple_client.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/simple_client.py b/simple_client.py
index 0335260..3abc7c9 100644
--- a/simple_client.py
+++ b/simple_client.py
@@ -2,14 +2,14 @@
Simple submission client that forms a correct protobuf message and performs a POST
"""
-import submission_pb2, sys, urllib
+import submission_pb2, sys, urllib, os
def send_submission(filenames):
submission = submission_pb2.Submission()
for f in filenames:
new_file = submission.files.add()
- new_file.filename = f
+ new_file.filename = os.path.basename(f)
new_file.data = open(f, 'rb').read()
print urllib.urlopen('http://[::1]:5000/submit', submission.SerializeToString()).read()
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 16:08 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 16:08 UTC (permalink / raw
To: gentoo-commits
commit: 7f8c730c7fc129cd58c6f7607ad032ac8f11e9f6
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Mon Jul 29 16:07:19 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Mon Jul 29 16:07:19 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=7f8c730c
More basename() defensiveness
---
storage.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/storage.py b/storage.py
index 847806f..fda6c35 100644
--- a/storage.py
+++ b/storage.py
@@ -18,6 +18,6 @@ class FilesystemStorage:
except OSError:
pass # TODO: proper handling
- path = os.path.join(self.root, source, filename) # TODO: consider adding in date at some point
+ path = os.path.join(self.root, source, os.path.basename(filename)) # TODO: consider adding in date at some point
with open(path, 'wb') as f:
f.write(data)
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 18:59 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 18:59 UTC (permalink / raw
To: gentoo-commits
commit: e5bdc5bc099096186e00e61604484fc6a1ecdeb2
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Mon Jul 29 18:55:51 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Mon Jul 29 18:55:51 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=e5bdc5bc
Database (SQL) class for functionality common across all processors
Currently uses MySQLdb. Schema included.
---
database.py | 22 ++++++++++++++++++++++
flask_app.py | 18 ++++++++++++++----
portage_processor.py | 9 +++++----
schema.sql | 12 ++++++++++++
simple_client.py | 1 +
submission.proto | 3 ++-
6 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/database.py b/database.py
new file mode 100644
index 0000000..5202876
--- /dev/null
+++ b/database.py
@@ -0,0 +1,22 @@
+from contextlib import closing
+import MySQLdb
+
+class DatabaseConnection:
+ def __init__(self, conn):
+ self.conn = conn
+
+ def insert_file(self, path, group_id):
+ with closing(self.conn.cursor()) as c:
+ c.execute("insert into `files` (`path`, `group_id`) values (%s, %s)", (path, group_id))
+ self.conn.commit()
+ return c.lastrowid
+
+ def insert_group(self, name, provider, date):
+ with closing(self.conn.cursor()) as c:
+ c.execute("insert into `groups` (`name`, `provider`, `date`) values (%s, %s, %s)", (name, provider, date))
+ self.conn.commit()
+ return c.lastrowid
+
+def get_connection(user, passwd, db):
+ conn = MySQLdb.connect(user=user, passwd=passwd, db=db)
+ return DatabaseConnection(conn)
diff --git a/flask_app.py b/flask_app.py
index 832702c..5356bc4 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -4,14 +4,24 @@ When run as a script, the Flask development server is started.
"""
import os, socket
-import submission_pb2, storage
-from flask import Flask, request
+import submission_pb2, storage, database
+from flask import Flask, request, g
from portage_processor import PortageProcessor
app = Flask(__name__)
store = storage.FilesystemStorage('logs/')
-processors = {'portage' : PortageProcessor(None, store)} # TODO: initialise from config file
+processors = {'portage' : PortageProcessor(store)} # TODO: initialise from config file
+
+@app.before_request
+def before_request():
+ g.db = database.get_connection('gsoc', 'gsocpasswd', 'loganalysis')
+
+@app.teardown_request
+def teardown_request(exception):
+ db = getattr(g, 'db', None)
+ if db is not None:
+ db.conn.close()
@app.route('/')
def index():
@@ -23,7 +33,7 @@ def submit():
submission.ParseFromString(request.data)
source = socket.getfqdn(request.remote_addr) # TODO: is this ok?
- processors[submission.provider].process(submission, source)
+ processors[submission.provider].process(submission, source, g.db)
return ''
if __name__ == '__main__':
diff --git a/portage_processor.py b/portage_processor.py
index 2403cdf..66fb970 100644
--- a/portage_processor.py
+++ b/portage_processor.py
@@ -1,4 +1,4 @@
-import re, StringIO
+import os, re, StringIO, time
class PortageProcessor:
_r = {
@@ -10,11 +10,11 @@ class PortageProcessor:
'escapes' : re.compile(r"\x1b\[[^\x40-\x7e]*[\x40-\x7e]")
}
- def __init__(self, db, storage):
- self.db = db
+ def __init__(self, storage):
self.storage = storage
- def process(self, request, source):
+ def process(self, request, source, db):
+ group_id = db.insert_group(request.group_name, 'portage', int(time.time()))
for f in request.files:
matches = 0
pkg_failed = False
@@ -72,3 +72,4 @@ class PortageProcessor:
''')
self.storage.save_file(source, f.filename, output.getvalue())
+ file_id = db.insert_file(os.path.join(source, f.filename), group_id)
diff --git a/schema.sql b/schema.sql
new file mode 100644
index 0000000..564385e
--- /dev/null
+++ b/schema.sql
@@ -0,0 +1,12 @@
+create table if not exists `files` (
+ `id` int primary key auto_increment,
+ `path` text not null,
+ `group_id` int not null
+);
+
+create table if not exists `groups` (
+ `id` int primary key auto_increment,
+ `name` text not null,
+ `provider` varchar(16) not null,
+ `date` int not null
+);
diff --git a/simple_client.py b/simple_client.py
index ab4bccf..c89b6c1 100644
--- a/simple_client.py
+++ b/simple_client.py
@@ -7,6 +7,7 @@ import submission_pb2, sys, urllib2, os
def send_submission(filenames):
submission = submission_pb2.Submission()
submission.provider = "portage"
+ submission.group_name = "Manual submission"
for f in filenames:
new_file = submission.files.add()
diff --git a/submission.proto b/submission.proto
index 42cf97c..3cbf474 100644
--- a/submission.proto
+++ b/submission.proto
@@ -5,5 +5,6 @@ message Submission {
}
required string provider = 1;
- repeated File files = 2;
+ optional string group_name = 2;
+ repeated File files = 3;
}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 22:40 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 22:40 UTC (permalink / raw
To: gentoo-commits
commit: a436a8360d42232c1285e0979f3c561579bba591
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Mon Jul 29 22:36:51 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Mon Jul 29 22:36:51 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=a436a836
Add hostname to schema
---
database.py | 4 ++--
portage_processor.py | 3 ++-
schema.sql | 1 +
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/database.py b/database.py
index 5202876..79ae693 100644
--- a/database.py
+++ b/database.py
@@ -11,9 +11,9 @@ class DatabaseConnection:
self.conn.commit()
return c.lastrowid
- def insert_group(self, name, provider, date):
+ def insert_group(self, hostname, name, provider, date):
with closing(self.conn.cursor()) as c:
- c.execute("insert into `groups` (`name`, `provider`, `date`) values (%s, %s, %s)", (name, provider, date))
+ c.execute("insert into `groups` (`hostname`, `name`, `provider`, `date`) values (%s, %s, %s, %s)", (hostname, name, provider, date))
self.conn.commit()
return c.lastrowid
diff --git a/portage_processor.py b/portage_processor.py
index 66fb970..32ca9c4 100644
--- a/portage_processor.py
+++ b/portage_processor.py
@@ -14,7 +14,8 @@ class PortageProcessor:
self.storage = storage
def process(self, request, source, db):
- group_id = db.insert_group(request.group_name, 'portage', int(time.time()))
+ group_id = db.insert_group(source, request.group_name, 'portage', int(time.time()))
+
for f in request.files:
matches = 0
pkg_failed = False
diff --git a/schema.sql b/schema.sql
index 564385e..6b418a8 100644
--- a/schema.sql
+++ b/schema.sql
@@ -6,6 +6,7 @@ create table if not exists `files` (
create table if not exists `groups` (
`id` int primary key auto_increment,
+ `hostname` varchar(255) not null,
`name` text not null,
`provider` varchar(16) not null,
`date` int not null
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 22:40 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 22:40 UTC (permalink / raw
To: gentoo-commits
commit: bbd440de5dccef2115a3f59e3ab7ca2864d717ad
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Mon Jul 29 22:40:02 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Mon Jul 29 22:40:02 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=bbd440de
Add TODO item
---
flask_app.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flask_app.py b/flask_app.py
index 5356bc4..17281fa 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -15,7 +15,7 @@ processors = {'portage' : PortageProcessor(store)} # TODO: initialise from confi
@app.before_request
def before_request():
- g.db = database.get_connection('gsoc', 'gsocpasswd', 'loganalysis')
+ g.db = database.get_connection('gsoc', 'gsocpasswd', 'loganalysis') # TODO: get from config file
@app.teardown_request
def teardown_request(exception):
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [gentoo-commits] proj/log-analysis:master commit in: /
@ 2013-07-29 22:40 Antanas Ursulis
0 siblings, 0 replies; 16+ messages in thread
From: Antanas Ursulis @ 2013-07-29 22:40 UTC (permalink / raw)
To: gentoo-commits
commit: 7ba5f957c082545a9eb097839fdc7eba826a7d80
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com>
AuthorDate: Mon Jul 29 22:38:23 2013 +0000
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com>
CommitDate: Mon Jul 29 22:38:23 2013 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=7ba5f957
Implement additional metadata storage for logs received from Portage
Flags like 'pkg_failed', 'test_failed', etc. are stored once per group.
This is a discussion item, as it might make sense to store some data
once per file instead.
---
database.py | 2 +-
portage_database.py | 12 ++++++++++++
portage_processor.py | 18 +++++++++++-------
schema_portage.sql | 10 ++++++++++
4 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/database.py b/database.py
index 79ae693..9927627 100644
--- a/database.py
+++ b/database.py
@@ -1,7 +1,7 @@
from contextlib import closing
import MySQLdb
-class DatabaseConnection:
+class DatabaseConnection(object):
def __init__(self, conn):
self.conn = conn
diff --git a/portage_database.py b/portage_database.py
new file mode 100644
index 0000000..ca7831c
--- /dev/null
+++ b/portage_database.py
@@ -0,0 +1,12 @@
+from contextlib import closing
+from database import DatabaseConnection
+
+class PortageDatabaseConnection(DatabaseConnection):
+ def __init__(self, db):
+ super(PortageDatabaseConnection, self).__init__(db.conn)
+
+ # TODO: consider passing these arguments around in a dictionary or kwargs
+ def insert_group_extra(self, group_id, pkg_name, matches, pkg_failed, test_failed, collision, bug_assignee, bug_cc):
+ with closing(self.conn.cursor()) as c:
+ c.execute("insert into `groups_portage` values (%s, %s, %s, %s, %s, %s, %s, %s)", (group_id, pkg_name, matches, pkg_failed, test_failed, collision, bug_assignee, bug_cc))
+ self.conn.commit()
diff --git a/portage_processor.py b/portage_processor.py
index 32ca9c4..252209e 100644
--- a/portage_processor.py
+++ b/portage_processor.py
@@ -1,4 +1,5 @@
import os, re, StringIO, time
+from portage_database import PortageDatabaseConnection
class PortageProcessor:
_r = {
@@ -14,16 +15,17 @@ class PortageProcessor:
self.storage = storage
def process(self, request, source, db):
+ db = PortageDatabaseConnection(db)
group_id = db.insert_group(source, request.group_name, 'portage', int(time.time()))
- for f in request.files:
- matches = 0
- pkg_failed = False
- test_failed = False
- collision = False
- bug_assignee = 'bug-wranglers@gentoo.org'
- bug_cc = ''
+ matches = 0
+ pkg_failed = False
+ test_failed = False
+ collision = False
+ bug_assignee = 'bug-wranglers@gentoo.org'
+ bug_cc = ''
+ for f in request.files:
# TODO: look at proper HTML generation methods:
# (*) either XHTML via xml.etree
# (*) or Jinja2 (is it possible to parse and generate in one pass?)
@@ -74,3 +76,5 @@ class PortageProcessor:
self.storage.save_file(source, f.filename, output.getvalue())
file_id = db.insert_file(os.path.join(source, f.filename), group_id)
+
+ db.insert_group_extra(group_id, 'TODO', matches, pkg_failed, test_failed, collision, bug_assignee, bug_cc)
diff --git a/schema_portage.sql b/schema_portage.sql
new file mode 100644
index 0000000..4b9c80d
--- /dev/null
+++ b/schema_portage.sql
@@ -0,0 +1,10 @@
+create table if not exists `groups_portage` (
+ `id` int primary key,
+ `pkg_name` varchar(255) not null,
+ `matches` int not null,
+ `pkg_failed` bool not null,
+ `test_failed` bool not null,
+ `collision` bool not null,
+ `bug_assignee` text not null,
+ `bug_cc` text not null
+);
^ permalink raw reply related [flat|nested] 16+ messages in thread
end of thread, other threads:[~2013-07-29 22:40 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-03 7:15 [gentoo-commits] proj/log-analysis:master commit in: / Antanas Ursulis
-- strict thread matches above, loose matches on Subject: below --
2013-07-29 22:40 Antanas Ursulis
2013-07-29 22:40 Antanas Ursulis
2013-07-29 22:40 Antanas Ursulis
2013-07-29 18:59 Antanas Ursulis
2013-07-29 16:08 Antanas Ursulis
2013-07-29 16:08 Antanas Ursulis
2013-07-29 16:08 Antanas Ursulis
2013-07-29 16:08 Antanas Ursulis
2013-07-05 0:00 Antanas Ursulis
2013-07-05 0:00 Antanas Ursulis
2013-07-04 1:39 Antanas Ursulis
2013-07-03 7:15 Antanas Ursulis
2013-07-03 7:15 Antanas Ursulis
2013-07-03 7:15 Antanas Ursulis
2013-07-03 7:15 Antanas Ursulis
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox