* [gentoo-commits] gentoo commit in src/gwn: get_glsas.py
@ 2013-10-12 16:08 Markos Chandras (hwoarang)
0 siblings, 0 replies; 3+ messages in thread
From: Markos Chandras (hwoarang) @ 2013-10-12 16:08 UTC (permalink / raw
To: gentoo-commits
hwoarang 13/10/12 16:08:24
Modified: get_glsas.py
Log:
get_glsas.py: Rewrite get_glsas.py from scratch
The old one didn't work and I had no idea how to make it work.
Rewrite it to use the glsa table from
http://www.gentoo.org/security/en/glsa/index.xml?passthrough=1
and BeautifulSoup (http://www.crummy.com/software/BeautifulSoup/)
to make it work again. Print text compatible for direct inclusion in the
WP GMN. The code is not ideal but it works ;)
Revision Changes Path
1.2 src/gwn/get_glsas.py
file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.2&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.2&content-type=text/plain
diff : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?r1=1.1&r2=1.2
Index: get_glsas.py
===================================================================
RCS file: /var/cvsroot/gentoo/src/gwn/get_glsas.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- get_glsas.py 7 Mar 2007 12:17:57 -0000 1.1
+++ get_glsas.py 12 Oct 2013 16:08:24 -0000 1.2
@@ -1,120 +1,99 @@
#!/usr/bin/python
-# you need dev-python/pyxml for this to work
-
-from xml.dom.ext.reader.Sax2 import FromXmlStream
-from urllib2 import urlopen
-from tempfile import mkstemp
-import sys, re, os
-
-RDF_PAGE = "http://www.gentoo.org/rdf/en/glsa-index.rdf?num=100"
-RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-DTD_PREFIX = "http://www.gentoo.org"
-GLSA_LINK = "http://www.gentoo.org/security/en/glsa/"
-GLSA_LINK2 = "http://security.gentoo.org/glsa/"
-
-no_glsas = """<chapter>
-<title>Gentoo Security</title>
-<section>
-<body>
-
-<p>
-Gentoo Security is on hiatus this week due to no GLSAs being released.
-</p>
-
-</body>
-</section>
-
-</chapter>"""
-
-doctype_re = re.compile("<!DOCTYPE guide SYSTEM \"(.*)\">")
-doctype_new = "<!DOCTYPE guide SYSTEM \"" + DTD_PREFIX + "%s\">\n"
-glsa_re = re.compile(GLSA_LINK + "glsa-(.*).xml");
-glsa_re2 = re.compile(GLSA_LINK2 + "glsa-(.*).xml");
-glsa2gwn = "./glsa2gwn.py"
-
-def get_last_glsa_id(last_gwn):
- if last_gwn.startswith("http://"):
- fd = urlopen(last_gwn)
- else:
- fd = open(last_gwn)
- tmp = open(mkstemp()[1], 'r+')
- matched = False
-
- # need to do this otherwise we get an 'unknown url type' error from urllib2
- # when it encounters the relative link to the dtd
- for line in fd:
- if not matched:
- matches = doctype_re.match(line)
-
- if matches:
- tmp.write(doctype_new % matches.group(1))
- matched = True
- else:
- tmp.write(line)
- else:
- tmp.write(line)
-
- tmp.flush()
- tmp.seek(0)
-
- dom = FromXmlStream(tmp)
- last_glsa_id = "0"
-
- for node in dom.getElementsByTagName("uri"):
- uri = node.getAttribute("link")
-
- if uri.startswith(GLSA_LINK):
- new_id = glsa_re.match(uri).group(1)
- if new_id > last_glsa_id:
- last_glsa_id = new_id
-
- return last_glsa_id
-
-def get_glsa_list(last_id):
- ret = []
- fd = urlopen(RDF_PAGE)
- dom = FromXmlStream(fd)
-
- for node in dom.getElementsByTagNameNS(RDF_NS, "li"):
- uri = node.getAttributeNS(RDF_NS, "resource")
- id = glsa_re2.match(uri).group(1)
-
- if id > last_id:
- ret.append(uri + "?passthru=1")
- else:
- break
-
- ret.reverse()
- return ret
+import urllib2
+from bs4 import BeautifulSoup
+import sys, re, os, time
+
+foundone = 0
+glsa_list = []
+package_list = []
+description_list = []
+bug_list = []
+
+def getglsas(table):
+ global foundone, glsa_list, package_list, description_list, bug_list
+ rows = table.findAll('tr')
+ for tr in rows:
+ cols = tr.findAll('td')
+ passt = 0
+ glsanum = ''
+ package = ''
+ description = ''
+ bugnum = ''
+ for td in cols:
+ if passt == 0:
+ if td.a:
+ # Fetch GLSA id and reconstruct the href
+ glsanum = str(td.a).split()
+ if str(date_from) in str(glsanum[2]):
+ foundone = 1
+ glsanum = glsanum[0] + " " + \
+ glsanum[1] + glsanum[2] + glsanum[3]
+ glsa_list.append(glsanum)
+ else:
+ return
+ passt += 1
+ else:
+ # Ignore table headers
+ passt = 0
+
+ elif passt == 1:
+ # Ignore severity
+ passt += 1
+
+ elif passt == 2:
+ # Fetch package name and construct href
+ package = str(td.string).strip()
+ package = "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" % (package, package)
+ package_list.append(package)
+ passt += 1
+
+ elif passt == 3:
+ # Fetch description
+ description = str(td.string).strip()
+ description_list.append(description)
+ passt += 1
+
+ elif passt == 4:
+ # Fetch Bug number and recontruct the href
+ if td.a:
+ bugnum = str(td.a).split()
+ bugnum = bugnum[0] + " " + \
+ bugnum[1] + bugnum[2] + bugnum[3]
+ bug_list.append(bugnum)
+ passt = 0
if __name__ == '__main__':
- if len(sys.argv) < 2 or len(sys.argv) > 3:
- print "Usage: " + os.path.basename(sys.argv[0]) + " <last-gwn> [glsa2gwn.py]"
- print "if the last-gwn is a URI remember to add '?passthru=1' to the end"
- print "if the location of glsa2gwn.py is not specified it defaults to " + glsa2gwn
- sys.exit(1)
- else:
- if len(sys.argv) == 3:
- glsa2gwn = sys.argv[2]
+ # get dates from command line, else use now (time.time())
+ starttime = time.gmtime(time.time() - (60 * 60 * 24 * 1))
+ endtime = time.gmtime(time.time() + (60 * 60 * 24 * 31))
+ # Format the string to what we expect
+ date_to = time.strftime("%Y%m", endtime)
+ date_from = time.strftime("%Y%m", starttime)
+ glsas = urllib2.urlopen("http://www.gentoo.org/security/en/glsa/index.xml?passthrough=1").read()
+ soup = BeautifulSoup(glsas)
+ table = soup.findAll('table')
+ # There is probably a better way to fetch the table with the GLSAs
+ table = table[2]
+ print "Looking for GLSAs from %s to %s\n\n" % (date_from, date_to)
+ getglsas(table)
+
+ if foundone:
+ print "\n\nFound %s GLSAs\n" % len(glsa_list)
- last_id = get_last_glsa_id(sys.argv[1])
+ print "Copy and paste the following text to the GMN Security section\n\n"
- # if last_id == 0 then there haven't been any new GLSAs since the last GWN
- if last_id != "0":
- glsas = get_glsa_list(last_id)
-
- if len(glsas) > 0:
- print "<chapter>\n<title>Gentoo security</title>\n"
-
- # if we don't flush here then the previous print statement doesn't
- # make it when redirecting the output to a file
- sys.stdout.flush()
-
- glsas.insert(0, glsa2gwn)
- os.spawnv(os.P_WAIT, glsa2gwn, glsas)
-
- print "</chapter>\n"
- else:
- print no_glsas
- else:
- print no_glsas
+ if not foundone:
+ print "No GLSAs have been released this month! You are safe :)"
+ else:
+ print "The following <a title=\"GLSAs\" " + \
+ "href=\"http://www.gentoo.org/security/en/glsa/index.xml\">GLSAs</a> " + \
+ "have been released by the <a title=\"Security Team\" " + \
+ "href=\"http://wiki.gentoo.org/wiki/Project:Security\">Security Team" + \
+ "</a>"
+ print "[table tablesorter=\"1\" id=\"glsas\"]"
+ print "GLSA, Package, Description, Bug"
+ for x in range(0,len(glsa_list)):
+ print glsa_list[x] + ", " + package_list[x] + \
+ ", " + description_list[x] + ", " + bug_list[x]
+ print "[/table]"
+ sys.exit(0)
^ permalink raw reply [flat|nested] 3+ messages in thread
* [gentoo-commits] gentoo commit in src/gwn: get_glsas.py
@ 2013-11-26 18:54 Markos Chandras (hwoarang)
0 siblings, 0 replies; 3+ messages in thread
From: Markos Chandras (hwoarang) @ 2013-11-26 18:54 UTC (permalink / raw
To: gentoo-commits
hwoarang 13/11/26 18:54:44
Modified: get_glsas.py
Log:
get_glsas.py: print main header as well
Revision Changes Path
1.3 src/gwn/get_glsas.py
file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.3&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.3&content-type=text/plain
diff : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?r1=1.2&r2=1.3
Index: get_glsas.py
===================================================================
RCS file: /var/cvsroot/gentoo/src/gwn/get_glsas.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- get_glsas.py 12 Oct 2013 16:08:24 -0000 1.2
+++ get_glsas.py 26 Nov 2013 18:54:44 -0000 1.3
@@ -82,6 +82,7 @@
print "Copy and paste the following text to the GMN Security section\n\n"
+ print "<h1>Security</h1>"
if not foundone:
print "No GLSAs have been released this month! You are safe :)"
else:
^ permalink raw reply [flat|nested] 3+ messages in thread
* [gentoo-commits] gentoo commit in src/gwn: get_glsas.py
@ 2014-09-01 21:20 Markos Chandras (hwoarang)
0 siblings, 0 replies; 3+ messages in thread
From: Markos Chandras (hwoarang) @ 2014-09-01 21:20 UTC (permalink / raw
To: gentoo-commits
hwoarang 14/09/01 21:20:49
Modified: get_glsas.py
Log:
get_glsas.py: Multiple fixes
- Add command line arguments for start-stop date
- Escape ',' in description since it conflicts with the WordPress plugin for
tables
- Take care of GLSAs for multiple packages
- improve code to construct the GLSA id hyperlink
Revision Changes Path
1.4 src/gwn/get_glsas.py
file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.4&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.4&content-type=text/plain
diff : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?r1=1.3&r2=1.4
Index: get_glsas.py
===================================================================
RCS file: /var/cvsroot/gentoo/src/gwn/get_glsas.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- get_glsas.py 26 Nov 2013 18:54:44 -0000 1.3
+++ get_glsas.py 1 Sep 2014 21:20:49 -0000 1.4
@@ -25,13 +25,12 @@
if td.a:
# Fetch GLSA id and reconstruct the href
glsanum = str(td.a).split()
- if str(date_from) in str(glsanum[2]):
- foundone = 1
- glsanum = glsanum[0] + " " + \
- glsanum[1] + glsanum[2] + glsanum[3]
- glsa_list.append(glsanum)
- else:
- return
+ if any(str(date_from) in date for date in glsanum):
+ foundone = 1
+ glsanum = glsanum[0] + " " + glsanum[1] + glsanum[2] + glsanum[3]
+ glsa_list.append(glsanum)
+ else:
+ break
passt += 1
else:
# Ignore table headers
@@ -43,14 +42,26 @@
elif passt == 2:
# Fetch package name and construct href
- package = str(td.string).strip()
- package = "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" % (package, package)
+ if td.a:
+ # This is usually for GLSAs for
+ # multiple packages.
+ # FIXME: There has to be a better way
+ # to do that...
+ package = td.find_next(text=True).strip().split()[0]
+ extra_pkg = \
+ td.find_next(text=True).strip() + " more)"
+ package = \
+ "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" \
+ % (package, extra_pkg)
+ else:
+ package = str(td.string).strip()
+ package = "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" % (package, package)
package_list.append(package)
passt += 1
elif passt == 3:
# Fetch description
- description = str(td.string).strip()
+ description = str(td.string).strip().replace(',','\,')
description_list.append(description)
passt += 1
@@ -66,6 +77,15 @@
# get dates from command line, else use now (time.time())
starttime = time.gmtime(time.time() - (60 * 60 * 24 * 1))
endtime = time.gmtime(time.time() + (60 * 60 * 24 * 31))
+ if len(sys.argv) >=1:
+ if len(sys.argv) >= 2:
+ starttime = time.strptime(str(int(sys.argv[1])), "%Y%m%d")
+ endtime = time.strptime(str(int(sys.argv[2])), "%Y%m%d")
+ else:
+ print "Usage: " + os.path.basename(sys.argv[0]) + " [start-date] [end-date]"
+ print "dates must be passed in 'yyyymmdd' format"
+ print "if no dates are specified then it defaults to a date range of the last 31 days"
+
# Format the string to what we expect
date_to = time.strftime("%Y%m", endtime)
date_from = time.strftime("%Y%m", starttime)
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-09-01 21:20 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-11-26 18:54 [gentoo-commits] gentoo commit in src/gwn: get_glsas.py Markos Chandras (hwoarang)
-- strict thread matches above, loose matches on Subject: below --
2014-09-01 21:20 Markos Chandras (hwoarang)
2013-10-12 16:08 Markos Chandras (hwoarang)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox