[gentoo-commits] gentoo commit in src/gwn: get_glsas.py

public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed

* [gentoo-commits] gentoo commit in src/gwn: get_glsas.py
@ 2013-10-12 16:08 Markos Chandras (hwoarang)
  0 siblings, 0 replies; 3+ messages in thread
From: Markos Chandras (hwoarang) @ 2013-10-12 16:08 UTC (permalink / raw
  To: gentoo-commits

hwoarang    13/10/12 16:08:24

  Modified:             get_glsas.py
  Log:
  get_glsas.py: Rewrite get_glsas.py from scratch
  
  The old one didn't work and I had no idea how to make it work.
  Rewrite it to use the glsa table from
  http://www.gentoo.org/security/en/glsa/index.xml?passthrough=1
  and BeautifulSoup (http://www.crummy.com/software/BeautifulSoup/)
  to make it work again. Print text compatible for direct inclusion in the
  WP GMN. The code is not ideal but it works ;)

Revision  Changes    Path
1.2                  src/gwn/get_glsas.py

file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.2&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.2&content-type=text/plain
diff : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?r1=1.1&r2=1.2

Index: get_glsas.py
===================================================================
RCS file: /var/cvsroot/gentoo/src/gwn/get_glsas.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- get_glsas.py	7 Mar 2007 12:17:57 -0000	1.1
+++ get_glsas.py	12 Oct 2013 16:08:24 -0000	1.2
@@ -1,120 +1,99 @@
 #!/usr/bin/python
-# you need dev-python/pyxml for this to work
-
-from xml.dom.ext.reader.Sax2 import FromXmlStream
-from urllib2 import urlopen
-from tempfile import mkstemp
-import sys, re, os
-
-RDF_PAGE = "http://www.gentoo.org/rdf/en/glsa-index.rdf?num=100"
-RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-DTD_PREFIX = "http://www.gentoo.org"
-GLSA_LINK = "http://www.gentoo.org/security/en/glsa/"
-GLSA_LINK2 = "http://security.gentoo.org/glsa/"
-
-no_glsas = """<chapter>
-<title>Gentoo Security</title>
-<section>
-<body>
-
-<p>
-Gentoo Security is on hiatus this week due to no GLSAs being released.
-</p>
-
-</body>
-</section>
-
-</chapter>"""
-
-doctype_re = re.compile("<!DOCTYPE guide SYSTEM \"(.*)\">")
-doctype_new = "<!DOCTYPE guide SYSTEM \"" + DTD_PREFIX + "%s\">\n"
-glsa_re = re.compile(GLSA_LINK + "glsa-(.*).xml");
-glsa_re2 = re.compile(GLSA_LINK2 + "glsa-(.*).xml");
-glsa2gwn = "./glsa2gwn.py"
-
-def get_last_glsa_id(last_gwn):
-	if last_gwn.startswith("http://"):
-		fd = urlopen(last_gwn)
-	else:
-		fd = open(last_gwn)
-	tmp = open(mkstemp()[1], 'r+')
-	matched = False
-
-	# need to do this otherwise we get an 'unknown url type' error from urllib2
-	# when it encounters the relative link to the dtd
-	for line in fd:
-		if not matched:
-			matches = doctype_re.match(line)
-
-			if matches:
-				tmp.write(doctype_new % matches.group(1))
-				matched = True
-			else:
-				tmp.write(line)
-		else:
-			tmp.write(line)
-
-	tmp.flush()
-	tmp.seek(0)
-
-	dom = FromXmlStream(tmp)
-	last_glsa_id = "0"
-
-	for node in dom.getElementsByTagName("uri"):
-		uri = node.getAttribute("link")
-
-		if uri.startswith(GLSA_LINK):
-			new_id = glsa_re.match(uri).group(1)
-			if new_id > last_glsa_id:
-				last_glsa_id = new_id
-
-	return last_glsa_id
-
-def get_glsa_list(last_id):
-	ret = []
-	fd = urlopen(RDF_PAGE)
-	dom = FromXmlStream(fd)
-
-	for node in dom.getElementsByTagNameNS(RDF_NS, "li"):
-		uri = node.getAttributeNS(RDF_NS, "resource")
-		id = glsa_re2.match(uri).group(1)
-
-		if id > last_id:
-			ret.append(uri + "?passthru=1")
-		else:
-			break
-
-	ret.reverse()
-	return ret
 
+import urllib2
+from bs4 import BeautifulSoup
+import sys, re, os, time
+
+foundone = 0
+glsa_list = []
+package_list = []
+description_list = []
+bug_list = []
+
+def getglsas(table):
+	global foundone, glsa_list, package_list, description_list, bug_list
+	rows = table.findAll('tr')
+	for tr in rows:
+		cols = tr.findAll('td')
+		passt = 0
+		glsanum = ''
+		package = ''
+		description = ''
+		bugnum = ''
+		for td in cols:
+			if passt == 0:
+				if td.a:
+					# Fetch GLSA id and reconstruct the href
+					glsanum = str(td.a).split()
+					if str(date_from) in str(glsanum[2]):
+						foundone = 1
+						glsanum = glsanum[0] + " " + \
+						glsanum[1] + glsanum[2] + glsanum[3]
+						glsa_list.append(glsanum)
+					else:
+						return
+					passt += 1
+				else:
+					# Ignore table headers
+					passt = 0
+
+			elif passt == 1:
+				# Ignore severity
+				passt += 1
+
+			elif passt == 2:
+				# Fetch package name and construct href
+				package = str(td.string).strip()
+				package = "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" % (package, package)
+				package_list.append(package)
+				passt += 1
+
+			elif passt == 3:
+				# Fetch description
+				description = str(td.string).strip()
+				description_list.append(description)
+				passt += 1
+
+			elif passt == 4:
+				# Fetch Bug number and recontruct the href
+				if td.a:
+					bugnum = str(td.a).split()
+					bugnum = bugnum[0] + " " + \
+					bugnum[1] + bugnum[2] + bugnum[3]
+					bug_list.append(bugnum)
+				passt = 0
 if __name__ == '__main__':
-	if len(sys.argv) < 2 or len(sys.argv) > 3:
-		print "Usage: " + os.path.basename(sys.argv[0]) +  " <last-gwn> [glsa2gwn.py]"
-		print "if the last-gwn is a URI remember to add '?passthru=1' to the end"
-		print "if the location of glsa2gwn.py is not specified it defaults to " + glsa2gwn
-		sys.exit(1)
-	else:
-		if len(sys.argv) == 3:
-			glsa2gwn = sys.argv[2]
+	# get dates from command line, else use now (time.time())
+	starttime = time.gmtime(time.time() - (60 * 60 * 24 * 1))
+	endtime = time.gmtime(time.time() + (60 * 60 * 24 * 31))
+	# Format the string to what we expect
+	date_to = time.strftime("%Y%m", endtime)
+	date_from = time.strftime("%Y%m", starttime)
+	glsas =	urllib2.urlopen("http://www.gentoo.org/security/en/glsa/index.xml?passthrough=1").read()
+	soup = BeautifulSoup(glsas)
+	table = soup.findAll('table')
+	# There is probably a better way to fetch the table with the GLSAs
+	table = table[2]
+	print "Looking for GLSAs from %s to %s\n\n" % (date_from, date_to)
+	getglsas(table)
+
+	if foundone:
+		print "\n\nFound %s GLSAs\n" % len(glsa_list)
 
-		last_id = get_last_glsa_id(sys.argv[1])
+	print "Copy and paste the following text to the GMN Security section\n\n"
 
-		# if last_id == 0 then there haven't been any new GLSAs since the last GWN
-		if last_id != "0":
-			glsas = get_glsa_list(last_id)
-
-			if len(glsas) > 0:
-				print "<chapter>\n<title>Gentoo security</title>\n"
-
-				# if we don't flush here then the previous print statement doesn't
-				# make it when redirecting the output to a file
-				sys.stdout.flush()
-
-				glsas.insert(0, glsa2gwn)
-				os.spawnv(os.P_WAIT, glsa2gwn, glsas)
-
-				print "</chapter>\n"
-			else:
-				print no_glsas
-		else:
-			print no_glsas
+	if not foundone:
+		print "No GLSAs have been released this month! You are safe :)"
+	else:
+		print "The following <a title=\"GLSAs\" " + \
+		"href=\"http://www.gentoo.org/security/en/glsa/index.xml\">GLSAs</a> " + \
+		"have been released by the <a title=\"Security Team\" " + \
+		"href=\"http://wiki.gentoo.org/wiki/Project:Security\">Security Team" + \
+		"</a>"
+		print "[table tablesorter=\"1\" id=\"glsas\"]"
+		print "GLSA, Package, Description, Bug"
+		for x in range(0,len(glsa_list)):
+			print glsa_list[x] + ", " + package_list[x] + \
+			", " + description_list[x] + ", " + bug_list[x]
+		print "[/table]"
+	sys.exit(0)





^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gentoo-commits] gentoo commit in src/gwn: get_glsas.py
@ 2013-11-26 18:54 Markos Chandras (hwoarang)
  0 siblings, 0 replies; 3+ messages in thread
From: Markos Chandras (hwoarang) @ 2013-11-26 18:54 UTC (permalink / raw
  To: gentoo-commits

hwoarang    13/11/26 18:54:44

  Modified:             get_glsas.py
  Log:
  get_glsas.py: print main header as well

Revision  Changes    Path
1.3                  src/gwn/get_glsas.py

file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.3&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.3&content-type=text/plain
diff : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?r1=1.2&r2=1.3

Index: get_glsas.py
===================================================================
RCS file: /var/cvsroot/gentoo/src/gwn/get_glsas.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- get_glsas.py	12 Oct 2013 16:08:24 -0000	1.2
+++ get_glsas.py	26 Nov 2013 18:54:44 -0000	1.3
@@ -82,6 +82,7 @@
 
 	print "Copy and paste the following text to the GMN Security section\n\n"
 
+	print "<h1>Security</h1>"
 	if not foundone:
 		print "No GLSAs have been released this month! You are safe :)"
 	else:





^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gentoo-commits] gentoo commit in src/gwn: get_glsas.py
@ 2014-09-01 21:20 Markos Chandras (hwoarang)
  0 siblings, 0 replies; 3+ messages in thread
From: Markos Chandras (hwoarang) @ 2014-09-01 21:20 UTC (permalink / raw
  To: gentoo-commits

hwoarang    14/09/01 21:20:49

  Modified:             get_glsas.py
  Log:
  get_glsas.py: Multiple fixes
  
  - Add command line arguments for start-stop date
  - Escape ',' in description since it conflicts with the WordPress plugin for
  tables
  - Take care of GLSAs for multiple packages
  - improve code to construct the GLSA id hyperlink

Revision  Changes    Path
1.4                  src/gwn/get_glsas.py

file : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.4&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?rev=1.4&content-type=text/plain
diff : http://sources.gentoo.org/viewvc.cgi/gentoo/src/gwn/get_glsas.py?r1=1.3&r2=1.4

Index: get_glsas.py
===================================================================
RCS file: /var/cvsroot/gentoo/src/gwn/get_glsas.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- get_glsas.py	26 Nov 2013 18:54:44 -0000	1.3
+++ get_glsas.py	1 Sep 2014 21:20:49 -0000	1.4
@@ -25,13 +25,12 @@
 				if td.a:
 					# Fetch GLSA id and reconstruct the href
 					glsanum = str(td.a).split()
-					if str(date_from) in str(glsanum[2]):
-						foundone = 1
-						glsanum = glsanum[0] + " " + \
-						glsanum[1] + glsanum[2] + glsanum[3]
-						glsa_list.append(glsanum)
-					else:
-						return
+                                        if any(str(date_from) in date for date in glsanum):
+                                                foundone = 1
+                                                glsanum = glsanum[0] + " " + glsanum[1] + glsanum[2] + glsanum[3]
+                                                glsa_list.append(glsanum)
+       					else:
+        			                break
 					passt += 1
 				else:
 					# Ignore table headers
@@ -43,14 +42,26 @@
 
 			elif passt == 2:
 				# Fetch package name and construct href
-				package = str(td.string).strip()
-				package = "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" % (package, package)
+                                if td.a:
+                                        # This is usually for GLSAs for
+                                        # multiple packages.
+                                        # FIXME: There has to be a better way
+                                        # to do that...
+                                        package = td.find_next(text=True).strip().split()[0]
+                                        extra_pkg = \
+                                            td.find_next(text=True).strip() + " more)"
+                                        package = \
+                                            "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" \
+                                            % (package, extra_pkg)
+                                else:
+                                        package = str(td.string).strip()
+                                        package = "<a href=\"http://packages.gentoo.org/package/%s\">%s</a>" % (package, package)
 				package_list.append(package)
 				passt += 1
 
 			elif passt == 3:
 				# Fetch description
-				description = str(td.string).strip()
+				description = str(td.string).strip().replace(',','\,')
 				description_list.append(description)
 				passt += 1
 
@@ -66,6 +77,15 @@
 	# get dates from command line, else use now (time.time())
 	starttime = time.gmtime(time.time() - (60 * 60 * 24 * 1))
 	endtime = time.gmtime(time.time() + (60 * 60 * 24 * 31))
+        if len(sys.argv) >=1:
+            if len(sys.argv) >= 2:
+                starttime = time.strptime(str(int(sys.argv[1])), "%Y%m%d")
+                endtime = time.strptime(str(int(sys.argv[2])), "%Y%m%d")
+            else:
+                print "Usage: " + os.path.basename(sys.argv[0]) +  " [start-date] [end-date]"
+                print "dates must be passed in 'yyyymmdd' format"
+                print "if no dates are specified then it defaults to a date range of the last 31 days"
+
 	# Format the string to what we expect
 	date_to = time.strftime("%Y%m", endtime)
 	date_from = time.strftime("%Y%m", starttime)





^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-09-01 21:20 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-11-26 18:54 [gentoo-commits] gentoo commit in src/gwn: get_glsas.py Markos Chandras (hwoarang)
  -- strict thread matches above, loose matches on Subject: below --
2014-09-01 21:20 Markos Chandras (hwoarang)
2013-10-12 16:08 Markos Chandras (hwoarang)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox