public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download: 
* [gentoo-portage-dev] [PATCH] filter-bash-environment.py: use buffered input, raw bytes (bug 647654)
@ 2018-02-14 20:38 99% Zac Medico
  0 siblings, 0 replies; 1+ results
From: Zac Medico @ 2018-02-14 20:38 UTC (permalink / raw)
  To: gentoo-portage-dev; +Cc: Zac Medico

Use sys.stdin.buffer instead of sys.stdin.buffer.raw, so that input is
buffered. Also operate on raw bytes instead of Unicode strings, both to
avoid making assumptions about character encodings and to avoid the
overhead of Unicode decoding/encoding.

Bug: https://bugs.gentoo.org/647654
---
 bin/filter-bash-environment.py | 45 ++++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/bin/filter-bash-environment.py b/bin/filter-bash-environment.py
index a4cdc5429..91c194b95 100755
--- a/bin/filter-bash-environment.py
+++ b/bin/filter-bash-environment.py
@@ -2,21 +2,19 @@
 # Copyright 1999-2014 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
-import codecs
-import io
 import os
 import re
 import sys
 
-here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
-func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$')
-func_end_re = re.compile(r'^\}$')
+here_doc_re = re.compile(br'.*\s<<[-]?(\w+)$')
+func_start_re = re.compile(br'^[-\w]+\s*\(\)\s*$')
+func_end_re = re.compile(br'^\}$')
 
-var_assign_re = re.compile(r'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$')
-close_quote_re = re.compile(r'(\\"|"|\')\s*$')
-readonly_re = re.compile(r'^declare\s+-(\S*)r(\S*)\s+')
+var_assign_re = re.compile(br'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$')
+close_quote_re = re.compile(br'(\\"|"|\')\s*$')
+readonly_re = re.compile(br'^declare\s+-(\S*)r(\S*)\s+')
 # declare without assignment
-var_declare_re = re.compile(r'^declare(\s+-\S+)?\s+([^=\s]+)\s*$')
+var_declare_re = re.compile(br'^declare(\s+-\S+)?\s+([^=\s]+)\s*$')
 
 def have_end_quote(quote, line):
 	"""
@@ -32,16 +30,16 @@ def have_end_quote(quote, line):
 def filter_declare_readonly_opt(line):
 	readonly_match = readonly_re.match(line)
 	if readonly_match is not None:
-		declare_opts = ''
+		declare_opts = b''
 		for i in (1, 2):
 			group = readonly_match.group(i)
 			if group is not None:
 				declare_opts += group
 		if declare_opts:
-			line = 'declare -%s %s' % \
+			line = b'declare -%s %s' % \
 				(declare_opts, line[readonly_match.end():])
 		else:
-			line = 'declare ' + line[readonly_match.end():]
+			line = b'declare ' + line[readonly_match.end():]
 	return line
 
 def filter_bash_environment(pattern, file_in, file_out):
@@ -57,7 +55,7 @@ def filter_bash_environment(pattern, file_in, file_out):
 	for line in file_in:
 		if multi_line_quote is not None:
 			if not multi_line_quote_filter:
-				file_out.write(line.replace("\1", ""))
+				file_out.write(line.replace(b"\1", b""))
 			if have_end_quote(multi_line_quote, line):
 				multi_line_quote = None
 				multi_line_quote_filter = None
@@ -78,7 +76,7 @@ def filter_bash_environment(pattern, file_in, file_out):
 					multi_line_quote_filter = filter_this
 				if not filter_this:
 					line = filter_declare_readonly_opt(line)
-					file_out.write(line.replace("\1", ""))
+					file_out.write(line.replace(b"\1", b""))
 				continue
 			else:
 				declare_match = var_declare_re.match(line)
@@ -98,7 +96,7 @@ def filter_bash_environment(pattern, file_in, file_out):
 			continue
 		here_doc = here_doc_re.match(line)
 		if here_doc is not None:
-			here_doc_delim = re.compile("^%s$" % here_doc.group(1))
+			here_doc_delim = re.compile(b"^%s$" % here_doc.group(1))
 			file_out.write(line)
 			continue
 		# Note: here-documents are handled before functions since otherwise
@@ -141,18 +139,17 @@ if __name__ == "__main__":
 	file_in = sys.stdin
 	file_out = sys.stdout
 	if sys.hexversion >= 0x3000000:
-		file_in = codecs.iterdecode(sys.stdin.buffer.raw,
-			'utf_8', errors='replace')
-		file_out = io.TextIOWrapper(sys.stdout.buffer,
-			'utf_8', errors='backslashreplace')
-
-	var_pattern = args[0].split()
+		file_in = sys.stdin.buffer
+		file_out = sys.stdout.buffer
+		var_pattern = os.fsencode(args[0]).split()
+	else:
+		var_pattern = args[0].split()
 
 	# Filter invalid variable names that are not supported by bash.
-	var_pattern.append(r'\d.*')
-	var_pattern.append(r'.*\W.*')
+	var_pattern.append(br'\d.*')
+	var_pattern.append(br'.*\W.*')
 
-	var_pattern = "^(%s)$" % "|".join(var_pattern)
+	var_pattern = b"^(%s)$" % b"|".join(var_pattern)
 	filter_bash_environment(
 		re.compile(var_pattern), file_in, file_out)
 	file_out.flush()
-- 
2.13.6



^ permalink raw reply related	[relevance 99%]

Results 1-1 of 1 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2018-02-14 20:38 99% [gentoo-portage-dev] [PATCH] filter-bash-environment.py: use buffered input, raw bytes (bug 647654) Zac Medico

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.