Python generator functions abstracting Python-ish vs C++-ish syntax
Today, I've been attaching a parser to some SymPy-based logic for obtaining uncertainty propagation expressions based upon Taylor Series Methods. While SymPy is happy to parse Python-ish files, I've already got the expressions I want to manipulate coded up in C++. I'd like to permit parsing either Python-ish or C++-ish input in a manner that gives useful file/line information whenever SymPy dislikes the content.
The Python `yield` keyword provides a really clean mechanism to abstract away such differences in file syntax:
# TODO Line continuation via trailing backslash
def statements_by_newline(files=None):
    r'''
    Generate (filename, lineno, statement) tuples by parsing the provided
    filenames with newline-separated, whitespace-trimmed statements.
    Comments are introduced by a '#' and extend until the end of line.

    The files argument is handed unchanged to fileinput.FileInput.  Lines
    that are empty once the comment and surrounding whitespace have been
    removed produce nothing.  The reported lineno is the one given by
    FileInput.filelineno() for the line holding the statement.  The
    underlying input is closed when the generator is exhausted, is closed
    by its consumer, or propagates an error.

    >>> with tempfile.NamedTemporaryFile(mode='w+') as f:
    ...     print("""a=1 # Trailing comments
    ... # Not every line must have a statement
    ... f # Nor every line involve assignment
    ... """, file=f)
    ...     f.flush()
    ...     for (_, lineno, stmt) in statements_by_newline(f.name):
    ...         print(lineno, stmt)
    1 a=1
    3 f
    '''
    # Process input line-by-line...
    f = fileinput.FileInput(files)
    try:
        for line in f:
            # ...remove comments occurring after the first '#' character
            line, _, _ = line.partition('#')
            # ...trim then yield statement only on nontrivial line
            line = line.strip()
            if line:
                yield (f.filename(), f.filelineno(), line)
    finally:
        # Release the open file even when the consumer stops iterating
        # early or an exception is thrown into the generator.
        f.close()
# TODO Behavior on lingering statement content without semicolon
def statements_by_semicolon(files=None):
    r'''
    Generate (filename, lineno, statement) tuples by parsing the provided
    filenames with semicolon-separated, whitespace-trimmed statements.
    Comments are introduced by a '//' and extend until the end of line.

    The files argument is handed unchanged to fileinput.FileInput.  A
    statement may span several lines; its whitespace-trimmed pieces are
    joined with single spaces and it is reported with the filename and
    lineno of the line containing its terminating semicolon.  Empty
    statements are ignored.  Content that is never terminated by a
    semicolon is not yielded.  The underlying input is closed when the
    generator is exhausted, is closed by its consumer, or propagates an
    error.

    >>> with tempfile.NamedTemporaryFile(mode='w+') as f:
    ...     print("""a=1; // Trailing comments may include ';'
    ... b = // Statements may span lines
    ... c;
    ... 1;2;; // Multiple may appear with empty ignored
    ... """, file=f)
    ...     f.flush()
    ...     for (_, lineno, stmt) in statements_by_semicolon(f.name):
    ...         print(lineno, stmt)
    1 a=1
    3 b = c
    4 1
    4 2
    '''
    # Process input line-by-line maintaining any active statement...
    f = fileinput.FileInput(files)
    stmt = []
    try:
        for line in f:
            # ...remove comments defined as the first '//' observed
            line, _, _ = line.partition('//')
            # ...and yield any statements separated by semicolons
            # being careful to permit continuation from prior lines.
            while line:
                head, sep, line = line.partition(';')
                head = head.strip()
                if head:
                    stmt.append(head)
                # Only a semicolon completes a statement; without one the
                # pieces stay pending for the following line.
                if sep and stmt:
                    yield (f.filename(), f.filelineno(), ' '.join(stmt))
                    del stmt[:]
    finally:
        # Release the open file even when the consumer stops iterating
        # early or an exception is thrown into the generator.
        f.close()
Quite slickly, one can now write:
def parser(statements_generator):
    '''
    Consume (filename, lineno, stmt) tuples from statements_generator.

    Any SyntaxError raised while handling a statement has its filename
    and lineno attributes overwritten with the values that accompanied
    the statement, and is then re-raised.
    '''
    for record in statements_generator:
        source_name, line_number, stmt = record
        try:
            pass  # Manipulate stmt with SymPy
        except SyntaxError as err:
            # Point the error at the originating input location.
            err.filename, err.lineno = source_name, line_number
            raise
producing somewhat usable error messages even though the SymPy handling bits know nothing about the original file syntax.