diff --git a/python/heck/lexer.py b/python/heck/lexer.py index 1af81ba..7e2f7f0 100644 --- a/python/heck/lexer.py +++ b/python/heck/lexer.py @@ -1,5 +1,11 @@ import ply.lex as lex +""" +Lexical analyzer for HECKformat lines using PLY Lex. +""" + +from .exceptions import HeckLexException + from typing import List, Optional import string @@ -25,6 +31,7 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB', t_ignore = string.whitespace + t_DEEP = r'^(>)+' t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*' t_BASE16 = r'0x[0-9A-Fa-f]+' @@ -60,7 +67,11 @@ def t_error(token: lex.LexToken): lexer = lex.lex() -def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]: +def lex_line(line: str, lineno: int=0) -> List[lex.LexToken]: + """ + Return a list of tokens for a particular HECKformat file line. + + """ lexer.lineno = lineno try: lexer.input(line) @@ -74,7 +85,7 @@ def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]: return tokens except lex.LexError as inst: # fixme raise a HeckFormat exception - raise inst + raise HeckLexException from inst TEST_STRINGS = [ '"hi yo123 123xyz #foo" 123xyz 123.223 1f abcd123 123abc $foo "hello world" #foo', diff --git a/python/heck/parse.py b/python/heck/parse.py index 7437701..90a6309 100644 --- a/python/heck/parse.py +++ b/python/heck/parse.py @@ -1,24 +1,24 @@ -from typing import Iterable, Union, Mapping, TypeVar, List +from typing import Iterable, Union, Mapping, TypeVar, List, TextIO import re -from parser import parser - -class HeckException (Exception): - ... - -class HeckParseException(HeckException): - ... - +from .parser import parser +from .exceptions import HeckParseException HeckValue = TypeVar("HeckElement") | str | int | float class HeckElement: + """ + Container for a tree of HECKformat elements. 
+ """ name: str + """The name of the element, either __ROOT__ for top level or whatever is specified in file.""" children: Iterable[TypeVar] + """The children of the element.""" values: Iterable[HeckValue] + """One or more values associated with the element.""" attributes: Mapping[str, HeckValue] - + """Zero or more attributes associated with the element as a key-value pair.""" def __init__(self): self.children = [] @@ -36,28 +36,10 @@ class HeckElement: def __repr__(self): return self.__str__() -# COMMENT ::= # .*$ -# ATOM ::= [A-Za-z_][A-Za-z0-9_-]? -# BASE10NUMBER ::= (-)?[0-9]+(\.)?[0-9]+([FLUIDCfluidc])? -# BASE16NUMBER ::= 0x[0-9A-Fa-f]+ -# NUMBER ::= () -# STRING ::= "([^\"]*|(\\)|(\"))" -# VALUE ::= (||) -# VALUES ::= (\s+)? -# ATTRIBUTENAME ::= -# ATTRIBUTE ::= = -# ATTRIBUTES ::= (\s+)? -# SECTIONLABEL ::= -# SECTION ::= %%%\s+\s+ -# ELEMENTLABEL ::= [A-Za-z_][A-Za-z0-9!@#$%^&*()_+/\\-]? -# ELEMENT ::= \s+(|) -# LINE ::= ^(((>)*) |
def load(infile: TextIO) -> HeckElement:
    """
    Load a HECKformat document from an open text file.

    Every line is read from ``infile`` and handed to ``load_heck``.
    """
    return load_heck(infile.readlines())


def loads(ins: str) -> HeckElement:
    """
    Load a HECKformat document from a string.

    The string is split on line endings ('\\r\\n', '\\n' or '\\r') and the
    resulting lines are handed to ``load_heck``.
    """
    # '\r\n' has to be the first alternative: regex alternation is tried
    # left to right, so with '\n|\r|\r\n' the lone '\r' always matches first
    # and each CRLF line ending produces an extra empty line.
    return load_heck(re.split(r'\r\n|\n|\r', ins))