diff --git a/python/heck/__init__.py b/python/heck/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/python/heck/exceptions.py b/python/heck/exceptions.py deleted file mode 100644 index 389e113..0000000 --- a/python/heck/exceptions.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -Exceptions for HECKfile processing. -""" - -class HeckException (BaseException): - """ - Base exception for HECKfile processing. - """ - - -class HeckParseException(HeckException): - """ - Raised for parse errors specifically. - """ - - -class HeckLexException(HeckException): - """ - Raised for lex errors specifically. - """ diff --git a/python/heck/lexer.py b/python/heck/lexer.py deleted file mode 100644 index c02d8cc..0000000 --- a/python/heck/lexer.py +++ /dev/null @@ -1,119 +0,0 @@ -import ply.lex as lex - -""" -Lexical analyzer for HECKformat lines using PLY Lex. -""" - -from .exceptions import HeckLexException - -from typing import List, Optional - -import string - -tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB', 'DEEP', 'ELEMENT') - -# COMMENT ::= # .*$ -# ATOM ::= [A-Za-z_][A-Za-z0-9_-]? -# BASE10NUMBER ::= (-)?[0-9]+(\.)?[0-9]+([FLUIDCfluidc])? -# BASE16NUMBER ::= 0x[0-9A-Fa-f]+ -# NUMBER ::= () -# STRING ::= "([^\"]*|(\\)|(\"))" -# VALUE ::= (||) -# VALUES ::= (\s+)? -# ATTRIBUTENAME ::= -# ATTRIBUTE ::= = -# ATTRIBUTES ::= (\s+)? -# SECTIONLABEL ::= -# SECTION ::= %%%\s+\s+ -# ELEMENTLABEL ::= [A-Za-z_][A-Za-z0-9!@#$%^&*()_+/\\-]? -# ELEMENT ::= \s+(|) -# LINE ::= ^(((>)*) |
| ) (|$) - - -t_ignore = string.whitespace - -t_DEEP = r'^(>)+' - -t_BASE16 = r'0x[0-9A-Fa-f]+' -t_SECTION = r'^%%%\s' -t_ATTRIB = '=' -t_ELEMENT = r'[A-Za-z_.][A-Za-z0-9.!@\$%^&*()_+/\\-]*' - -def t_ATOM(token: lex.LexToken): - r'[A-Za-z_$][A-Za-z0-9_.-]*' - if token.value in ('true', 'True'): - token.value = True - elif token.value in ('false', 'False'): - token.value = False - return token - - - -def t_BASE10(token: lex.LexToken): - r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)' - # python numbers are Very Flexible so we ignore typespec - vstr = token.value - if vstr[-1] in 'FLUIDCfluidc': - vstr = vstr[:-1] - if '.' in vstr: - token.value = float(vstr) - else: - token.value = int(vstr) - return token - -def t_COMMENT(token: lex.LexToken): - r'\#\s?.*$' - ... - -def t_STRING(token: lex.LexToken): - r'"[^"]*"' - token.value = token.value[1:-1] # substring to strip double quotes - return token - -def t_error(token: lex.LexToken): - print(f"{token.lineno} Unexpected character '{token.value[0]}' at position {token.lexpos}.") - print('... ' + token.value) - print(' ^') - # token.lexer.skip(1) - -lexer = lex.lex() - -def lex_line(line: str, lineno: int=0) -> List[lex.LexToken]: - """ - Return a list of tokens for a particular HECKformat file line. - - """ - lexer.lineno = lineno - try: - lexer.input(line) - tokens = [] - while True: - tok = lexer.token() - if tok: - tokens.append(tok) - else: - break - return tokens - except lex.LexError as inst: - # fixme raise a HeckFormat exception - raise HeckLexException from inst - -TEST_STRINGS = [ - '"hi yo123 123xyz #foo" 123xyz 123.223 1f abcd123 123abc $foo "hello world" #foo', - '1.23f', - '"hello world!" atom utehuteu tnhoeun_etuhenuoh', - '"hi yo123 123xyz #foo" xyz 123.223 1f abcd123 abc $foo "hello world" #foo', - '%%% heck', - '%%% markdown foo=bar', - 'element 1.2 1.3 1.4 attrib="string value for attribute"', - '> element 5 4 3 2.5', -] - -if __name__ == "__main__": - for idx, test in enumerate(TEST_STRINGS): - print(f"Line {idx}: '{test}'") - try: - for token in lex_line(test, idx): - print(' ' + str(token)) - except Exception as inst: - print(f'Error in line.') diff --git a/python/heck/parse.py b/python/heck/parse.py deleted file mode 100644 index 7cdbeb8..0000000 --- a/python/heck/parse.py +++ /dev/null @@ -1,226 +0,0 @@ -from typing import Iterable, Union, Mapping, TypeVar, List, TextIO - -import re - -from .parser import parser -from .exceptions import HeckParseException - -HeckValue = TypeVar("HeckElement") | str | int | float - -class HeckElement: - """ - Container for a tree of HECKformat elements. - """ - name: str - """The name of the element, either __ROOT__ for top level or whatever is specified in file.""" - children: Iterable[TypeVar] - """The children of the element.""" - values: Iterable[HeckValue] - """One or more values associated with the element.""" - attributes: Mapping[str, HeckValue] - """Zero or more attributes associated with the element as a key-value pair.""" - - def __init__(self): - self.children = [] - self.values = [] - self.attributes = dict() - self.name = "" - self.unparsed = False - - def __str__(self): - k='' - if self.unparsed: - k='Unparsed ' - return f"" - - def __repr__(self): - return self.__str__() - - - def to_struct(self) -> Mapping[str, Union[int, float, str, Mapping, List]]: - """ - Convert this HeckElement tree to a Python structure. The structure is a dictionary keyed by element name, where - the values are a list of each tree under that name, in order of their declaration in the tree, and futher, a - list of values. - - For example: - - ``` - %%% heck - a b c - a c d - ``` - - turns into: - - {'a': [['b', 'c'], ['c', 'd']]} - - Subelements are treated as a dictionary object added to the end of each value. - - For example: - - ``` - %%% heck - a b - > c d - a e - > f g - ``` - - turns into: - - {'a': [['b'], {'c': [['d']]}, ['e', {'f': [['g']]}]]} - """ - - - def to_dict(self, merge=False) -> Mapping[str, Union[int, float, str, Mapping, List]]: - """ - As with `to_struct`, but attempts to merge all keys together at each level. - - For example: - - ``` - %%% heck - a b c - a c d - ``` - - turns into (with merge set to True): - - {'a': ['b', 'c', 'c', 'd']} - - or (with merge set to False): - - {'a': ['c', 'd']} - - For child elements, the list is replaced by a dictionary, and the values are stored in a special key '%%% values' - - Thus: - - ``` - %%% heck - a b - > c d - ``` - - becomes: - - {'a':{'%%%values': ['b'], 'c': ['d']}} - - This cannot represent exactly the contents of the input tree, however it may be more convenient in many use - cases where repeated elements of the same key are not allowed, or if keys are being treated as unique. - """ - - -def _get_element(ast: List) -> HeckElement: - """ - Get an element from an element AST from the parser. - """ - if not (ast[0] == 'element'): - raise HeckParseException(f"Found a non-element where an element was expected. {ast}") - elm = HeckElement() - elm.name = ast[1]; - for item in ast[2:]: - if item[0] == 'values': - elm.values = [x[1] for x in item[1:]] - elif item[0] == 'attributes': - elm.attributes.update({x[1]: x[2][1] for x in item[1:]}) - return elm - -def load_heck(inp: Iterable[str]) -> HeckElement: - """ - Load a HECKformat into a tree of HeckElements from a list of lines from the file. - """ - MODE_INIT = 0 - MODE_ELM = 1 - MODE_UNPARSE = 2 - - rootelm = HeckElement() - pelm = [rootelm] # parent for subelement - pdepth = 0 - depth = 0 - rootelm.name = "__ROOT__" - mode = MODE_INIT - for idx, line in enumerate(inp): - if mode == MODE_UNPARSE: - if (line.startswith('%%%')): - mode = MODE_INIT - else: - pelm[-1].values.append(line) - continue - else: - ast = parser.parse(line) - if ast: - if ast[0] == 'section': - if ast[1] == 'heck': - mode = MODE_ELM - pelm = [rootelm] - else: - mode = MODE_UNPARSE - pelm = [HeckElement()] - rootelm.children.append(pelm[-1]) - pelm[-1].name = ast[1] - pelm[-1].unparsed = True - else: - if not mode == MODE_ELM: - raise HeckParseException("Didn't find heck preamble, line {idx}") - else: - if ast[0] == 'deep': - # we're in a subitem - depth = ast[1] - if (depth > pdepth): - # are we deeper than last time? - try: - pelm.append(pelm[-1].children[-1]) - except: - raise HeckParseException("Tried to go deeper without a previous element, line {idx}") - elif (depth < pdepth): - # are we shallower than last time? - pelm.pop() - if (not len(pelm)): - raise HeckParseException("Tried to go shallower while already shallow, line {idx}") - ast = ast[2] - pdepth = depth - elif (pdepth > 0): - # we're no longer deep, just pop up to the top - pdepth = 0 - pelm = [rootelm] - pelm[-1].children.append(_get_element(ast)) - - return rootelm - -def load(infile: TextIO) -> HeckElement: - return load_heck(infile.readlines()) - -def loads(ins: str) -> HeckElement: - return load_heck(re.split(r'\n|\r|\r\n', ins)) - - -TEST_HECK = """ -%%% heck -# Website! -title "My Website" bold=True -subtitle "Yep it's a website" -scale 3.72 -matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2 -tags hey man what are you doin -> more tag tag tag 1 2 3 ->> we can go deeper ->>> we can go even deeper -test -> _val 1 -> _val 2 -> _val 3 -valueless -_more.orless complexelement -.yooooo -boolean True -%%% markdown -# Some cheeky markdown to confuse our processing. - -All my page content goes here. -""" - -if __name__ == "__main__": - result = load_heck(TEST_HECK.split('\n')) - print(result) diff --git a/python/heck/parser.py b/python/heck/parser.py deleted file mode 100644 index 15e9fd4..0000000 --- a/python/heck/parser.py +++ /dev/null @@ -1,118 +0,0 @@ -import ply.yacc as yacc - -""" -Parser for HECKformat lines using PLY Parser. -""" - -from .lexer import tokens - -def p_value(p): - """ - value : BASE16 - | BASE10 - | STRING - | ATOM - """ - #print(p[0], p[1]) - p[0] = ("value", p[1]) - - -def p_elm(p): - """ - elm : ATOM - | ELEMENT - """ - p[0] = p[1] - -def p_attribute(p): - """attribute : ATOM ATTRIB value""" - # print(p[0], p[1]) - p[0] = ("attribute", p[1], p[3]) - - -def p_attributes(p): - """ - attributes : attributes attribute - attributes : attribute - """ - if len(p) == 2: - p[0] = ["attributes", p[1]] - else: - p[0] = p[1] - p[0].append(p[2]) - - -def p_section(p): - """ - section : SECTION elm - | SECTION elm attributes - """ - if (len(p) == 3): - p[0] = ("section", p[2]) - else: - p[0] = ("section", p[2], p[3]) - -def p_values(p): - """ - values : values value - values : value - """ - if len(p) == 2: - p[0] = ["values", p[1]] - else: - p[0] = p[1] - p[0].append(p[2]) - - -def p_element(p): - """ - element : elm values - | elm values attributes - | elm attributes - | elm - """ - # print(len(p)) - if len(p) <= 2: - p[0] = ["element", p[1]] - else: - p[0] = ["element", p[1], p[2]] - if (len(p) == 4): - p[0].append(p[3]) - - -def p_statement(p): - """ - statement : element - | DEEP element - | section - """ - if (len(p) > 2): - p[0] = ('deep', len(p[1]), p[2]) - else: - p[0] = p[1] - - -def p_error(p): - if not p: - return - else: - print(f"Syntax error {p}") - -parser = yacc.yacc(start="statement") - - -TEST_STRING = [ - '%%% heck', - '%%% heck foo=bar', - '%%% heck bar=-5l quux="hello! how are you today?" fred=69 barney=nice', - 'title "My website!"', - 'zoom 5.73', - 'tags yo fresh', - 'dumper 1 2 3 4 5 6 7 8 9 dumpped=True', - '> big_dumper 32 23 384848', - '>> deep_dumper 1 2 3 a=false' -] - -if __name__ == "__main__": - for test in TEST_STRING: - print(parser.parse(test)) diff --git a/python/heck/parsetab.py b/python/heck/parsetab.py deleted file mode 100644 index 32ad143..0000000 --- a/python/heck/parsetab.py +++ /dev/null @@ -1,50 +0,0 @@ - -# parsetab.py -# This file is automatically generated. Do not edit. -# pylint: disable=W,C,R -_tabversion = '3.10' - -_lr_method = 'LALR' - -_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP ELEMENT SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n \n elm : ATOM\n | ELEMENT\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION elm\n | SECTION elm attributes\n \n values : values value\n values : value\n \n element : elm values\n | elm values attributes\n | elm attributes\n | elm\n \n statement : element\n | DEEP element\n | section\n ' - -_lr_action_items = {'DEEP':([0,],[3,]),'SECTION':([0,],[6,]),'ATOM':([0,3,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,23,24,25,26,],[7,7,17,7,-5,-6,17,22,-13,-9,-1,-2,-3,-4,22,22,-12,-8,25,22,-4,-7,]),'ELEMENT':([0,3,6,],[8,8,8,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,24,25,26,],[0,-18,-20,-17,-5,-6,-19,-14,-16,-13,-9,-1,-2,-3,-4,-10,-15,-12,-8,-11,-4,-7,]),'BASE16':([5,7,8,10,12,14,15,16,17,20,23,],[14,-5,-6,14,-13,-1,-2,-3,-4,-12,14,]),'BASE10':([5,7,8,10,12,14,15,16,17,20,23,],[15,-5,-6,15,-13,-1,-2,-3,-4,-12,15,]),'STRING':([5,7,8,10,12,14,15,16,17,20,23,],[16,-5,-6,16,-13,-1,-2,-3,-4,-12,16,]),'ATTRIB':([17,22,],[23,23,]),} - -_lr_action = {} -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = {} - _lr_action[_x][_k] = _y -del _lr_action_items - -_lr_goto_items = {'statement':([0,],[1,]),'element':([0,3,],[2,9,]),'section':([0,],[4,]),'elm':([0,3,6,],[5,5,18,]),'values':([5,],[10,]),'attributes':([5,10,18,],[11,19,24,]),'value':([5,10,23,],[12,20,26,]),'attribute':([5,10,11,18,19,24,],[13,13,21,13,21,21,]),} - -_lr_goto = {} -for _k, _v in _lr_goto_items.items(): - for _x, _y in zip(_v[0], _v[1]): - if not _x in _lr_goto: _lr_goto[_x] = {} - _lr_goto[_x][_k] = _y -del _lr_goto_items -_lr_productions = [ - ("S' -> statement","S'",1,None,None,None), - ('value -> BASE16','value',1,'p_value','parser.py',11), - ('value -> BASE10','value',1,'p_value','parser.py',12), - ('value -> STRING','value',1,'p_value','parser.py',13), - ('value -> ATOM','value',1,'p_value','parser.py',14), - ('elm -> ATOM','elm',1,'p_elm','parser.py',22), - ('elm -> ELEMENT','elm',1,'p_elm','parser.py',23), - ('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',27), - ('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',34), - ('attributes -> attribute','attributes',1,'p_attributes','parser.py',35), - ('section -> SECTION elm','section',2,'p_section','parser.py',46), - ('section -> SECTION elm attributes','section',3,'p_section','parser.py',47), - ('values -> values value','values',2,'p_values','parser.py',56), - ('values -> value','values',1,'p_values','parser.py',57), - ('element -> elm values','element',2,'p_element','parser.py',68), - ('element -> elm values attributes','element',3,'p_element','parser.py',69), - ('element -> elm attributes','element',2,'p_element','parser.py',70), - ('element -> elm','element',1,'p_element','parser.py',71), - ('statement -> element','statement',1,'p_statement','parser.py',84), - ('statement -> DEEP element','statement',2,'p_statement','parser.py',85), - ('statement -> section','statement',1,'p_statement','parser.py',86), -]