diff --git a/python/heck/lexer.py b/python/heck/lexer.py
index 7e2f7f0..ae23547 100644
--- a/python/heck/lexer.py
+++ b/python/heck/lexer.py
@@ -33,11 +33,20 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
 
 t_ignore = string.whitespace
 t_DEEP = r'^(>)+'
-t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
+
 t_BASE16 = r'0x[0-9A-Fa-f]+'
 t_SECTION = r'^%%%\s'
 t_ATTRIB = '='
 
+
+def t_ATOM(token: lex.LexToken):
+    r'[A-Za-z_$][A-Za-z0-9_.-]*'
+    if token.value in ('true', 'True'):  # boolean literals become real bools
+        token.value = True
+    elif token.value in ('false', 'False'):
+        token.value = False
+    return token
+
 def t_BASE10(token: lex.LexToken):
     r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
     # python numbers are Very Flexible so we ignore typespec
diff --git a/python/heck/parse.py b/python/heck/parse.py
index 90a6309..c7f776b 100644
--- a/python/heck/parse.py
+++ b/python/heck/parse.py
@@ -36,12 +36,88 @@ class HeckElement:
     def __repr__(self):
         return self.__str__()
 
+
+    def to_struct(self) -> Mapping[str, Union[int, float, str, Mapping, List]]:
+        """
+        Convert this HeckElement tree to a Python structure. The structure is a dictionary keyed by element name, where
+        the values are a list of each tree under that name, in order of their declaration in the tree, and further, a
+        list of values.
+
+        For example:
+
+        ```
+        %%% heck
+        a b c
+        a c d
+        ```
+
+        turns into:
+
+        {'a': [['b', 'c'], ['c', 'd']]}
+
+        Subelements are treated as a dictionary object added to the end of each value.
+
+        For example:
+
+        ```
+        %%% heck
+        a b
+        > c d
+        a e
+        > f g
+        ```
+
+        turns into:
+
+        {'a': [['b', {'c': [['d']]}], ['e', {'f': [['g']]}]]}
+        """
+
+
+    def to_dict(self, merge=False) -> Mapping[str, Union[int, float, str, Mapping, List]]:
+        """
+        As with `to_struct`, but attempts to merge all keys together at each level.
+
+        For example:
+
+        ```
+        %%% heck
+        a b c
+        a c d
+        ```
+
+        turns into (with merge set to True):
+
+        {'a': ['b', 'c', 'c', 'd']}
+
+        or (with merge set to False):
+
+        {'a': ['c', 'd']}
+
+        For child elements, the list is replaced by a dictionary, and the values are stored in a special key '%%% values'
+
+        Thus:
+
+        ```
+        %%% heck
+        a b
+        > c d
+        ```
+
+        becomes:
+
+        {'a':{'%%%values': ['b'], 'c': ['d']}}
+
+        This cannot represent exactly the contents of the input tree, however it may be more convenient in many use
+        cases where repeated elements of the same key are not allowed, or if keys are being treated as unique.
+        """
+
+
 def _get_element(ast: List) -> HeckElement:
     """
     Get an element from an element AST from the parser.
     """
     if not (ast[0] == 'element'):
-        raise HeckParseException("Found a non-element where an element was expected.")
+        raise HeckParseException(f"Found a non-element where an element was expected. {ast}")
     elm = HeckElement()
     elm.name = ast[1];
     for item in ast[2:]:
@@ -60,7 +136,9 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
     MODE_UNPARSE = 2
 
     rootelm = HeckElement()
-    pelm = rootelm # parent for subelement
+    pelm = [rootelm]  # stack of parents; pelm[-1] receives the next element
+    pdepth = 0
+    depth = 0
     rootelm.name = "__ROOT__"
     mode = MODE_INIT
     for idx, line in enumerate(inp):
@@ -68,7 +146,7 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
             if (line.startswith('%%%')):
                 mode = MODE_INIT
             else:
-                pelm.values.append(line)
+                pelm[-1].values.append(line)
                 continue
         else:
             ast = parser.parse(line)
@@ -76,18 +154,38 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
         if ast[0] == 'section':
             if ast[1] == 'heck':
                 mode = MODE_ELM
-                pelm = rootelm
+                pelm, pdepth = [rootelm], 0  # new section: forget any nesting left over from the previous one
             else:
                 mode = MODE_UNPARSE
-                pelm = HeckElement()
-                rootelm.children.append(pelm)
-                pelm.name = ast[1]
-                pelm.unparsed = True
+                pelm = [HeckElement()]
+                rootelm.children.append(pelm[-1])
+                pelm[-1].name = ast[1]
+                pelm[-1].unparsed = True
         else:
             if not mode == MODE_ELM:
-                raise HeckParseException("Didn't find heck preamble, line {idx}")
+                raise HeckParseException(f"Didn't find heck preamble, line {idx}")
             else:
-                pelm.children.append(_get_element(ast))
+                if ast[0] == 'deep':
+                    # we're in a subitem; the parser hands us ('deep', depth, element)
+                    depth = ast[1]
+                    if (depth > pdepth):
+                        # deeper than last time: the most recent sibling becomes the parent
+                        try:
+                            pelm.append(pelm[-1].children[-1])
+                        except IndexError as err:
+                            raise HeckParseException(f"Tried to go deeper without a previous element, line {idx}") from err
+                    elif (depth < pdepth):
+                        # shallower than last time: drop one parent per level we climbed back up
+                        del pelm[depth - pdepth:]
+                        if (not len(pelm)):
+                            raise HeckParseException(f"Tried to go shallower while already shallow, line {idx}")
+                    ast = ast[2]
+                    pdepth = depth
+                elif (pdepth > 0):
+                    # we're no longer deep, just pop up to the top
+                    pdepth = 0
+                    pelm = [rootelm]
+                pelm[-1].children.append(_get_element(ast))
 
     return rootelm
 
@@ -106,7 +204,15 @@ subtitle "Yep it's a website"
 scale 3.72
 matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
 tags hey man what are you doin
-
+> more tag tag tag 1 2 3
+>> we can go deeper
+>>> we can go even deeper
+test
+> _val 1
+> _val 2
+> _val 3
+valueless
+boolean True
 %%% markdown
 # Some cheeky markdown to confuse our processing.
 
diff --git a/python/heck/parser.py b/python/heck/parser.py
index 4ab3112..53c8f97 100644
--- a/python/heck/parser.py
+++ b/python/heck/parser.py
@@ -62,11 +62,15 @@ def p_element(p):
     element : ATOM values
             | ATOM values attributes
             | ATOM attributes
+            | ATOM
     """
     # print(len(p))
-    p[0] = ["element", p[1], p[2]]
-    if (len(p) == 4):
-        p[0].append(p[3])
+    if len(p) <= 2:  # bare "ATOM" production: an element with neither values nor attributes
+        p[0] = ["element", p[1]]
+    else:
+        p[0] = ["element", p[1], p[2]]
+        if (len(p) == 4):
+            p[0].append(p[3])
 
 
 def p_statement(p):
@@ -76,7 +80,7 @@ def p_statement(p):
               | section
     """
     if (len(p) > 2):
-        p[0] = ('deep', p[2])
+        p[0] = ('deep', len(p[1]), p[2])  # ('deep', length of the matched DEEP text i.e. one per '>', element)
     else:
         p[0] = p[1]
 
@@ -85,7 +89,7 @@ def p_error(p):
     if not p:
         return
     else:
-        print("Syntax error {p}")
+        print(f"Syntax error {p}")
 
 
 parser = yacc.yacc(start="statement")
@@ -99,6 +103,7 @@ TEST_STRING = [
     'tags yo fresh',
     'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
     '> big_dumper 32 23 384848',
+    '>> deep_dumper 1 2 3 a=false'
 ]
 
 if __name__ == "__main__":
diff --git a/python/heck/parsetab.py b/python/heck/parsetab.py
index 174f156..b826a56 100644
--- a/python/heck/parsetab.py
+++ b/python/heck/parsetab.py
@@ -6,9 +6,9 @@ _tabversion = '3.10'
 
 _lr_method = 'LALR'
 
-_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n \n statement : element\n | DEEP element\n | section\n '
+_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n | ATOM\n \n statement : element\n | DEEP element\n | section\n '
 
-_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-15,-17,-16,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
+_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-16,-18,-15,-17,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
 
 _lr_action = {}
 for _k, _v in _lr_action_items.items():
@@ -27,21 +27,22 @@ for _k, _v in _lr_goto_items.items():
 del _lr_goto_items
 _lr_productions = [
   ("S' -> statement","S'",1,None,None,None),
-  ('value -> BASE16','value',1,'p_value','parser.py',7),
-  ('value -> BASE10','value',1,'p_value','parser.py',8),
-  ('value -> STRING','value',1,'p_value','parser.py',9),
-  ('value -> ATOM','value',1,'p_value','parser.py',10),
-  ('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',17),
-  ('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',24),
-  ('attributes -> attribute','attributes',1,'p_attributes','parser.py',25),
-  ('section -> SECTION ATOM','section',2,'p_section','parser.py',36),
-  ('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',37),
-  ('values -> values value','values',2,'p_values','parser.py',46),
-  ('values -> value','values',1,'p_values','parser.py',47),
-  ('element -> ATOM values','element',2,'p_element','parser.py',58),
-  ('element -> ATOM values attributes','element',3,'p_element','parser.py',59),
-  ('element -> ATOM attributes','element',2,'p_element','parser.py',60),
-  ('statement -> element','statement',1,'p_statement','parser.py',70),
-  ('statement -> DEEP element','statement',2,'p_statement','parser.py',71),
-  ('statement -> section','statement',1,'p_statement','parser.py',72),
+  ('value -> BASE16','value',1,'p_value','parser.py',11),
+  ('value -> BASE10','value',1,'p_value','parser.py',12),
+  ('value -> STRING','value',1,'p_value','parser.py',13),
+  ('value -> ATOM','value',1,'p_value','parser.py',14),
+  ('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',21),
+  ('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',28),
+  ('attributes -> attribute','attributes',1,'p_attributes','parser.py',29),
+  ('section -> SECTION ATOM','section',2,'p_section','parser.py',40),
+  ('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',41),
+  ('values -> values value','values',2,'p_values','parser.py',50),
+  ('values -> value','values',1,'p_values','parser.py',51),
+  ('element -> ATOM values','element',2,'p_element','parser.py',62),
+  ('element -> ATOM values attributes','element',3,'p_element','parser.py',63),
+  ('element -> ATOM attributes','element',2,'p_element','parser.py',64),
+  ('element -> ATOM','element',1,'p_element','parser.py',65),
+  ('statement -> element','statement',1,'p_statement','parser.py',78),
+  ('statement -> DEEP element','statement',2,'p_statement','parser.py',79),
+  ('statement -> section','statement',1,'p_statement','parser.py',80),
 ]