Implement subelements. Allow valueless elements. Start designing the native structure interface.

2024-02-03 09:42:20 -08:00
parent 4693e341e1
commit 6a478aa877
4 changed files with 156 additions and 35 deletions
--- a/python/heck/lexer.py
+++ b/python/heck/lexer.py
@ -33,11 +33,20 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
 t_ignore = string.whitespace

 t_DEEP = r'^(>)+'
-t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
+
 t_BASE16 = r'0x[0-9A-Fa-f]+'
 t_SECTION = r'^%%%\s'
 t_ATTRIB = '='

+
+def t_ATOM(token: lex.LexToken):
+    r'[A-Za-z_$][A-Za-z0-9_.-]*'
+    if token.value in ('true', 'True'):
+        token.value = True
+    elif token.value in ('false', 'False'):
+        token.value = False
+    return token
+
 def t_BASE10(token: lex.LexToken):
    r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
    # python numbers are Very Flexible so we ignore typespec
--- a/python/heck/parse.py
+++ b/python/heck/parse.py
@ -36,12 +36,88 @@ class HeckElement:
    def __repr__(self):
        return self.__str__()

+
+    def to_struct(self) -> Mapping[str, Union[int, float, str, Mapping, List]]:
+        """
+        Convert this HeckElement tree to a Python structure. The structure is a dictionary keyed by element name, where
+        each value is a list with one entry per element declared under that name, in order of declaration, and each
+        entry is in turn the list of that element's values.
+
+        For example:
+
+        ```
+        %%% heck
+        a b c
+        a c d
+        ```
+
+        turns into:
+
+        {'a': [['b', 'c'], ['c', 'd']]}
+
+        Subelements are treated as a dictionary object added to the end of each value.
+
+        For example:
+
+        ```
+        %%% heck
+        a b
+        > c d
+        a e
+        > f g
+        ```
+
+        turns into:
+
+        {'a': [['b', {'c': [['d']]}], ['e', {'f': [['g']]}]]}
+        """
+
+
+    def to_dict(self, merge=False) -> Mapping[str, Union[int, float, str, Mapping, List]]:
+        """
+        As with `to_struct`, but attempts to merge all keys together at each level.
+
+        For example:
+
+        ```
+        %%% heck
+        a b c
+        a c d
+        ```
+
+        turns into (with merge set to True):
+
+        {'a': ['b', 'c', 'c', 'd']}
+
+        or (with merge set to False):
+
+        {'a': ['c', 'd']}
+
+        For child elements, the list is replaced by a dictionary, and the values are stored in a special key '%%%values'.
+
+        Thus:
+
+        ```
+        %%% heck
+        a b
+        > c d
+        ```
+
+        becomes:
+
+        {'a':{'%%%values': ['b'], 'c': ['d']}}
+
+        This cannot exactly represent the contents of the input tree; however, it may be more convenient in many use
+        cases where repeated elements of the same key are not allowed, or where keys are treated as unique.
+        """
+
+
 def _get_element(ast: List) -> HeckElement:
    """
    Get an element from an element AST from the parser.
    """
    if not (ast[0] == 'element'):
-        raise HeckParseException("Found a non-element where an element was expected.")
+        raise HeckParseException(f"Found a non-element where an element was expected. {ast}")
    elm = HeckElement()
    elm.name = ast[1];
    for item in ast[2:]:
@ -60,7 +136,9 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
    MODE_UNPARSE = 2

    rootelm = HeckElement()
-    pelm = rootelm # parent for subelement
+    pelm = [rootelm] # parent for subelement
+    pdepth = 0
+    depth = 0
    rootelm.name = "__ROOT__"
    mode = MODE_INIT
    for idx, line in enumerate(inp):
@ -68,7 +146,7 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
            if (line.startswith('%%%')):
                mode = MODE_INIT
            else:
-                pelm.values.append(line)
+                pelm[-1].values.append(line)
                continue
        else:
            ast = parser.parse(line)
@ -76,18 +154,38 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
            if ast[0] == 'section':
                if ast[1] == 'heck':
                    mode = MODE_ELM
-                    pelm = rootelm
+                    pelm = [rootelm]
                else:
                    mode = MODE_UNPARSE
-                    pelm = HeckElement()
-                    rootelm.children.append(pelm)
-                    pelm.name = ast[1]
-                    pelm.unparsed = True
+                    pelm = [HeckElement()]
+                    rootelm.children.append(pelm[-1])
+                    pelm[-1].name = ast[1]
+                    pelm[-1].unparsed = True
            else:
                if not mode == MODE_ELM:
                    raise HeckParseException("Didn't find heck preamble, line {idx}")
                else:
-                    pelm.children.append(_get_element(ast))
+                    if ast[0] == 'deep':
+                        # we're in a subitem
+                        depth = ast[1]
+                        if (depth > pdepth):
+                            # are we deeper than last time?
+                            try:
+                                pelm.append(pelm[-1].children[-1])
+                            except:
+                                raise HeckParseException("Tried to go deeper without a previous element, line {idx}")
+                        elif (depth < pdepth):
+                            # are we shallower than last time?
+                            pelm.pop()
+                            if (not len(pelm)):
+                                raise HeckParseException("Tried to go shallower while already shallow, line {idx}")
+                        ast = ast[2]
+                        pdepth = depth
+                    elif (pdepth > 0):
+                        # we're no longer deep, just pop up to the top
+                        pdepth = 0
+                        pelm = [rootelm]
+                    pelm[-1].children.append(_get_element(ast))

    return rootelm

@ -106,7 +204,15 @@ subtitle "Yep it's a website"
 scale 3.72
 matrix 0 0 0 0 1 2 3  1 2 3 4 29394.2
 tags hey man what are you doin
-
+> more tag tag tag 1 2 3
+>> we can go deeper
+>>> we can go even deeper
+test
+> _val 1
+> _val 2
+> _val 3
+valueless
+boolean True
 %%% markdown
 # Some cheeky markdown to confuse our processing.

--- a/python/heck/parser.py
+++ b/python/heck/parser.py
@ -62,11 +62,15 @@ def p_element(p):
    element : ATOM values
            | ATOM values attributes
            | ATOM attributes
+            | ATOM
    """
    # print(len(p))
-    p[0] = ["element", p[1], p[2]]
-    if (len(p) == 4):
-        p[0].append(p[3])
+    if len(p) <= 2:
+        p[0] = ["element", p[1]]
+    else:
+        p[0] = ["element", p[1], p[2]]
+        if (len(p) == 4):
+            p[0].append(p[3])


 def p_statement(p):
@ -76,7 +80,7 @@ def p_statement(p):
               | section
    """
    if (len(p) > 2):
-        p[0] = ('deep', p[2])
+        p[0] = ('deep', len(p[1]), p[2])
    else:
        p[0] = p[1]

@ -85,7 +89,7 @@ def p_error(p):
    if not p:
        return
    else:
-        print("Syntax error {p}")
+        print(f"Syntax error {p}")

 parser = yacc.yacc(start="statement")

@ -99,6 +103,7 @@ TEST_STRING = [
    'tags yo fresh',
    'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
    '> big_dumper 32 23 384848',
+    '>> deep_dumper 1 2 3 a=false'
 ]

 if __name__ == "__main__":
--- a/python/heck/parsetab.py
+++ b/python/heck/parsetab.py
@ -6,9 +6,9 @@ _tabversion = '3.10'

 _lr_method = 'LALR'

-_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n    value : BASE16\n          | BASE10\n          | STRING\n          | ATOM\n    attribute : ATOM ATTRIB value\n    attributes : attributes attribute\n    attributes : attribute\n    \n    section : SECTION ATOM\n            | SECTION ATOM attributes\n    \n    values : values value\n    values : value\n    \n    element : ATOM values\n            | ATOM values attributes\n            | ATOM attributes\n    \n    statement  : element\n               | DEEP element\n               | section\n    '
+_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n    value : BASE16\n          | BASE10\n          | STRING\n          | ATOM\n    attribute : ATOM ATTRIB value\n    attributes : attributes attribute\n    attributes : attribute\n    \n    section : SECTION ATOM\n            | SECTION ATOM attributes\n    \n    values : values value\n    values : value\n    \n    element : ATOM values\n            | ATOM values attributes\n            | ATOM attributes\n            | ATOM\n    \n    statement  : element\n               | DEEP element\n               | section\n    '
    
-_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-15,-17,-16,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
+_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-16,-18,-15,-17,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}

 _lr_action = {}
 for _k, _v in _lr_action_items.items():
@ -27,21 +27,22 @@ for _k, _v in _lr_goto_items.items():
 del _lr_goto_items
 _lr_productions = [
  ("S' -> statement","S'",1,None,None,None),
-  ('value -> BASE16','value',1,'p_value','parser.py',7),
-  ('value -> BASE10','value',1,'p_value','parser.py',8),
-  ('value -> STRING','value',1,'p_value','parser.py',9),
-  ('value -> ATOM','value',1,'p_value','parser.py',10),
-  ('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',17),
-  ('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',24),
-  ('attributes -> attribute','attributes',1,'p_attributes','parser.py',25),
-  ('section -> SECTION ATOM','section',2,'p_section','parser.py',36),
-  ('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',37),
-  ('values -> values value','values',2,'p_values','parser.py',46),
-  ('values -> value','values',1,'p_values','parser.py',47),
-  ('element -> ATOM values','element',2,'p_element','parser.py',58),
-  ('element -> ATOM values attributes','element',3,'p_element','parser.py',59),
-  ('element -> ATOM attributes','element',2,'p_element','parser.py',60),
-  ('statement -> element','statement',1,'p_statement','parser.py',70),
-  ('statement -> DEEP element','statement',2,'p_statement','parser.py',71),
-  ('statement -> section','statement',1,'p_statement','parser.py',72),
+  ('value -> BASE16','value',1,'p_value','parser.py',11),
+  ('value -> BASE10','value',1,'p_value','parser.py',12),
+  ('value -> STRING','value',1,'p_value','parser.py',13),
+  ('value -> ATOM','value',1,'p_value','parser.py',14),
+  ('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',21),
+  ('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',28),
+  ('attributes -> attribute','attributes',1,'p_attributes','parser.py',29),
+  ('section -> SECTION ATOM','section',2,'p_section','parser.py',40),
+  ('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',41),
+  ('values -> values value','values',2,'p_values','parser.py',50),
+  ('values -> value','values',1,'p_values','parser.py',51),
+  ('element -> ATOM values','element',2,'p_element','parser.py',62),
+  ('element -> ATOM values attributes','element',3,'p_element','parser.py',63),
+  ('element -> ATOM attributes','element',2,'p_element','parser.py',64),
+  ('element -> ATOM','element',1,'p_element','parser.py',65),
+  ('statement -> element','statement',1,'p_statement','parser.py',78),
+  ('statement -> DEEP element','statement',2,'p_statement','parser.py',79),
+  ('statement -> section','statement',1,'p_statement','parser.py',80),
 ]