Implement subelements. Allow valueless elements. Start designing the native structure interface.

This commit is contained in:
Cassowary 2024-02-03 09:42:20 -08:00
parent 4693e341e1
commit 6a478aa877
4 changed files with 156 additions and 35 deletions

View File

@ -33,11 +33,20 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
t_ignore = string.whitespace t_ignore = string.whitespace
t_DEEP = r'^(>)+' t_DEEP = r'^(>)+'
t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
t_BASE16 = r'0x[0-9A-Fa-f]+' t_BASE16 = r'0x[0-9A-Fa-f]+'
t_SECTION = r'^%%%\s' t_SECTION = r'^%%%\s'
t_ATTRIB = '=' t_ATTRIB = '='
def t_ATOM(token: lex.LexToken):
    r'[A-Za-z_$][A-Za-z0-9_.-]*'
    # The raw string above is the token's regular expression; PLY picks it up
    # from the docstring slot, so it has to stay the first statement here and
    # cannot be replaced by an ordinary docstring.
    #
    # Atoms spelled as boolean literals are converted to real Python bools;
    # every other atom keeps the text that was matched.
    bool_literals = {
        'true': True,
        'True': True,
        'false': False,
        'False': False,
    }
    matched = token.value
    token.value = bool_literals.get(matched, matched)
    return token
def t_BASE10(token: lex.LexToken): def t_BASE10(token: lex.LexToken):
r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)' r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
# python numbers are Very Flexible so we ignore typespec # python numbers are Very Flexible so we ignore typespec

View File

@ -36,12 +36,88 @@ class HeckElement:
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()
def to_struct(self) -> Mapping[str, Union[int, float, str, Mapping, List]]:
    """
    Convert this HeckElement tree to a Python structure. The structure is a dictionary keyed by element name, where
    each value is a list holding one entry per element declared under that name, in order of declaration in the
    tree; each entry is, in turn, the list of that element's values.

    For example:
    ```
    %%% heck
    a b c
    a c d
    ```
    turns into:
    {'a': [['b', 'c'], ['c', 'd']]}

    Subelements are treated as a dictionary object added to the end of each value list.

    For example:
    ```
    %%% heck
    a b
    > c d
    a e
    > f g
    ```
    turns into:
    {'a': [['b', {'c': [['d']]}], ['e', {'f': [['g']]}]]}
    """
    # TODO: not implemented yet -- the body consists of the docstring only, so a call currently returns None.
def to_dict(self, merge=False) -> Mapping[str, Union[int, float, str, Mapping, List]]:
    """
    As with `to_struct`, but collapses repeated keys at each level into a single entry.

    For example:
    ```
    %%% heck
    a b c
    a c d
    ```
    turns into (with merge set to True, the values of repeated keys are concatenated):
    {'a': ['b', 'c', 'c', 'd']}
    or (with merge set to False, the last declaration of a key replaces the earlier ones):
    {'a': ['c', 'd']}

    For child elements, the list is replaced by a dictionary, and the values are stored in a special key '%%% values'

    Thus:
    ```
    %%% heck
    a b
    > c d
    ```
    becomes:
    {'a':{'%%%values': ['b'], 'c': ['d']}}

    This cannot represent exactly the contents of the input tree; however, it may be more convenient in many use
    cases where repeated elements of the same key are not allowed, or if keys are being treated as unique.
    """
    # NOTE(review): the prose above spells the special key '%%% values' (with a space) while the example uses
    # '%%%values' -- confirm which spelling is intended before implementing.
    # TODO: not implemented yet -- the body consists of the docstring only, so a call currently returns None.
def _get_element(ast: List) -> HeckElement: def _get_element(ast: List) -> HeckElement:
""" """
Get an element from an element AST from the parser. Get an element from an element AST from the parser.
""" """
if not (ast[0] == 'element'): if not (ast[0] == 'element'):
raise HeckParseException("Found a non-element where an element was expected.") raise HeckParseException(f"Found a non-element where an element was expected. {ast}")
elm = HeckElement() elm = HeckElement()
elm.name = ast[1]; elm.name = ast[1];
for item in ast[2:]: for item in ast[2:]:
@ -60,7 +136,9 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
MODE_UNPARSE = 2 MODE_UNPARSE = 2
rootelm = HeckElement() rootelm = HeckElement()
pelm = rootelm # parent for subelement pelm = [rootelm] # parent for subelement
pdepth = 0
depth = 0
rootelm.name = "__ROOT__" rootelm.name = "__ROOT__"
mode = MODE_INIT mode = MODE_INIT
for idx, line in enumerate(inp): for idx, line in enumerate(inp):
@ -68,7 +146,7 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
if (line.startswith('%%%')): if (line.startswith('%%%')):
mode = MODE_INIT mode = MODE_INIT
else: else:
pelm.values.append(line) pelm[-1].values.append(line)
continue continue
else: else:
ast = parser.parse(line) ast = parser.parse(line)
@ -76,18 +154,38 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
if ast[0] == 'section': if ast[0] == 'section':
if ast[1] == 'heck': if ast[1] == 'heck':
mode = MODE_ELM mode = MODE_ELM
pelm = rootelm pelm = [rootelm]
else: else:
mode = MODE_UNPARSE mode = MODE_UNPARSE
pelm = HeckElement() pelm = [HeckElement()]
rootelm.children.append(pelm) rootelm.children.append(pelm[-1])
pelm.name = ast[1] pelm[-1].name = ast[1]
pelm.unparsed = True pelm[-1].unparsed = True
else: else:
if not mode == MODE_ELM: if not mode == MODE_ELM:
raise HeckParseException("Didn't find heck preamble, line {idx}") raise HeckParseException("Didn't find heck preamble, line {idx}")
else: else:
pelm.children.append(_get_element(ast)) if ast[0] == 'deep':
# we're in a subitem
depth = ast[1]
if (depth > pdepth):
# are we deeper than last time?
try:
pelm.append(pelm[-1].children[-1])
except:
raise HeckParseException("Tried to go deeper without a previous element, line {idx}")
elif (depth < pdepth):
# are we shallower than last time?
pelm.pop()
if (not len(pelm)):
raise HeckParseException("Tried to go shallower while already shallow, line {idx}")
ast = ast[2]
pdepth = depth
elif (pdepth > 0):
# we're no longer deep, just pop up to the top
pdepth = 0
pelm = [rootelm]
pelm[-1].children.append(_get_element(ast))
return rootelm return rootelm
@ -106,7 +204,15 @@ subtitle "Yep it's a website"
scale 3.72 scale 3.72
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2 matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
tags hey man what are you doin tags hey man what are you doin
> more tag tag tag 1 2 3
>> we can go deeper
>>> we can go even deeper
test
> _val 1
> _val 2
> _val 3
valueless
boolean True
%%% markdown %%% markdown
# Some cheeky markdown to confuse our processing. # Some cheeky markdown to confuse our processing.

View File

@ -62,11 +62,15 @@ def p_element(p):
element : ATOM values element : ATOM values
| ATOM values attributes | ATOM values attributes
| ATOM attributes | ATOM attributes
| ATOM
""" """
# print(len(p)) # print(len(p))
p[0] = ["element", p[1], p[2]] if len(p) <= 2:
if (len(p) == 4): p[0] = ["element", p[1]]
p[0].append(p[3]) else:
p[0] = ["element", p[1], p[2]]
if (len(p) == 4):
p[0].append(p[3])
def p_statement(p): def p_statement(p):
@ -76,7 +80,7 @@ def p_statement(p):
| section | section
""" """
if (len(p) > 2): if (len(p) > 2):
p[0] = ('deep', p[2]) p[0] = ('deep', len(p[1]), p[2])
else: else:
p[0] = p[1] p[0] = p[1]
@ -85,7 +89,7 @@ def p_error(p):
if not p: if not p:
return return
else: else:
print("Syntax error {p}") print(f"Syntax error {p}")
parser = yacc.yacc(start="statement") parser = yacc.yacc(start="statement")
@ -99,6 +103,7 @@ TEST_STRING = [
'tags yo fresh', 'tags yo fresh',
'dumper 1 2 3 4 5 6 7 8 9 dumpped=True', 'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
'> big_dumper 32 23 384848', '> big_dumper 32 23 384848',
'>> deep_dumper 1 2 3 a=false'
] ]
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -6,9 +6,9 @@ _tabversion = '3.10'
_lr_method = 'LALR' _lr_method = 'LALR'
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n \n statement : element\n | DEEP element\n | section\n ' _lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n | ATOM\n \n statement : element\n | DEEP element\n | section\n '
_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-15,-17,-16,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),} _lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-16,-18,-15,-17,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
_lr_action = {} _lr_action = {}
for _k, _v in _lr_action_items.items(): for _k, _v in _lr_action_items.items():
@ -27,21 +27,22 @@ for _k, _v in _lr_goto_items.items():
del _lr_goto_items del _lr_goto_items
_lr_productions = [ _lr_productions = [
("S' -> statement","S'",1,None,None,None), ("S' -> statement","S'",1,None,None,None),
('value -> BASE16','value',1,'p_value','parser.py',7), ('value -> BASE16','value',1,'p_value','parser.py',11),
('value -> BASE10','value',1,'p_value','parser.py',8), ('value -> BASE10','value',1,'p_value','parser.py',12),
('value -> STRING','value',1,'p_value','parser.py',9), ('value -> STRING','value',1,'p_value','parser.py',13),
('value -> ATOM','value',1,'p_value','parser.py',10), ('value -> ATOM','value',1,'p_value','parser.py',14),
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',17), ('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',21),
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',24), ('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',28),
('attributes -> attribute','attributes',1,'p_attributes','parser.py',25), ('attributes -> attribute','attributes',1,'p_attributes','parser.py',29),
('section -> SECTION ATOM','section',2,'p_section','parser.py',36), ('section -> SECTION ATOM','section',2,'p_section','parser.py',40),
('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',37), ('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',41),
('values -> values value','values',2,'p_values','parser.py',46), ('values -> values value','values',2,'p_values','parser.py',50),
('values -> value','values',1,'p_values','parser.py',47), ('values -> value','values',1,'p_values','parser.py',51),
('element -> ATOM values','element',2,'p_element','parser.py',58), ('element -> ATOM values','element',2,'p_element','parser.py',62),
('element -> ATOM values attributes','element',3,'p_element','parser.py',59), ('element -> ATOM values attributes','element',3,'p_element','parser.py',63),
('element -> ATOM attributes','element',2,'p_element','parser.py',60), ('element -> ATOM attributes','element',2,'p_element','parser.py',64),
('statement -> element','statement',1,'p_statement','parser.py',70), ('element -> ATOM','element',1,'p_element','parser.py',65),
('statement -> DEEP element','statement',2,'p_statement','parser.py',71), ('statement -> element','statement',1,'p_statement','parser.py',78),
('statement -> section','statement',1,'p_statement','parser.py',72), ('statement -> DEEP element','statement',2,'p_statement','parser.py',79),
('statement -> section','statement',1,'p_statement','parser.py',80),
] ]