Implement subelements. Allow valueless elements. Start designing the native structure interface.
This commit is contained in:
parent
4693e341e1
commit
6a478aa877
|
@ -33,11 +33,20 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
|
|||
t_ignore = string.whitespace
|
||||
|
||||
t_DEEP = r'^(>)+'
|
||||
t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
|
||||
|
||||
t_BASE16 = r'0x[0-9A-Fa-f]+'
|
||||
t_SECTION = r'^%%%\s'
|
||||
t_ATTRIB = '='
|
||||
|
||||
|
||||
def t_ATOM(token: lex.LexToken):
    r'[A-Za-z_$][A-Za-z0-9_.-]*'
    # NOTE(review): the raw string above is presumably the token pattern the
    # lexer generator reads from this rule's docstring -- keep it first and
    # unchanged.
    #
    # Atoms spelled like booleans are replaced by the matching Python bool;
    # every other atom keeps its matched text as the token value.
    boolean_spellings = {
        'true': True,
        'True': True,
        'false': False,
        'False': False,
    }
    matched = token.value
    token.value = boolean_spellings.get(matched, matched)
    return token
|
||||
|
||||
def t_BASE10(token: lex.LexToken):
|
||||
r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
|
||||
# python numbers are Very Flexible so we ignore typespec
|
||||
|
|
|
@ -36,12 +36,88 @@ class HeckElement:
|
|||
def __repr__(self):
|
||||
return self.__str__()
|
||||
|
||||
|
||||
def to_struct(self) -> Mapping[str, Union[int, float, str, Mapping, List]]:
|
||||
"""
|
||||
Convert this HeckElement tree to a Python structure. The structure is a dictionary keyed by element name, where
|
||||
the values are a list of each tree under that name, in order of their declaration in the tree, and further, a
|
||||
list of values.
|
||||
|
||||
For example:
|
||||
|
||||
```
|
||||
%%% heck
|
||||
a b c
|
||||
a c d
|
||||
```
|
||||
|
||||
turns into:
|
||||
|
||||
{'a': [['b', 'c'], ['c', 'd']]}
|
||||
|
||||
Subelements are treated as a dictionary object added to the end of each value.
|
||||
|
||||
For example:
|
||||
|
||||
```
|
||||
%%% heck
|
||||
a b
|
||||
> c d
|
||||
a e
|
||||
> f g
|
||||
```
|
||||
|
||||
turns into:
|
||||
|
||||
{'a': [['b', {'c': [['d']]}], ['e', {'f': [['g']]}]]}
|
||||
"""
|
||||
|
||||
|
||||
def to_dict(self, merge=False) -> Mapping[str, Union[int, float, str, Mapping, List]]:
|
||||
"""
|
||||
As with `to_struct`, but attempts to merge all keys together at each level.
|
||||
|
||||
For example:
|
||||
|
||||
```
|
||||
%%% heck
|
||||
a b c
|
||||
a c d
|
||||
```
|
||||
|
||||
turns into (with merge set to True):
|
||||
|
||||
{'a': ['b', 'c', 'c', 'd']}
|
||||
|
||||
or (with merge set to False):
|
||||
|
||||
{'a': ['c', 'd']}
|
||||
|
||||
For child elements, the list is replaced by a dictionary, and the values are stored in a special key '%%%values'.
|
||||
|
||||
Thus:
|
||||
|
||||
```
|
||||
%%% heck
|
||||
a b
|
||||
> c d
|
||||
```
|
||||
|
||||
becomes:
|
||||
|
||||
{'a':{'%%%values': ['b'], 'c': ['d']}}
|
||||
|
||||
This cannot represent exactly the contents of the input tree; however, it may be more convenient in many use
|
||||
cases where repeated elements of the same key are not allowed, or if keys are being treated as unique.
|
||||
"""
|
||||
|
||||
|
||||
def _get_element(ast: List) -> HeckElement:
|
||||
"""
|
||||
Get an element from an element AST from the parser.
|
||||
"""
|
||||
if not (ast[0] == 'element'):
|
||||
raise HeckParseException("Found a non-element where an element was expected.")
|
||||
raise HeckParseException(f"Found a non-element where an element was expected. {ast}")
|
||||
elm = HeckElement()
|
||||
elm.name = ast[1];
|
||||
for item in ast[2:]:
|
||||
|
@ -60,7 +136,9 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
|
|||
MODE_UNPARSE = 2
|
||||
|
||||
rootelm = HeckElement()
|
||||
pelm = rootelm # parent for subelement
|
||||
pelm = [rootelm] # parent for subelement
|
||||
pdepth = 0
|
||||
depth = 0
|
||||
rootelm.name = "__ROOT__"
|
||||
mode = MODE_INIT
|
||||
for idx, line in enumerate(inp):
|
||||
|
@ -68,7 +146,7 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
|
|||
if (line.startswith('%%%')):
|
||||
mode = MODE_INIT
|
||||
else:
|
||||
pelm.values.append(line)
|
||||
pelm[-1].values.append(line)
|
||||
continue
|
||||
else:
|
||||
ast = parser.parse(line)
|
||||
|
@ -76,18 +154,38 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
|
|||
if ast[0] == 'section':
|
||||
if ast[1] == 'heck':
|
||||
mode = MODE_ELM
|
||||
pelm = rootelm
|
||||
pelm = [rootelm]
|
||||
else:
|
||||
mode = MODE_UNPARSE
|
||||
pelm = HeckElement()
|
||||
rootelm.children.append(pelm)
|
||||
pelm.name = ast[1]
|
||||
pelm.unparsed = True
|
||||
pelm = [HeckElement()]
|
||||
rootelm.children.append(pelm[-1])
|
||||
pelm[-1].name = ast[1]
|
||||
pelm[-1].unparsed = True
|
||||
else:
|
||||
if not mode == MODE_ELM:
|
||||
raise HeckParseException("Didn't find heck preamble, line {idx}")
|
||||
else:
|
||||
pelm.children.append(_get_element(ast))
|
||||
if ast[0] == 'deep':
|
||||
# we're in a subitem
|
||||
depth = ast[1]
|
||||
if (depth > pdepth):
|
||||
# are we deeper than last time?
|
||||
try:
|
||||
pelm.append(pelm[-1].children[-1])
|
||||
except:
|
||||
raise HeckParseException("Tried to go deeper without a previous element, line {idx}")
|
||||
elif (depth < pdepth):
|
||||
# are we shallower than last time?
|
||||
pelm.pop()
|
||||
if (not len(pelm)):
|
||||
raise HeckParseException("Tried to go shallower while already shallow, line {idx}")
|
||||
ast = ast[2]
|
||||
pdepth = depth
|
||||
elif (pdepth > 0):
|
||||
# we're no longer deep, just pop up to the top
|
||||
pdepth = 0
|
||||
pelm = [rootelm]
|
||||
pelm[-1].children.append(_get_element(ast))
|
||||
|
||||
return rootelm
|
||||
|
||||
|
@ -106,7 +204,15 @@ subtitle "Yep it's a website"
|
|||
scale 3.72
|
||||
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
|
||||
tags hey man what are you doin
|
||||
|
||||
> more tag tag tag 1 2 3
|
||||
>> we can go deeper
|
||||
>>> we can go even deeper
|
||||
test
|
||||
> _val 1
|
||||
> _val 2
|
||||
> _val 3
|
||||
valueless
|
||||
boolean True
|
||||
%%% markdown
|
||||
# Some cheeky markdown to confuse our processing.
|
||||
|
||||
|
|
|
@ -62,8 +62,12 @@ def p_element(p):
|
|||
element : ATOM values
|
||||
| ATOM values attributes
|
||||
| ATOM attributes
|
||||
| ATOM
|
||||
"""
|
||||
# print(len(p))
|
||||
if len(p) <= 2:
|
||||
p[0] = ["element", p[1]]
|
||||
else:
|
||||
p[0] = ["element", p[1], p[2]]
|
||||
if (len(p) == 4):
|
||||
p[0].append(p[3])
|
||||
|
@ -76,7 +80,7 @@ def p_statement(p):
|
|||
| section
|
||||
"""
|
||||
if (len(p) > 2):
|
||||
p[0] = ('deep', p[2])
|
||||
p[0] = ('deep', len(p[1]), p[2])
|
||||
else:
|
||||
p[0] = p[1]
|
||||
|
||||
|
@ -85,7 +89,7 @@ def p_error(p):
|
|||
if not p:
|
||||
return
|
||||
else:
|
||||
print("Syntax error {p}")
|
||||
print(f"Syntax error {p}")
|
||||
|
||||
parser = yacc.yacc(start="statement")
|
||||
|
||||
|
@ -99,6 +103,7 @@ TEST_STRING = [
|
|||
'tags yo fresh',
|
||||
'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
|
||||
'> big_dumper 32 23 384848',
|
||||
'>> deep_dumper 1 2 3 a=false'
|
||||
]
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -6,9 +6,9 @@ _tabversion = '3.10'
|
|||
|
||||
_lr_method = 'LALR'
|
||||
|
||||
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n \n statement : element\n | DEEP element\n | section\n '
|
||||
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n | ATOM\n \n statement : element\n | DEEP element\n | section\n '
|
||||
|
||||
_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-15,-17,-16,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
|
||||
_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-16,-18,-15,-17,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
|
||||
|
||||
_lr_action = {}
|
||||
for _k, _v in _lr_action_items.items():
|
||||
|
@ -27,21 +27,22 @@ for _k, _v in _lr_goto_items.items():
|
|||
del _lr_goto_items
|
||||
_lr_productions = [
|
||||
("S' -> statement","S'",1,None,None,None),
|
||||
('value -> BASE16','value',1,'p_value','parser.py',7),
|
||||
('value -> BASE10','value',1,'p_value','parser.py',8),
|
||||
('value -> STRING','value',1,'p_value','parser.py',9),
|
||||
('value -> ATOM','value',1,'p_value','parser.py',10),
|
||||
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',17),
|
||||
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',24),
|
||||
('attributes -> attribute','attributes',1,'p_attributes','parser.py',25),
|
||||
('section -> SECTION ATOM','section',2,'p_section','parser.py',36),
|
||||
('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',37),
|
||||
('values -> values value','values',2,'p_values','parser.py',46),
|
||||
('values -> value','values',1,'p_values','parser.py',47),
|
||||
('element -> ATOM values','element',2,'p_element','parser.py',58),
|
||||
('element -> ATOM values attributes','element',3,'p_element','parser.py',59),
|
||||
('element -> ATOM attributes','element',2,'p_element','parser.py',60),
|
||||
('statement -> element','statement',1,'p_statement','parser.py',70),
|
||||
('statement -> DEEP element','statement',2,'p_statement','parser.py',71),
|
||||
('statement -> section','statement',1,'p_statement','parser.py',72),
|
||||
('value -> BASE16','value',1,'p_value','parser.py',11),
|
||||
('value -> BASE10','value',1,'p_value','parser.py',12),
|
||||
('value -> STRING','value',1,'p_value','parser.py',13),
|
||||
('value -> ATOM','value',1,'p_value','parser.py',14),
|
||||
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',21),
|
||||
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',28),
|
||||
('attributes -> attribute','attributes',1,'p_attributes','parser.py',29),
|
||||
('section -> SECTION ATOM','section',2,'p_section','parser.py',40),
|
||||
('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',41),
|
||||
('values -> values value','values',2,'p_values','parser.py',50),
|
||||
('values -> value','values',1,'p_values','parser.py',51),
|
||||
('element -> ATOM values','element',2,'p_element','parser.py',62),
|
||||
('element -> ATOM values attributes','element',3,'p_element','parser.py',63),
|
||||
('element -> ATOM attributes','element',2,'p_element','parser.py',64),
|
||||
('element -> ATOM','element',1,'p_element','parser.py',65),
|
||||
('statement -> element','statement',1,'p_statement','parser.py',78),
|
||||
('statement -> DEEP element','statement',2,'p_statement','parser.py',79),
|
||||
('statement -> section','statement',1,'p_statement','parser.py',80),
|
||||
]
|
||||
|
|
Loading…
Reference in New Issue