Implement subelements. Allow valueless elements. Start designing the native structure interface.
This commit is contained in:
parent
4693e341e1
commit
6a478aa877
|
@ -33,11 +33,20 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
|
||||||
t_ignore = string.whitespace
|
t_ignore = string.whitespace
|
||||||
|
|
||||||
t_DEEP = r'^(>)+'
|
t_DEEP = r'^(>)+'
|
||||||
t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
|
|
||||||
t_BASE16 = r'0x[0-9A-Fa-f]+'
|
t_BASE16 = r'0x[0-9A-Fa-f]+'
|
||||||
t_SECTION = r'^%%%\s'
|
t_SECTION = r'^%%%\s'
|
||||||
t_ATTRIB = '='
|
t_ATTRIB = '='
|
||||||
|
|
||||||
|
|
||||||
|
def t_ATOM(token: lex.LexToken):
|
||||||
|
r'[A-Za-z_$][A-Za-z0-9_.-]*'
|
||||||
|
if token.value in ('true', 'True'):
|
||||||
|
token.value = True
|
||||||
|
elif token.value in ('false', 'False'):
|
||||||
|
token.value = False
|
||||||
|
return token
|
||||||
|
|
||||||
def t_BASE10(token: lex.LexToken):
|
def t_BASE10(token: lex.LexToken):
|
||||||
r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
|
r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
|
||||||
# python numbers are Very Flexible so we ignore typespec
|
# python numbers are Very Flexible so we ignore typespec
|
||||||
|
|
|
@ -36,12 +36,88 @@ class HeckElement:
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return self.__str__()
|
return self.__str__()
|
||||||
|
|
||||||
|
|
||||||
|
def to_struct(self) -> Mapping[str, Union[int, float, str, Mapping, List]]:
|
||||||
|
"""
|
||||||
|
Convert this HeckElement tree to a Python structure. The structure is a dictionary keyed by element name, where
|
||||||
|
the values are a list of each tree under that name, in order of their declaration in the tree, and further, a
|
||||||
|
list of values.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
%%% heck
|
||||||
|
a b c
|
||||||
|
a c d
|
||||||
|
```
|
||||||
|
|
||||||
|
turns into:
|
||||||
|
|
||||||
|
{'a': [['b', 'c'], ['c', 'd']]}
|
||||||
|
|
||||||
|
Subelements are treated as a dictionary object added to the end of each value.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
%%% heck
|
||||||
|
a b
|
||||||
|
> c d
|
||||||
|
a e
|
||||||
|
> f g
|
||||||
|
```
|
||||||
|
|
||||||
|
turns into:
|
||||||
|
|
||||||
|
{'a': [['b', {'c': [['d']]}], ['e', {'f': [['g']]}]]}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def to_dict(self, merge=False) -> Mapping[str, Union[int, float, str, Mapping, List]]:
|
||||||
|
"""
|
||||||
|
As with `to_struct`, but attempts to merge all keys together at each level.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
%%% heck
|
||||||
|
a b c
|
||||||
|
a c d
|
||||||
|
```
|
||||||
|
|
||||||
|
turns into (with merge set to True):
|
||||||
|
|
||||||
|
{'a': ['b', 'c', 'c', 'd']}
|
||||||
|
|
||||||
|
or (with merge set to False):
|
||||||
|
|
||||||
|
{'a': ['c', 'd']}
|
||||||
|
|
||||||
|
For child elements, the list is replaced by a dictionary, and the values are stored in a special key '%%%values'.
|
||||||
|
|
||||||
|
Thus:
|
||||||
|
|
||||||
|
```
|
||||||
|
%%% heck
|
||||||
|
a b
|
||||||
|
> c d
|
||||||
|
```
|
||||||
|
|
||||||
|
becomes:
|
||||||
|
|
||||||
|
{'a':{'%%%values': ['b'], 'c': ['d']}}
|
||||||
|
|
||||||
|
This cannot represent exactly the contents of the input tree, however it may be more convenient in many use
|
||||||
|
cases where repeated elements of the same key are not allowed, or if keys are being treated as unique.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def _get_element(ast: List) -> HeckElement:
|
def _get_element(ast: List) -> HeckElement:
|
||||||
"""
|
"""
|
||||||
Get an element from an element AST from the parser.
|
Get an element from an element AST from the parser.
|
||||||
"""
|
"""
|
||||||
if not (ast[0] == 'element'):
|
if not (ast[0] == 'element'):
|
||||||
raise HeckParseException("Found a non-element where an element was expected.")
|
raise HeckParseException(f"Found a non-element where an element was expected. {ast}")
|
||||||
elm = HeckElement()
|
elm = HeckElement()
|
||||||
elm.name = ast[1];
|
elm.name = ast[1];
|
||||||
for item in ast[2:]:
|
for item in ast[2:]:
|
||||||
|
@ -60,7 +136,9 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
|
||||||
MODE_UNPARSE = 2
|
MODE_UNPARSE = 2
|
||||||
|
|
||||||
rootelm = HeckElement()
|
rootelm = HeckElement()
|
||||||
pelm = rootelm # parent for subelement
|
pelm = [rootelm] # parent for subelement
|
||||||
|
pdepth = 0
|
||||||
|
depth = 0
|
||||||
rootelm.name = "__ROOT__"
|
rootelm.name = "__ROOT__"
|
||||||
mode = MODE_INIT
|
mode = MODE_INIT
|
||||||
for idx, line in enumerate(inp):
|
for idx, line in enumerate(inp):
|
||||||
|
@ -68,7 +146,7 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
|
||||||
if (line.startswith('%%%')):
|
if (line.startswith('%%%')):
|
||||||
mode = MODE_INIT
|
mode = MODE_INIT
|
||||||
else:
|
else:
|
||||||
pelm.values.append(line)
|
pelm[-1].values.append(line)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
ast = parser.parse(line)
|
ast = parser.parse(line)
|
||||||
|
@ -76,18 +154,38 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
|
||||||
if ast[0] == 'section':
|
if ast[0] == 'section':
|
||||||
if ast[1] == 'heck':
|
if ast[1] == 'heck':
|
||||||
mode = MODE_ELM
|
mode = MODE_ELM
|
||||||
pelm = rootelm
|
pelm = [rootelm]
|
||||||
else:
|
else:
|
||||||
mode = MODE_UNPARSE
|
mode = MODE_UNPARSE
|
||||||
pelm = HeckElement()
|
pelm = [HeckElement()]
|
||||||
rootelm.children.append(pelm)
|
rootelm.children.append(pelm[-1])
|
||||||
pelm.name = ast[1]
|
pelm[-1].name = ast[1]
|
||||||
pelm.unparsed = True
|
pelm[-1].unparsed = True
|
||||||
else:
|
else:
|
||||||
if not mode == MODE_ELM:
|
if not mode == MODE_ELM:
|
||||||
raise HeckParseException("Didn't find heck preamble, line {idx}")
|
raise HeckParseException("Didn't find heck preamble, line {idx}")
|
||||||
else:
|
else:
|
||||||
pelm.children.append(_get_element(ast))
|
if ast[0] == 'deep':
|
||||||
|
# we're in a subitem
|
||||||
|
depth = ast[1]
|
||||||
|
if (depth > pdepth):
|
||||||
|
# are we deeper than last time?
|
||||||
|
try:
|
||||||
|
pelm.append(pelm[-1].children[-1])
|
||||||
|
except:
|
||||||
|
raise HeckParseException("Tried to go deeper without a previous element, line {idx}")
|
||||||
|
elif (depth < pdepth):
|
||||||
|
# are we shallower than last time?
|
||||||
|
pelm.pop()
|
||||||
|
if (not len(pelm)):
|
||||||
|
raise HeckParseException("Tried to go shallower while already shallow, line {idx}")
|
||||||
|
ast = ast[2]
|
||||||
|
pdepth = depth
|
||||||
|
elif (pdepth > 0):
|
||||||
|
# we're no longer deep, just pop up to the top
|
||||||
|
pdepth = 0
|
||||||
|
pelm = [rootelm]
|
||||||
|
pelm[-1].children.append(_get_element(ast))
|
||||||
|
|
||||||
return rootelm
|
return rootelm
|
||||||
|
|
||||||
|
@ -106,7 +204,15 @@ subtitle "Yep it's a website"
|
||||||
scale 3.72
|
scale 3.72
|
||||||
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
|
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
|
||||||
tags hey man what are you doin
|
tags hey man what are you doin
|
||||||
|
> more tag tag tag 1 2 3
|
||||||
|
>> we can go deeper
|
||||||
|
>>> we can go even deeper
|
||||||
|
test
|
||||||
|
> _val 1
|
||||||
|
> _val 2
|
||||||
|
> _val 3
|
||||||
|
valueless
|
||||||
|
boolean True
|
||||||
%%% markdown
|
%%% markdown
|
||||||
# Some cheeky markdown to confuse our processing.
|
# Some cheeky markdown to confuse our processing.
|
||||||
|
|
||||||
|
|
|
@ -62,11 +62,15 @@ def p_element(p):
|
||||||
element : ATOM values
|
element : ATOM values
|
||||||
| ATOM values attributes
|
| ATOM values attributes
|
||||||
| ATOM attributes
|
| ATOM attributes
|
||||||
|
| ATOM
|
||||||
"""
|
"""
|
||||||
# print(len(p))
|
# print(len(p))
|
||||||
p[0] = ["element", p[1], p[2]]
|
if len(p) <= 2:
|
||||||
if (len(p) == 4):
|
p[0] = ["element", p[1]]
|
||||||
p[0].append(p[3])
|
else:
|
||||||
|
p[0] = ["element", p[1], p[2]]
|
||||||
|
if (len(p) == 4):
|
||||||
|
p[0].append(p[3])
|
||||||
|
|
||||||
|
|
||||||
def p_statement(p):
|
def p_statement(p):
|
||||||
|
@ -76,7 +80,7 @@ def p_statement(p):
|
||||||
| section
|
| section
|
||||||
"""
|
"""
|
||||||
if (len(p) > 2):
|
if (len(p) > 2):
|
||||||
p[0] = ('deep', p[2])
|
p[0] = ('deep', len(p[1]), p[2])
|
||||||
else:
|
else:
|
||||||
p[0] = p[1]
|
p[0] = p[1]
|
||||||
|
|
||||||
|
@ -85,7 +89,7 @@ def p_error(p):
|
||||||
if not p:
|
if not p:
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
print("Syntax error {p}")
|
print(f"Syntax error {p}")
|
||||||
|
|
||||||
parser = yacc.yacc(start="statement")
|
parser = yacc.yacc(start="statement")
|
||||||
|
|
||||||
|
@ -99,6 +103,7 @@ TEST_STRING = [
|
||||||
'tags yo fresh',
|
'tags yo fresh',
|
||||||
'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
|
'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
|
||||||
'> big_dumper 32 23 384848',
|
'> big_dumper 32 23 384848',
|
||||||
|
'>> deep_dumper 1 2 3 a=false'
|
||||||
]
|
]
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -6,9 +6,9 @@ _tabversion = '3.10'
|
||||||
|
|
||||||
_lr_method = 'LALR'
|
_lr_method = 'LALR'
|
||||||
|
|
||||||
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n \n statement : element\n | DEEP element\n | section\n '
|
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n | ATOM\n \n statement : element\n | DEEP element\n | section\n '
|
||||||
|
|
||||||
_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-15,-17,-16,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
|
_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-16,-18,-15,-17,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
|
||||||
|
|
||||||
_lr_action = {}
|
_lr_action = {}
|
||||||
for _k, _v in _lr_action_items.items():
|
for _k, _v in _lr_action_items.items():
|
||||||
|
@ -27,21 +27,22 @@ for _k, _v in _lr_goto_items.items():
|
||||||
del _lr_goto_items
|
del _lr_goto_items
|
||||||
_lr_productions = [
|
_lr_productions = [
|
||||||
("S' -> statement","S'",1,None,None,None),
|
("S' -> statement","S'",1,None,None,None),
|
||||||
('value -> BASE16','value',1,'p_value','parser.py',7),
|
('value -> BASE16','value',1,'p_value','parser.py',11),
|
||||||
('value -> BASE10','value',1,'p_value','parser.py',8),
|
('value -> BASE10','value',1,'p_value','parser.py',12),
|
||||||
('value -> STRING','value',1,'p_value','parser.py',9),
|
('value -> STRING','value',1,'p_value','parser.py',13),
|
||||||
('value -> ATOM','value',1,'p_value','parser.py',10),
|
('value -> ATOM','value',1,'p_value','parser.py',14),
|
||||||
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',17),
|
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',21),
|
||||||
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',24),
|
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',28),
|
||||||
('attributes -> attribute','attributes',1,'p_attributes','parser.py',25),
|
('attributes -> attribute','attributes',1,'p_attributes','parser.py',29),
|
||||||
('section -> SECTION ATOM','section',2,'p_section','parser.py',36),
|
('section -> SECTION ATOM','section',2,'p_section','parser.py',40),
|
||||||
('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',37),
|
('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',41),
|
||||||
('values -> values value','values',2,'p_values','parser.py',46),
|
('values -> values value','values',2,'p_values','parser.py',50),
|
||||||
('values -> value','values',1,'p_values','parser.py',47),
|
('values -> value','values',1,'p_values','parser.py',51),
|
||||||
('element -> ATOM values','element',2,'p_element','parser.py',58),
|
('element -> ATOM values','element',2,'p_element','parser.py',62),
|
||||||
('element -> ATOM values attributes','element',3,'p_element','parser.py',59),
|
('element -> ATOM values attributes','element',3,'p_element','parser.py',63),
|
||||||
('element -> ATOM attributes','element',2,'p_element','parser.py',60),
|
('element -> ATOM attributes','element',2,'p_element','parser.py',64),
|
||||||
('statement -> element','statement',1,'p_statement','parser.py',70),
|
('element -> ATOM','element',1,'p_element','parser.py',65),
|
||||||
('statement -> DEEP element','statement',2,'p_statement','parser.py',71),
|
('statement -> element','statement',1,'p_statement','parser.py',78),
|
||||||
('statement -> section','statement',1,'p_statement','parser.py',72),
|
('statement -> DEEP element','statement',2,'p_statement','parser.py',79),
|
||||||
|
('statement -> section','statement',1,'p_statement','parser.py',80),
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue