"""Load HECKformat text into a tree of :class:`HeckElement` objects."""

import re
from typing import Dict, Iterable, Union, Mapping, TypeVar, List, TextIO

from .parser import parser
from .exceptions import HeckParseException

# A value is either a nested element or a scalar. A string forward reference
# is used because HeckElement is defined below.
HeckValue = Union["HeckElement", str, int, float]


class HeckElement:
    """
    Container for a tree of HECKformat elements.
    """

    name: str
    """The name of the element, either __ROOT__ for top level or whatever is specified in file."""

    children: List["HeckElement"]
    """The children of the element."""

    values: List[HeckValue]
    """One or more values associated with the element."""

    attributes: Dict[str, HeckValue]
    """Zero or more attributes associated with the element as a key-value pair."""

    unparsed: bool
    """True when the element holds the raw lines of a non-heck section."""

    def __init__(self):
        self.children = []
        self.values = []
        self.attributes = dict()
        self.name = ""
        self.unparsed = False

    def __str__(self):
        # Flag raw (non-heck) sections so they stand out when a tree is printed.
        k = 'Unparsed ' if self.unparsed else ''
        return (
            f"<{k}HeckElement {self.name!r} values={self.values} "
            f"attributes={self.attributes} children={self.children}>"
        )

    def __repr__(self):
        return self.__str__()

    def to_struct(self) -> Mapping[str, Union[int, float, str, Mapping, List]]:
        """
        Convert this HeckElement tree to a Python structure.

        NOTE: not implemented yet -- this method has no body beyond this
        docstring and currently returns ``None``. The text below describes
        the intended result.

        The structure is a dictionary keyed by element name, where the values
        are a list of each tree under that name, in order of their declaration
        in the tree, and further, a list of values.

        For example:

        ```
        %%% heck
        a b c
        a c d
        ```

        turns into:

            {'a': [['b', 'c'], ['c', 'd']]}

        Subelements are treated as a dictionary object added to the end of
        each value. For example:

        ```
        %%% heck
        a b
        > c d
        a e
        > f g
        ```

        turns into:

            {'a': [['b', {'c': [['d']]}], ['e', {'f': [['g']]}]]}
        """

    def to_dict(self, merge=False) -> Mapping[str, Union[int, float, str, Mapping, List]]:
        """
        As with `to_struct`, but attempts to merge all keys together at each level.

        NOTE: not implemented yet -- this method has no body beyond this
        docstring and currently returns ``None``. The text below describes
        the intended result.

        For example:

        ```
        %%% heck
        a b c
        a c d
        ```

        turns into (with merge set to True):

            {'a': ['b', 'c', 'c', 'd']}

        or (with merge set to False):

            {'a': ['c', 'd']}

        For child elements, the list is replaced by a dictionary, and the
        values are stored in a special key (``'%%%values'`` in the example
        below). Thus:

        ```
        %%% heck
        a b
        > c d
        ```

        becomes:

            {'a': {'%%%values': ['b'], 'c': ['d']}}

        This cannot represent exactly the contents of the input tree, however
        it may be more convenient in many use cases where repeated elements of
        the same key are not allowed, or if keys are being treated as unique.
        """


def _get_element(ast: List) -> HeckElement:
    """
    Get an element from an element AST from the parser.

    The AST is expected to look like ``['element', name, *parts]`` where each
    part is tagged ``'values'`` or ``'attributes'``; parts with any other tag
    are ignored.

    Raises:
        HeckParseException: if ``ast`` is empty or is not tagged ``'element'``.
    """
    if not ast or ast[0] != 'element':
        raise HeckParseException(f"Found a non-element where an element was expected. {ast}")

    elm = HeckElement()
    elm.name = ast[1]
    for item in ast[2:]:
        if item[0] == 'values':
            # Each entry is (tag, value); keep only the value.
            elm.values = [x[1] for x in item[1:]]
        elif item[0] == 'attributes':
            # Each entry is (tag, key, (tag, value)).
            elm.attributes.update({x[1]: x[2][1] for x in item[1:]})
    return elm


def load_heck(inp: Iterable[str]) -> HeckElement:
    """
    Load a HECKformat into a tree of HeckElements from a list of lines from the file.

    A ``%%% heck`` section switches to element parsing. Any other section
    becomes a child of the root named after the section, flagged
    ``unparsed``, whose ``values`` are the raw lines that follow it.

    Args:
        inp: the lines of the document, in order.

    Returns:
        The root element, named ``__ROOT__``.

    Raises:
        HeckParseException: if an element appears before a ``%%% heck``
            section, or a nested line has no preceding element to attach to.
            Messages carry the 1-based line number.
    """
    MODE_INIT = 0
    MODE_ELM = 1
    MODE_UNPARSE = 2

    rootelm = HeckElement()
    rootelm.name = "__ROOT__"
    # Parent stack: pelm[0] is the section root and pelm[-1] receives the
    # next element. At nesting depth d the stack normally holds d + 1 entries.
    pelm = [rootelm]
    pdepth = 0
    mode = MODE_INIT

    for lineno, line in enumerate(inp, start=1):
        if mode == MODE_UNPARSE:
            if not line.startswith('%%%'):
                pelm[-1].values.append(line)
                continue
            # A new section header ends the raw section. Fall through so the
            # header itself is parsed; previously it was dropped, which lost
            # any section that followed an unparsed one.
            mode = MODE_INIT

        ast = parser.parse(line)
        if not ast:
            continue

        if ast[0] == 'section':
            # Nesting never carries over from one section to the next.
            pdepth = 0
            if ast[1] == 'heck':
                mode = MODE_ELM
                pelm = [rootelm]
            else:
                mode = MODE_UNPARSE
                section = HeckElement()
                section.name = ast[1]
                section.unparsed = True
                rootelm.children.append(section)
                pelm = [section]
            continue

        if mode != MODE_ELM:
            raise HeckParseException(f"Didn't find heck preamble, line {lineno}")

        if ast[0] == 'deep':
            # Nested line: ast is ('deep', depth, element_ast).
            depth = ast[1]
            if depth > pdepth:
                # Descend into the most recently added element.
                try:
                    pelm.append(pelm[-1].children[-1])
                except IndexError as err:
                    raise HeckParseException(
                        f"Tried to go deeper without a previous element, line {lineno}"
                    ) from err
            elif depth < pdepth:
                # Unwind as many levels as the depth dropped, not just one.
                # The slice never removes pelm[0] for a non-negative depth.
                del pelm[depth + 1:]
            ast = ast[2]
            pdepth = depth
        elif pdepth > 0:
            # Back at the top level: reset to the section root.
            pdepth = 0
            pelm = [rootelm]

        pelm[-1].children.append(_get_element(ast))

    return rootelm


def load(infile: TextIO) -> HeckElement:
    """
    Load a HECKformat document from an open text file.

    NOTE(review): lines read from a file keep their trailing newline, whereas
    `loads` yields lines without one, so raw-section values differ between the
    two entry points. Confirm which form callers expect before unifying them.
    """
    return load_heck(infile)


def loads(ins: str) -> HeckElement:
    """
    Load a HECKformat document from a string.

    Lines are split on ``\\r\\n``, ``\\n`` or ``\\r``. The two-character
    sequence is tried first so that a CRLF ending counts as one line break
    rather than two.
    """
    return load_heck(re.split(r'\r\n|\n|\r', ins))


# NOTE(review): this sample arrived with its line breaks collapsed. The breaks
# below are reconstructed, and the split around `valueless` through `.yooooo`
# is a best guess -- confirm against the upstream fixture.
TEST_HECK = """
%%% heck
# Website!
title "My Website" bold=True
subtitle "Yep it's a website"
scale 3.72
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
tags hey man what are you doin
> more tag tag tag 1 2 3
>> we can go deeper
>>> we can go even deeper
test
> _val 1
> _val 2
> _val 3
valueless
_more.orless complexelement
.yooooo
boolean True
%%% markdown
# Some cheeky markdown to confuse our processing.
All my page content goes here.
"""

if __name__ == "__main__":
    result = load_heck(TEST_HECK.split('\n'))
    print(result)