# heckformat/python/heckformat/parse.py

from typing import Iterable, Union, Mapping, TypeVar, List, TextIO, Any
import collections.abc


import re

from .parser import parser
from .exceptions import HeckParseException

# A single value carried by an element: a nested element, a string or a number.
# "HeckElement" is written as a string forward reference because the class is
# defined further down; a TypeVar is not the right tool for that.
HeckValue = Union["HeckElement", str, int, float]

# Prefix put in front of the name of an unparsed section when a tree is
# flattened into a dictionary, so it is distinguishable from parsed elements.
UNPARSED_MARKER = "%%% UNPARSED %%% "

class HeckElement:
    """
    Container for a tree of HECKformat elements.

    Every element has a name, an ordered list of child elements, a list of
    values and a dictionary of attributes. The annotations below are quoted
    because they refer to ``HeckElement`` itself, either directly or through
    ``HeckValue``.
    """
    name: str
    """The name of the element, either __ROOT__ for top level or whatever is specified in file."""
    children: "list[HeckElement]"
    """The children of the element."""
    values: "list[HeckValue]"
    """One or more values associated with the element."""
    attributes: "dict[str, HeckValue]"
    """Zero or more attributes associated with the element as a key-value pair."""
    unparsed: bool
    """True when the element holds the raw lines of a section that was not parsed."""

    def __init__(self):
        self.name = ""
        self.children = []
        self.values = []
        self.attributes = {}
        self.unparsed = False

    def flatten(self) -> Mapping:
        """
        Convert a hecktree element into a dictionary.

        Each child contributes one key. Children sharing a name are merged
        into a single list; a child that has children of its own contributes
        its flattened dictionary first, followed by its values. Unparsed
        children are stored under ``UNPARSED_MARKER + name`` as one string
        with their lines joined by newlines. Attributes are not included.
        """
        output = {}
        for elm in self.children:
            if elm.unparsed:
                key = UNPARSED_MARKER + elm.name
                text = '\n'.join(elm.values)
                # Repeated unparsed sections with the same name are concatenated.
                output[key] = '\n'.join([output[key], text]) if key in output else text
                continue

            # Always build a fresh list so the element's own values are never
            # mutated when a later sibling with the same name is merged in.
            merged = [elm.flatten()] if elm.children else []
            merged.extend(elm.values)
            if elm.name in output:
                output[elm.name].extend(merged)
            else:
                output[elm.name] = merged
        return output

    def get_flat_value(self) -> "Union[list, HeckValue, None]":
        """
        Return the element's values in their simplest form.

        Returns None when there are no values, the value itself when there is
        exactly one, and a new list when there are several.
        """
        if not self.values:
            return None
        if len(self.values) > 1:
            return list(self.values)
        return self.values[0]

    def flatten_replace(self) -> Mapping:
        """
        Convert a hecktree element into a dictionary (but don't try to merge values, just pretend each key is unique)

        A later child replaces an earlier one with the same name. A child with
        children of its own becomes ``{'children': ..., 'value': ...}``; any
        other child becomes the result of ``get_flat_value()``.
        """
        output = {}
        for elm in self.children:
            if elm.unparsed:
                output[UNPARSED_MARKER + elm.name] = '\n'.join(elm.values)
            elif elm.children:
                output[elm.name] = {
                    'children': elm.flatten_replace(),
                    'value': elm.get_flat_value(),
                }
            else:
                output[elm.name] = elm.get_flat_value()
        return output

    def __str__(self):
        """Return a debugging representation including children, values and attributes."""
        prefix = 'Unparsed ' if self.unparsed else ''
        return f"<HeckElement {prefix}{self.name} c={self.children} v={self.values} a={self.attributes}>"

    def __repr__(self):
        return self.__str__()


def _make_element(ast: List) -> HeckElement:
    """
    Build a HeckElement from one element AST produced by the parser.

    ``ast[0]`` must be the tag 'element' and ``ast[1]`` is used as the name.
    Every following entry is a sequence whose first item is a tag: entries
    tagged 'values' replace the element's values, entries tagged 'attributes'
    are merged into its attributes. Entries with any other tag are ignored.

    Raises HeckParseException when ``ast`` is not tagged 'element'.
    """
    if ast[0] != 'element':
        raise HeckParseException(f"Found a non-element where an element was expected. {ast}")

    element = HeckElement()
    element.name = ast[1]

    for part in ast[2:]:
        tag = part[0]
        if tag == 'values':
            # Each entry is a tagged pair; only the payload at index 1 is kept.
            element.values = [entry[1] for entry in part[1:]]
        elif tag == 'attributes':
            # Each entry holds the key at index 1 and a tagged value at index 2.
            pairs = {entry[1]: entry[2][1] for entry in part[1:]}
            element.attributes.update(pairs)

    return element

def load_heck(inp: Iterable[str]) -> HeckElement:
    """
    Load a HECKformat into a tree of HeckElements from a list of lines from the file.

    Every line is handed to ``parser.parse``; lines for which it returns a
    falsy result are skipped. A section AST named 'heck' switches to element
    parsing. Any other section starts an unparsed section: a new child of the
    root, flagged ``unparsed``, collects the following lines verbatim until a
    line starting with '%%%' is seen, and that line is then parsed normally.

    An AST tagged 'deep' carries a nesting depth in ``ast[1]`` and the element
    AST in ``ast[2]``; the element is attached below the most recent element
    of a shallower depth.

    Returns the root element, named "__ROOT__".

    Raises HeckParseException when an element appears before a 'heck' section,
    or when a nested element has no previous element to nest under. Messages
    report 1-based line numbers.
    """
    MODE_INIT = 0
    MODE_ELM = 1
    MODE_UNPARSE = 2

    rootelm = HeckElement()
    rootelm.name = "__ROOT__"

    # Stack of (depth, element) pairs: elements found at nesting level `depth`
    # are appended to `element`. The root entry always stays at the bottom.
    parents = [(0, rootelm)]
    unparsed_elm = None
    mode = MODE_INIT

    for lineno, line in enumerate(inp, start=1):
        if mode == MODE_UNPARSE:
            if not line.startswith('%%%'):
                unparsed_elm.values.append(line)
                continue
            # The unparsed section is over; this line must be parsed itself
            # rather than re-using the AST of the previous section header.
            mode = MODE_INIT

        ast = parser.parse(line)
        if not ast:
            continue

        if ast[0] == 'section':
            # Nesting never carries over from one section to the next.
            parents = [(0, rootelm)]
            if ast[1] == 'heck':
                mode = MODE_ELM
            else:
                mode = MODE_UNPARSE
                unparsed_elm = HeckElement()
                unparsed_elm.name = ast[1]
                unparsed_elm.unparsed = True
                rootelm.children.append(unparsed_elm)
            continue

        if mode != MODE_ELM:
            raise HeckParseException(f"Didn't find heck preamble, line {lineno}")

        depth = 0
        if ast[0] == 'deep':
            # We're in a subitem: unwrap the depth and the element AST.
            depth = ast[1]
            ast = ast[2]

        # Shallower than before: unwind as many levels as needed, not just one.
        while len(parents) > 1 and parents[-1][0] > depth:
            parents.pop()

        # Deeper than before: the most recent element becomes the new parent.
        if parents[-1][0] < depth:
            siblings = parents[-1][1].children
            if not siblings:
                raise HeckParseException(
                    f"Tried to go deeper without a previous element, line {lineno}"
                )
            parents.append((depth, siblings[-1]))

        parents[-1][1].children.append(_make_element(ast))

    return rootelm

def load(infile: TextIO) -> HeckElement:
    """Read all lines from an open text file and parse them with load_heck."""
    lines = infile.readlines()
    return load_heck(lines)

def loads(ins: str) -> HeckElement:
    """
    Parse a HECKformat document held in a single string.

    The string is split on '\\r\\n', '\\n' or '\\r' and the resulting lines
    (without their line endings) are passed to load_heck.
    """
    # '\r\n' has to be the first alternative: regex alternation is tried left
    # to right, so otherwise a CRLF is split twice and yields an empty line.
    return load_heck(re.split(r'\r\n|\n|\r', ins))


# Sample HECKformat document parsed by the ``__main__`` block at the bottom of
# this file. It consists of a ``heck`` section followed by a ``markdown``
# section; load_heck stores the lines of any non-``heck`` section unparsed.
TEST_HECK = """
%%% heck
# Website!
title "My Website" bold=True
subtitle "Yep it's a website"
scale 3.72
matrix 0 0 0 0 1 2 3  1 2 3 4 29394.2
tags hey man what are you doin
> more tag tag tag 1 2 3
>> we can go deeper
>>> we can go even deeper
test
> _val 1
> _val 2
> _val 3
valueless
_more.orless complexelement
.yooooo
boolean True
%%% markdown
# Some cheeky markdown to confuse our processing.

All my page content goes here.
"""

if __name__ == "__main__":
    # Manual smoke test: parse the bundled sample and dump the resulting tree.
    sample_lines = TEST_HECK.split('\n')
    result = load_heck(sample_lines)
    print(result)