# heckformat/python/heck/parse.py

from typing import Iterable, Union, Mapping, TypeVar, List, TextIO

import re

from .parser import parser
from .exceptions import HeckParseException

# A value carried by an element: either a nested element or a plain scalar.
# "HeckElement" is a string forward reference because the class is defined
# further down in this module. A TypeVar is not a forward reference, and
# ``TypeVar | str`` only works at runtime on Python 3.10+.
HeckValue = Union["HeckElement", str, int, float]

class HeckElement:
    """
    Container for a tree of HECKformat elements.

    All attributes are created per instance by ``__init__`` and start out
    empty; whoever builds the tree fills them in afterwards.
    """
    # The container annotations are quoted so they are never evaluated when the
    # class is created: they refer to the class itself and to the module-level
    # HeckValue alias.
    name: str
    """The name of the element, either __ROOT__ for top level or whatever is specified in file."""
    children: "list[HeckElement]"
    """The children of the element."""
    values: "list[HeckValue]"
    """One or more values associated with the element."""
    attributes: "dict[str, HeckValue]"
    """Zero or more attributes associated with the element as a key-value pair."""
    unparsed: bool
    """True when the element holds the raw lines of a non-heck section in ``values``."""

    def __init__(self) -> None:
        self.name = ""
        self.children = []
        self.values = []
        self.attributes = {}
        self.unparsed = False

    def __str__(self) -> str:
        # Unparsed elements are marked so they stand out when a tree is dumped.
        prefix = 'Unparsed ' if self.unparsed else ''
        return f"<HeckElement {prefix}{self.name} c={self.children} v={self.values} a={self.attributes}>"

    def __repr__(self) -> str:
        # Same text as str(); this is what shows up when an element is printed
        # inside a parent's ``children`` list.
        return self.__str__()

def _get_element(ast: List) -> HeckElement:
    """
    Build a HeckElement from one ``element`` node of the parser's AST.

    The node is read positionally: index 0 is the node tag (must be
    ``'element'``), index 1 is the element name, and every remaining entry is
    a sub-node whose own index 0 is either ``'values'`` or ``'attributes'``.
    Sub-nodes with any other tag are ignored.

    Raises HeckParseException when the node tag is not ``'element'``.
    """
    node_tag = ast[0]
    if node_tag != 'element':
        raise HeckParseException("Found a non-element where an element was expected.")

    element = HeckElement()
    element.name = ast[1]

    for part in ast[2:]:
        part_tag = part[0]
        entries = part[1:]
        if part_tag == 'values':
            # Each entry keeps its payload at index 1.
            element.values = [entry[1] for entry in entries]
        elif part_tag == 'attributes':
            # Each entry is (tag, key, value-node); the value-node's payload
            # sits at index 1 as well.
            parsed_attributes = {entry[1]: entry[2][1] for entry in entries}
            element.attributes.update(parsed_attributes)

    return element

def load_heck(inp: Iterable[str]) -> HeckElement:
    """
    Load a HECKformat into a tree of HeckElements from a list of lines from the file.

    Lines are consumed in order and handed to the parser one at a time:

    * A section node named ``heck`` switches to element mode; every element
      parsed afterwards is appended to the root's children.
    * A section node with any other name starts an unparsed section. A child
      element carrying that name is added to the root and flagged
      ``unparsed``. Every following line is appended verbatim to its
      ``values`` until a line starting with ``%%%`` is met; that line is then
      parsed as the next section header.
    * Lines for which the parser returns nothing are skipped.

    Returns the root element, named ``__ROOT__``.

    Raises HeckParseException when a non-section node is parsed outside a
    ``heck`` section; the message carries the 1-based line number.
    """
    MODE_INIT = 0     # not inside any section
    MODE_ELM = 1      # inside a ``heck`` section: lines are parsed as elements
    MODE_UNPARSE = 2  # inside any other section: lines are stored raw

    rootelm = HeckElement()
    rootelm.name = "__ROOT__"
    pelm = rootelm  # element currently receiving children or raw lines
    mode = MODE_INIT
    for lineno, line in enumerate(inp, start=1):
        if mode == MODE_UNPARSE:
            if not line.startswith('%%%'):
                pelm.values.append(line)
                continue
            # A new section header closes the raw section. Fall through so the
            # header itself is parsed; reusing the previous line's AST would
            # re-open the old section under its old name.
            mode = MODE_INIT

        ast = parser.parse(line)
        if not ast:
            continue

        if ast[0] == 'section':
            if ast[1] == 'heck':
                mode = MODE_ELM
                pelm = rootelm
            else:
                mode = MODE_UNPARSE
                pelm = HeckElement()
                pelm.name = ast[1]
                pelm.unparsed = True
                rootelm.children.append(pelm)
        elif mode != MODE_ELM:
            raise HeckParseException(f"Didn't find heck preamble, line {lineno}")
        else:
            pelm.children.append(_get_element(ast))

    return rootelm

def load(infile: TextIO) -> HeckElement:
    """
    Read every line from an open text file and build the element tree from them.

    The lines are taken from ``infile.readlines()``, so each one keeps
    whatever line terminator the file object returns with it.
    """
    lines = infile.readlines()
    return load_heck(lines)

def loads(ins: str) -> HeckElement:
    """
    Build the element tree from a HECKformat document held in a string.

    The text is split into lines on CRLF, LF or lone CR terminators; the
    terminators themselves are dropped.
    """
    # The two-character CRLF alternative must come first: regex alternation
    # takes the first branch that matches, so a leading lone-CR branch would
    # split every CRLF twice and inject an empty line after each real one.
    return load_heck(re.split(r'\r\n|\n|\r', ins))


# Sample document used by the smoke test below: a ``%%% heck`` section
# followed by a ``%%% markdown`` section.
TEST_HECK = """
%%% heck
# Website!
title "My Website" bold=True
subtitle "Yep it's a website"
scale 3.72
matrix 0 0 0 0 1 2 3  1 2 3 4 29394.2
tags hey man what are you doin

%%% markdown
# Some cheeky markdown to confuse our processing.

All my page content goes here.
"""

if __name__ == "__main__":
    # Smoke test: parse the sample document line by line and dump the tree.
    sample_lines = TEST_HECK.split('\n')
    print(load_heck(sample_lines))