heckformat/python/heck/parse.py

128 lines
3.5 KiB
Python
Raw Normal View History

from typing import Iterable, Union, Mapping, TypeVar, List
import re
from parser import parser
class HeckException(Exception):
    """Root of the exception hierarchy used by this module."""
class HeckParseException(HeckException):
    """Signals that heck input could not be turned into elements."""
# Type alias for anything an element can carry as a value or attribute value:
# a nested element, or a string / integer / float scalar.
# "HeckElement" is quoted because the class is only defined further down; a
# TypeVar is not a forward reference and must not be used as one.
HeckValue = Union["HeckElement", str, int, float]
class HeckElement:
    """A node in a heck document tree.

    Attributes:
        name: The element's label; an empty string until one is assigned.
        children: Child elements, in the order they were appended.
        values: Positional values attached to the element.
        attributes: Named values attached to the element.
        unparsed: When true, the string form is tagged "Unparsed".
    """

    # The containers are annotated with their concrete types because users of
    # the class mutate them in place (append / update).
    name: str
    children: list["HeckElement"]
    values: list["HeckValue"]
    attributes: dict[str, "HeckValue"]
    unparsed: bool

    def __init__(self):
        """Create an empty, unnamed element with its own fresh containers."""
        self.children = []
        self.values = []
        self.attributes = dict()
        self.name = ""
        self.unparsed = False

    def __str__(self):
        """Return a one-line debug rendering of the element and its content."""
        k = ''
        if self.unparsed:
            k = 'Unparsed '
        return f"<HeckElement {k}{self.name} c={self.children} v={self.values} a={self.attributes}>"

    def __repr__(self):
        """Use the same rendering as str() so nested elements print readably."""
        return self.__str__()
# COMMENT ::= # .*$
# ATOM ::= [A-Za-z_][A-Za-z0-9_-]*
# BASE10NUMBER ::= (-)?[0-9]+(\.[0-9]+)?([FLUIDCfluidc])?
# BASE16NUMBER ::= 0x[0-9A-Fa-f]+
# NUMBER ::= (<BASE10NUMBER|BASE16NUMBER>)
# STRING ::= "([^\"]*|(\\)|(\"))"
# VALUE ::= (<ATOM>|<STRING>|<NUMBER>)
# VALUES ::= <VALUE>(\s+<VALUES>)?
# ATTRIBUTENAME ::= <ATOM>
# ATTRIBUTE ::= <ATTRIBUTENAME>=<VALUE>
# ATTRIBUTES ::= <ATTRIBUTE>(\s+<ATTRIBUTES>)?
# SECTIONLABEL ::= <ATOM>
# SECTION ::= %%%\s+<SECTIONLABEL>\s+<ATTRIBUTES>
# ELEMENTLABEL ::= [A-Za-z_][A-Za-z0-9!@#$%^&*()_+/\\-]*
# ELEMENT ::= <ELEMENTLABEL>\s+(<VALUES>|<ATTRIBUTES>)
# LINE ::= ^(((>)*<ELEMENT>) | <SECTION> | <COMMENT>) (<COMMENT>|$)
# ATOM = re.compile(r'[A-Za-z_][A-Za-z0-9_-]*')
def get_element(ast: List) -> HeckElement:
    """Convert an 'element' AST node into a HeckElement.

    The node is read positionally: index 0 is the tag, index 1 the element
    name, and every later entry is a group whose own index 0 is either
    'values' or 'attributes'.  Groups with any other tag are skipped.

    Raises:
        HeckParseException: If the node's tag is not 'element'.
    """
    if ast[0] != 'element':
        raise HeckParseException("Found a non-element where an element was expected.")

    element = HeckElement()
    element.name = ast[1]

    for group in ast[2:]:
        tag = group[0]
        if tag == 'values':
            # Each entry contributes the item at its index 1; a later
            # 'values' group replaces the list built by an earlier one.
            element.values = [entry[1] for entry in group[1:]]
        elif tag == 'attributes':
            # Key is the entry's index 1, value is index 1 of its index 2.
            named = {entry[1]: entry[2][1] for entry in group[1:]}
            element.attributes.update(named)

    return element
def load_heck(inp: Iterable[str]) -> HeckElement:
    """Build a HeckElement tree from the lines of a heck document.

    Lines are consumed one at a time.  A ``heck`` section header switches to
    element mode, in which every line that yields a non-empty AST is converted
    with get_element() and appended to the root's children.  Any other section
    header creates a child of the root, named after the section and flagged
    ``unparsed``, whose ``values`` receive the following lines verbatim until
    the next line that starts with ``%%%``.

    Args:
        inp: The document, one line per item.

    Returns:
        The synthetic root element, named ``__ROOT__``.

    Raises:
        HeckParseException: If a non-section line is parsed while no ``heck``
            section is active.  The message carries the 1-based line number.
    """
    MODE_INIT = 0     # no section is active
    MODE_ELM = 1      # inside a "heck" section: lines are elements
    MODE_UNPARSE = 2  # inside any other section: lines are stored verbatim

    rootelm = HeckElement()
    rootelm.name = "__ROOT__"
    pelm = rootelm  # element that receives whatever is read next
    mode = MODE_INIT

    for lineno, line in enumerate(inp, start=1):
        if mode == MODE_UNPARSE:
            if not line.startswith('%%%'):
                pelm.values.append(line)
                continue
            # A "%%%" line ends the unparsed section.  Fall through so the
            # line itself is parsed as the header of the next section instead
            # of being silently dropped.
            mode = MODE_INIT

        ast = parser.parse(line)
        if not ast:
            # Nothing to act on for this line.
            continue

        if ast[0] == 'section':
            if ast[1] == 'heck':
                mode = MODE_ELM
                pelm = rootelm
            else:
                mode = MODE_UNPARSE
                pelm = HeckElement()
                pelm.name = ast[1]
                pelm.unparsed = True
                rootelm.children.append(pelm)
        elif mode != MODE_ELM:
            raise HeckParseException(f"Didn't find heck preamble, line {lineno}")
        else:
            pelm.children.append(get_element(ast))

    return rootelm
# Sample document fed to load_heck() when this file is run as a script:
# a "heck" section holding a few elements, followed by a "markdown" section.
TEST_HECK = """
%%% heck
# Website!
title "My Website" bold=True
subtitle "Yep it's a website"
scale 3.72
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
tags hey man what are you doin
%%% markdown
# Some cheeky markdown to confuse our processing.
All my page content goes here.
"""
if __name__ == "__main__":
    # Smoke test: parse the embedded sample document and print the tree.
    sample_lines = TEST_HECK.split('\n')
    print(load_heck(sample_lines))