Reorganize project layout. Add buildability.

This commit is contained in:
Cassowary 2024-02-08 21:14:00 -08:00
parent 49ddb152ec
commit 7c98e6e895
7 changed files with 504 additions and 0 deletions

21
pyproject.toml Normal file
View File

@ -0,0 +1,21 @@
# Build with PDM's PEP 517 backend.
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
# Core project metadata.
[project]
name = "heckformat"
# The version is not hard-coded here; it is resolved through the
# [tool.pdm.version] table below.
dynamic = ["version"]
description = "A simple format for configuration and content storage."
authors = [{name = "Cassowary", email="cassowary@aldercone.studio"}]
dependencies = ["ply>=3.1"]
requires-python = ">=3.8"
readme = "README.md"
# NOTE(review): `text` records the literal string "LICENSE" as the licence
# text. If the intent is to reference a LICENSE file in the repository,
# `{file = "LICENSE"}` is probably what was meant - confirm before changing.
license = {text = "LICENSE"}
# The importable package lives under python/ rather than the repository root.
[tool.pdm.build]
package-dir = "python"
# Read the version string from the package's __init__.py.
[tool.pdm.version]
source = "file"
path = "python/heckformat/__init__.py"

View File

@ -0,0 +1,2 @@
# Package version string. pyproject.toml's [tool.pdm.version] table is
# configured to read the version from python/heckformat/__init__.py.
__version__ = "0.0.1"

View File

@ -0,0 +1,20 @@
"""
Exceptions for HECKfile processing.
"""
class HeckException(Exception):
    """
    Base exception for HECKfile processing.

    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
    ``except Exception`` handlers catch HECKformat errors; ``BaseException``
    is reserved for interpreter-level events such as ``KeyboardInterrupt``
    and ``SystemExit``.
    """


class HeckParseException(HeckException):
    """
    Raised for parse errors specifically.
    """


class HeckLexException(HeckException):
    """
    Raised for lex errors specifically.
    """

119
python/heckformat/lexer.py Normal file
View File

@ -0,0 +1,119 @@
import ply.lex as lex
"""
Lexical analyzer for HECKformat lines using PLY Lex.
"""
from .exceptions import HeckLexException
from typing import List, Optional
import string
# Token names handed to PLY; the parser module imports this tuple as well.
tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB', 'DEEP', 'ELEMENT')
# Informal grammar for a single HECKformat line. These are design notes: the
# regular expressions assigned below and in the t_* functions are what the
# lexer actually uses, and they differ from this sketch in places (for
# example, t_STRING has no escape handling and t_ATOM also accepts '$' and '.').
# COMMENT ::= # .*$
# ATOM ::= [A-Za-z_][A-Za-z0-9_-]?
# BASE10NUMBER ::= (-)?[0-9]+(\.)?[0-9]+([FLUIDCfluidc])?
# BASE16NUMBER ::= 0x[0-9A-Fa-f]+
# NUMBER ::= (<BASE10NUMBER|BASE16NUMBER>)
# STRING ::= "([^\"]*|(\\)|(\"))"
# VALUE ::= (<ATOM>|<STRING>|<NUMBER>)
# VALUES ::= <VALUE>(\s+<VALUES>)?
# ATTRIBUTENAME ::= <ATOM>
# ATTRIBUTE ::= <ATTRIBUTENAME>=<VALUE>
# ATTRIBUTES ::= <ATTRIBUTE>(\s+<ATTRIBUTES>)?
# SECTIONLABEL ::= <ATOM>
# SECTION ::= %%%\s+<SECTIONLABEL>\s+<ATTRIBUTES>
# ELEMENTLABEL ::= [A-Za-z_][A-Za-z0-9!@#$%^&*()_+/\\-]?
# ELEMENT ::= <ELEMENTLABEL>\s+(<VALUES>|<ATTRIBUTES>)
# LINE ::= ^(((>)*<ELEMENT>) | <SECTION> | <COMMENT>) (<COMMENT>|$)
# Characters skipped between tokens: everything in string.whitespace.
t_ignore = string.whitespace
# One or more '>' at the start of the line (nesting marker).
t_DEEP = r'^(>)+'
# Hexadecimal literal. This is a plain string rule, so the token value stays
# the matched text; no numeric conversion happens here.
t_BASE16 = r'0x[0-9A-Fa-f]+'
# Section marker: '%%%' at the start of the line followed by one whitespace
# character.
t_SECTION = r'^%%%\s'
# The '=' between an attribute name and its value.
t_ATTRIB = '='
# Element labels: may start with a letter, '_' or '.', and may contain a wider
# punctuation set than ATOM.
t_ELEMENT = r'[A-Za-z_.][A-Za-z0-9.!@\$%^&*()_+/\\-]*'
def t_ATOM(token: lex.LexToken):
    r'[A-Za-z_$][A-Za-z0-9_.-]*'
    # The docstring above is the token's regular expression (PLY reads it), so
    # the description lives in comments instead.
    #
    # Atoms are bare words. The spellings 'true'/'True' and 'false'/'False'
    # are replaced by the corresponding Python booleans; any other atom keeps
    # its text as the token value.
    text = token.value
    if text == 'true' or text == 'True':
        token.value = True
    elif text == 'false' or text == 'False':
        token.value = False
    return token
def t_BASE10(token: lex.LexToken):
    r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
    # The docstring above is the token's regular expression (PLY reads it), so
    # the description lives in comments instead.
    #
    # Converts a decimal literal to a Python number. A single trailing type
    # letter (one of FLUIDCfluidc) is accepted but discarded: Python numbers
    # are flexible enough that the typespec is ignored. The result is a float
    # when the digits contain a '.', otherwise an int.
    digits = token.value
    if digits[-1] in 'FLUIDCfluidc':
        digits = digits[:-1]
    convert = float if '.' in digits else int
    token.value = convert(digits)
    return token
def t_COMMENT(token: lex.LexToken):
    r'\#\s?.*$'
    # The docstring above is the token's regular expression (PLY reads it).
    # Nothing is returned, so no COMMENT token is produced for the matched
    # text: comments are dropped.
    return None
def t_STRING(token: lex.LexToken):
    r'"[^"]*"'
    # The docstring above is the token's regular expression (PLY reads it).
    # The match always begins and ends with a double quote; keep only the
    # text between them as the token value.
    quoted = token.value
    token.value = quoted[1:len(quoted) - 1]
    return token
def t_error(token: lex.LexToken):
    # Lexer error hook: report the offending character on stdout.
    # token.value holds the not-yet-consumed input, so its first character is
    # the one that could not be matched.
    bad_char = token.value[0]
    print(f"{token.lineno} Unexpected character '{bad_char}' at position {token.lexpos}.")
    print('... ' + token.value)
    print(' ^')
    # Deliberately not skipping the character (token.lexer.skip(1)); the
    # lexer position is left where the error occurred.
# Module-level lexer instance built by PLY; lex_line() below reuses this
# single object for every line it tokenizes.
lexer = lex.lex()
def lex_line(line: str, lineno: int=0) -> List[lex.LexToken]:
    """
    Return a list of tokens for a particular HECKformat file line.

    :param line: the text of one line.
    :param lineno: line number stored on the lexer before tokenizing; it is
        also used in the error message.
    :returns: the tokens produced for ``line``, in order.
    :raises HeckLexException: if PLY raises ``LexError`` while scanning; the
        original error is attached as ``__cause__``.
    """
    # The lexer is a shared module-level object, so point it at this line.
    lexer.lineno = lineno
    try:
        lexer.input(line)
        # Named so that it does not shadow the module-level ``tokens`` tuple.
        collected = []
        while True:
            tok = lexer.token()
            if not tok:
                # token() yields a falsy value once the input is exhausted.
                break
            collected.append(tok)
        return collected
    except lex.LexError as inst:
        # Carry the lexer's message and the line number so that callers get
        # an actionable error instead of an empty exception.
        raise HeckLexException(f"Lex error on line {lineno}: {inst}") from inst
# Sample lines used by the manual smoke test below.
TEST_STRINGS = [
    '"hi yo123 123xyz #foo" 123xyz 123.223 1f abcd123 123abc $foo "hello world" #foo',
    '1.23f',
    '"hello world!" atom utehuteu tnhoeun_etuhenuoh',
    '"hi yo123 123xyz #foo" xyz 123.223 1f abcd123 abc $foo "hello world" #foo',
    '%%% heck',
    '%%% markdown foo=bar',
    'element 1.2 1.3 1.4 attrib="string value for attribute"',
    '> element 5 4 3 2.5',
]

if __name__ == "__main__":
    # Manual smoke test: tokenize every sample line and print the tokens.
    for idx, test in enumerate(TEST_STRINGS):
        print(f"Line {idx}: '{test}'")
        try:
            for token in lex_line(test, idx):
                print(' ' + str(token))
        except HeckLexException as inst:
            # Catch the lexer's own exception type explicitly so the loop
            # moves on to the next sample whatever base class it derives
            # from, and report the underlying cause when there is one.
            print(f'Error in line {idx}: {inst.__cause__ or inst}')

174
python/heckformat/parse.py Normal file
View File

@ -0,0 +1,174 @@
from typing import Iterable, Union, Mapping, TypeVar, List, TextIO, Any
import collections.abc
import re
from .parser import parser
from .exceptions import HeckParseException
# A value stored on an element: either a nested element or a scalar produced
# by the lexer. Spelled with Union and a forward reference because applying
# ``|`` to a TypeVar raises TypeError at import time on Python 3.8/3.9.
HeckValue = Union["HeckElement", str, int, float]


class HeckElement:
    """
    Container for a tree of HECKformat elements.
    """
    name: str
    """The name of the element, either __ROOT__ for top level or whatever is specified in file."""
    children: Iterable["HeckElement"]
    """The children of the element."""
    values: Iterable[HeckValue]
    """One or more values associated with the element."""
    attributes: Mapping[str, HeckValue]
    """Zero or more attributes associated with the element as a key-value pair."""
    unparsed: bool
    """True when the element holds the raw lines of a section that was not parsed."""

    def __init__(self):
        self.children = []
        self.values = []
        self.attributes = dict()
        self.name = ""
        self.unparsed = False

    def flatten(self) -> Mapping:
        """
        Collapse this element's children into a plain dictionary.

        For a parsed child the key is its name and the value is a list holding
        the child's own flattened children (as one dict, only if it has any)
        followed by its values; same-named siblings are merged by extending
        that list. For an unparsed child the key is ``'%%%UNPARSED%%% '``
        plus its name and the value is its lines joined with newlines;
        same-named unparsed siblings are concatenated with a newline.
        Attributes are not included in the output.
        """
        output = {}
        for elm in self.children:
            if elm.unparsed:
                key = '%%%UNPARSED%%% ' + elm.name
                text = '\n'.join(elm.values)
                if key in output:
                    text = '\n'.join([output[key], text])
                output[key] = text
                continue
            elmval = []
            if elm.children:
                elmval.append(elm.flatten())
            elmval.extend(elm.values)
            output.setdefault(elm.name, []).extend(elmval)
        return output

    def __str__(self):
        prefix = 'Unparsed ' if self.unparsed else ''
        return f"<HeckElement {prefix}{self.name} c={self.children} v={self.values} a={self.attributes}>"

    def __repr__(self):
        return self.__str__()
def _make_element(ast: List) -> HeckElement:
    """
    Build a HeckElement from an ``element`` node produced by the parser.

    The node looks like ``['element', name, group, ...]`` where each group is
    a list tagged ``'values'`` or ``'attributes'`` followed by its entries.
    Raises HeckParseException if the node is not tagged ``'element'``.
    """
    if ast[0] != 'element':
        raise HeckParseException(f"Found a non-element where an element was expected. {ast}")
    element = HeckElement()
    element.name = ast[1]
    for group in ast[2:]:
        kind = group[0]
        entries = group[1:]
        if kind == 'values':
            # Each entry is ('value', payload); keep just the payload.
            element.values = [entry[1] for entry in entries]
        elif kind == 'attributes':
            # Each entry is ('attribute', key, ('value', payload)).
            element.attributes.update(dict((entry[1], entry[2][1]) for entry in entries))
    return element
def load_heck(inp: Iterable[str]) -> HeckElement:
    """
    Load a HECKformat into a tree of HeckElements from a list of lines from the file.

    A ``%%% heck`` section switches to element parsing, with elements attached
    to the ``__ROOT__`` element. Any other ``%%% name`` section is stored as
    an unparsed child of the root whose values are the raw lines up to the
    next ``%%%`` line. Lines the parser yields nothing for (blank or
    comment-only lines, for instance) are skipped.

    :param inp: the lines of the document, one string per line.
    :returns: the root element, named ``__ROOT__``.
    :raises HeckParseException: if an element appears before a ``%%% heck``
        section, or if the ``>`` nesting markers cannot be resolved.
    """
    MODE_INIT = 0
    MODE_ELM = 1
    MODE_UNPARSE = 2

    rootelm = HeckElement()
    rootelm.name = "__ROOT__"
    pelm = [rootelm]  # stack of parents; the last entry receives new elements
    pdepth = 0        # nesting depth of the previous element line
    mode = MODE_INIT

    for idx, line in enumerate(inp):
        if mode == MODE_UNPARSE:
            if not line.startswith('%%%'):
                pelm[-1].values.append(line)
                continue
            # A new section header ends the unparsed section. Fall through so
            # that the header itself is parsed instead of being dropped.
            mode = MODE_INIT

        ast = parser.parse(line)
        if not ast:
            continue

        if ast[0] == 'section':
            # Every section starts again at the top nesting level.
            pdepth = 0
            if ast[1] == 'heck':
                mode = MODE_ELM
                pelm = [rootelm]
            else:
                mode = MODE_UNPARSE
                section = HeckElement()
                section.name = ast[1]
                section.unparsed = True
                rootelm.children.append(section)
                pelm = [section]
            continue

        if mode != MODE_ELM:
            raise HeckParseException(f"Didn't find heck preamble, line {idx}")

        if ast[0] == 'deep':
            # ('deep', depth, element): a sub-item nested `depth` levels down.
            depth = ast[1]
            if depth > pdepth:
                # Deeper than last time: the most recent element becomes the
                # parent. Only one level is entered per line, as before.
                try:
                    pelm.append(pelm[-1].children[-1])
                except IndexError as err:
                    raise HeckParseException(
                        f"Tried to go deeper without a previous element, line {idx}"
                    ) from err
            elif depth < pdepth:
                # Shallower than last time: unwind one parent per level
                # dropped, never discarding the bottom of the stack.
                for _ in range(pdepth - depth):
                    if len(pelm) <= 1:
                        raise HeckParseException(
                            f"Tried to go shallower while already shallow, line {idx}"
                        )
                    pelm.pop()
            ast = ast[2]
            pdepth = depth
        elif pdepth > 0:
            # No longer nested: jump straight back to the top level.
            pdepth = 0
            pelm = [rootelm]

        pelm[-1].children.append(_make_element(ast))
    return rootelm
def load(infile: TextIO) -> HeckElement:
    """
    Load a HECKformat document from an open text file.

    Line terminators are stripped before the lines are handed to load_heck so
    that the raw lines kept for unparsed sections do not carry a trailing
    newline, matching what loads() passes for the same text.
    """
    return load_heck([line.rstrip('\r\n') for line in infile])
def loads(ins: str) -> HeckElement:
    """
    Load a HECKformat document from a string.

    The text is split on ``\\r\\n``, ``\\n`` or ``\\r``. ``\\r\\n`` is listed
    first so that a Windows line ending counts as one separator instead of
    producing an extra empty line.
    """
    return load_heck(re.split(r'\r\n|\n|\r', ins))
# Sample document for the manual smoke test below: a parsed ``heck`` section
# followed by a ``markdown`` section.
TEST_HECK = """
%%% heck
# Website!
title "My Website" bold=True
subtitle "Yep it's a website"
scale 3.72
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
tags hey man what are you doin
> more tag tag tag 1 2 3
>> we can go deeper
>>> we can go even deeper
test
> _val 1
> _val 2
> _val 3
valueless
_more.orless complexelement
.yooooo
boolean True
%%% markdown
# Some cheeky markdown to confuse our processing.
All my page content goes here.
"""

if __name__ == "__main__":
    # Manual smoke test: load the sample document and print the tree.
    tree = load_heck(TEST_HECK.split('\n'))
    print(tree)

118
python/heckformat/parser.py Normal file
View File

@ -0,0 +1,118 @@
import ply.yacc as yacc
"""
Parser for HECKformat lines using PLY Parser.
"""
from .lexer import tokens
def p_value(p):
    """
    value : BASE16
          | BASE10
          | STRING
          | ATOM
    """
    # The docstring is the grammar rule read by PLY. Wrap the token's value in
    # a ("value", payload) node.
    payload = p[1]
    p[0] = ("value", payload)
def p_elm(p):
    """
    elm : ATOM
        | ELEMENT
    """
    # The docstring is the grammar rule read by PLY. An element or section
    # label is passed through unchanged.
    label = p[1]
    p[0] = label
def p_attribute(p):
    """attribute : ATOM ATTRIB value"""
    # The docstring is the grammar rule read by PLY. p[2] is the '=' token
    # and carries no information, so only the name and the value are kept.
    key, value = p[1], p[3]
    p[0] = ("attribute", key, value)
def p_attributes(p):
    """
    attributes : attributes attribute
    attributes : attribute
    """
    # The docstring is the grammar rule read by PLY. Builds a list tagged
    # "attributes": a single attribute starts a new list, otherwise the new
    # attribute is appended to the list built so far (the same list object
    # is reused).
    if len(p) != 2:
        collected = p[1]
        collected.append(p[2])
    else:
        collected = ["attributes", p[1]]
    p[0] = collected
def p_section(p):
    """
    section : SECTION elm
            | SECTION elm attributes
    """
    # The docstring is the grammar rule read by PLY. Produces
    # ("section", label) or, when attributes follow the label,
    # ("section", label, attributes).
    if len(p) != 3:
        node = ("section", p[2], p[3])
    else:
        node = ("section", p[2])
    p[0] = node
def p_values(p):
    """
    values : values value
    values : value
    """
    # The docstring is the grammar rule read by PLY. Builds a list tagged
    # "values": a single value starts a new list, otherwise the new value is
    # appended to the list built so far (the same list object is reused).
    if len(p) != 2:
        collected = p[1]
        collected.append(p[2])
    else:
        collected = ["values", p[1]]
    p[0] = collected
def p_element(p):
    """
    element : elm values
            | elm values attributes
            | elm attributes
            | elm
    """
    # The docstring is the grammar rule read by PLY. Produces
    # ["element", label] followed by whichever of the values/attributes
    # groups were present, in source order.
    matched = len(p)
    node = ["element", p[1]]
    if matched > 2:
        node.append(p[2])
        if matched == 4:
            node.append(p[3])
    p[0] = node
def p_statement(p):
    """
    statement : element
              | DEEP element
              | section
    """
    # The docstring is the grammar rule read by PLY. A DEEP-prefixed element
    # becomes ('deep', depth, element), where depth is the length of the DEEP
    # token's text; a bare element or section is passed through unchanged.
    if len(p) <= 2:
        p[0] = p[1]
        return
    marker, element = p[1], p[2]
    p[0] = ('deep', len(marker), element)
def p_error(p):
    # Parser error hook. A falsy ``p`` is ignored silently; anything else is
    # reported on stdout.
    if p:
        print(f"Syntax error {p}")
# Module-level parser built by PLY with 'statement' as the start symbol, so
# each parse() call handles one statement.
parser = yacc.yacc(start="statement")
# Sample lines used by the manual smoke test below.
TEST_STRING = [
    '%%% heck',
    '%%% heck foo=bar',
    '%%% heck bar=-5l quux="hello! how are you today?" fred=69 barney=nice',
    'title "My website!"',
    'zoom 5.73',
    'tags yo fresh',
    'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
    '> big_dumper 32 23 384848',
    '>> deep_dumper 1 2 3 a=false'
]

if __name__ == "__main__":
    # Manual smoke test: parse every sample line and print the result.
    for sample in TEST_STRING:
        parsed = parser.parse(sample)
        print(parsed)

View File

@ -0,0 +1,50 @@
# parsetab.py
# This file is automatically generated. Do not edit.
# pylint: disable=W,C,R
_tabversion = '3.10'
_lr_method = 'LALR'
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP ELEMENT SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n \n elm : ATOM\n | ELEMENT\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION elm\n | SECTION elm attributes\n \n values : values value\n values : value\n \n element : elm values\n | elm values attributes\n | elm attributes\n | elm\n \n statement : element\n | DEEP element\n | section\n '
_lr_action_items = {'DEEP':([0,],[3,]),'SECTION':([0,],[6,]),'ATOM':([0,3,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,23,24,25,26,],[7,7,17,7,-5,-6,17,22,-13,-9,-1,-2,-3,-4,22,22,-12,-8,25,22,-4,-7,]),'ELEMENT':([0,3,6,],[8,8,8,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,24,25,26,],[0,-18,-20,-17,-5,-6,-19,-14,-16,-13,-9,-1,-2,-3,-4,-10,-15,-12,-8,-11,-4,-7,]),'BASE16':([5,7,8,10,12,14,15,16,17,20,23,],[14,-5,-6,14,-13,-1,-2,-3,-4,-12,14,]),'BASE10':([5,7,8,10,12,14,15,16,17,20,23,],[15,-5,-6,15,-13,-1,-2,-3,-4,-12,15,]),'STRING':([5,7,8,10,12,14,15,16,17,20,23,],[16,-5,-6,16,-13,-1,-2,-3,-4,-12,16,]),'ATTRIB':([17,22,],[23,23,]),}
_lr_action = {}
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = {}
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'statement':([0,],[1,]),'element':([0,3,],[2,9,]),'section':([0,],[4,]),'elm':([0,3,6,],[5,5,18,]),'values':([5,],[10,]),'attributes':([5,10,18,],[11,19,24,]),'value':([5,10,23,],[12,20,26,]),'attribute':([5,10,11,18,19,24,],[13,13,21,13,21,21,]),}
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
for _x, _y in zip(_v[0], _v[1]):
if not _x in _lr_goto: _lr_goto[_x] = {}
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> statement","S'",1,None,None,None),
('value -> BASE16','value',1,'p_value','parser.py',11),
('value -> BASE10','value',1,'p_value','parser.py',12),
('value -> STRING','value',1,'p_value','parser.py',13),
('value -> ATOM','value',1,'p_value','parser.py',14),
('elm -> ATOM','elm',1,'p_elm','parser.py',22),
('elm -> ELEMENT','elm',1,'p_elm','parser.py',23),
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',27),
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',34),
('attributes -> attribute','attributes',1,'p_attributes','parser.py',35),
('section -> SECTION elm','section',2,'p_section','parser.py',46),
('section -> SECTION elm attributes','section',3,'p_section','parser.py',47),
('values -> values value','values',2,'p_values','parser.py',56),
('values -> value','values',1,'p_values','parser.py',57),
('element -> elm values','element',2,'p_element','parser.py',68),
('element -> elm values attributes','element',3,'p_element','parser.py',69),
('element -> elm attributes','element',2,'p_element','parser.py',70),
('element -> elm','element',1,'p_element','parser.py',71),
('statement -> element','statement',1,'p_statement','parser.py',84),
('statement -> DEEP element','statement',2,'p_statement','parser.py',85),
('statement -> section','statement',1,'p_statement','parser.py',86),
]