Organize and clean up Python implementation.

This commit is contained in:
Cassowary 2024-01-31 09:13:52 -08:00
parent 048898566b
commit 4693e341e1
3 changed files with 42 additions and 36 deletions

View File

@ -1,5 +1,11 @@
import ply.lex as lex import ply.lex as lex
"""
Lexical analyzer for HECKformat lines using PLY Lex.
"""
from .exceptions import HeckLexException
from typing import List, Optional from typing import List, Optional
import string import string
@ -25,6 +31,7 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
t_ignore = string.whitespace t_ignore = string.whitespace
t_DEEP = r'^(>)+' t_DEEP = r'^(>)+'
t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*' t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
t_BASE16 = r'0x[0-9A-Fa-f]+' t_BASE16 = r'0x[0-9A-Fa-f]+'
@ -60,7 +67,11 @@ def t_error(token: lex.LexToken):
lexer = lex.lex() lexer = lex.lex()
def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]: def lex_line(line: str, lineno: int=0) -> List[lex.LexToken]:
"""
Return a list of tokens for a particular HECKformat file line.
"""
lexer.lineno = lineno lexer.lineno = lineno
try: try:
lexer.input(line) lexer.input(line)
@ -74,7 +85,7 @@ def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]:
return tokens return tokens
except lex.LexError as inst: except lex.LexError as inst:
# fixme raise a HeckFormat exception # fixme raise a HeckFormat exception
raise inst raise HeckLexException from inst
TEST_STRINGS = [ TEST_STRINGS = [
'"hi yo123 123xyz #foo" 123xyz 123.223 1f abcd123 123abc $foo "hello world" #foo', '"hi yo123 123xyz #foo" 123xyz 123.223 1f abcd123 123abc $foo "hello world" #foo',

View File

@ -1,24 +1,24 @@
from typing import Iterable, Union, Mapping, TypeVar, List from typing import Iterable, Union, Mapping, TypeVar, List, TextIO
import re import re
from parser import parser from .parser import parser
from .exceptions import HeckParseException
class HeckException (Exception):
...
class HeckParseException(HeckException):
...
HeckValue = TypeVar("HeckElement") | str | int | float HeckValue = TypeVar("HeckElement") | str | int | float
class HeckElement: class HeckElement:
"""
Container for a tree of HECKformat elements.
"""
name: str name: str
"""The name of the element, either __ROOT__ for top level or whatever is specified in file."""
children: Iterable[TypeVar] children: Iterable[TypeVar]
"""The children of the element."""
values: Iterable[HeckValue] values: Iterable[HeckValue]
"""One or more values associated with the element."""
attributes: Mapping[str, HeckValue] attributes: Mapping[str, HeckValue]
"""Zero or more attributes associated with the element as a key-value pair."""
def __init__(self): def __init__(self):
self.children = [] self.children = []
@ -36,28 +36,10 @@ class HeckElement:
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()
# COMMENT ::= # .*$ def _get_element(ast: List) -> HeckElement:
# ATOM ::= [A-Za-z_][A-Za-z0-9_-]? """
# BASE10NUMBER ::= (-)?[0-9]+(\.)?[0-9]+([FLUIDCfluidc])? Get an element from an element AST from the parser.
# BASE16NUMBER ::= 0x[0-9A-Fa-f]+ """
# NUMBER ::= (<BASE10NUMBER|BASE16NUMBER>)
# STRING ::= "([^\"]*|(\\)|(\"))"
# VALUE ::= (<ATOM>|<STRING>|<NUMBER>)
# VALUES ::= <VALUE>(\s+<VALUES>)?
# ATTRIBUTENAME ::= <ATOM>
# ATTRIBUTE ::= <ATTRIBUTENAME>=<VALUE>
# ATTRIBUTES ::= <ATTRIBUTE>(\s+<ATTRIBUTES>)?
# SECTIONLABEL ::= <ATOM>
# SECTION ::= %%%\s+<SECTIONLABEL>\s+<ATTRIBUTES>
# ELEMENTLABEL ::= [A-Za-z_][A-Za-z0-9!@#$%^&*()_+/\\-]?
# ELEMENT ::= <ELEMENTLABEL>\s+(<VALUES>|<ATTRIBUTES>)
# LINE ::= ^(((>)*<ELEMENT>) | <SECTION> | <COMMENT>) (<COMMENT>|$)
# ATOM = re.compile(r'[A-Za-z_][A-Za-z0-9_-]*')
def get_element(ast: List) -> HeckElement:
if not (ast[0] == 'element'): if not (ast[0] == 'element'):
raise HeckParseException("Found a non-element where an element was expected.") raise HeckParseException("Found a non-element where an element was expected.")
elm = HeckElement() elm = HeckElement()
@ -70,6 +52,9 @@ def get_element(ast: List) -> HeckElement:
return elm return elm
def load_heck(inp: Iterable[str]) -> HeckElement: def load_heck(inp: Iterable[str]) -> HeckElement:
"""
Load a HECKformat document into a tree of HeckElements from an iterable of the file's lines.
"""
MODE_INIT = 0 MODE_INIT = 0
MODE_ELM = 1 MODE_ELM = 1
MODE_UNPARSE = 2 MODE_UNPARSE = 2
@ -102,10 +87,16 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
if not mode == MODE_ELM: if not mode == MODE_ELM:
raise HeckParseException("Didn't find heck preamble, line {idx}") raise HeckParseException("Didn't find heck preamble, line {idx}")
else: else:
pelm.children.append(get_element(ast)) pelm.children.append(_get_element(ast))
return rootelm return rootelm
def load(infile: TextIO) -> HeckElement:
    """
    Load a HECKformat document from an open text stream.

    Reads every line from *infile* and passes the resulting list to
    load_heck, returning whatever element load_heck produces.
    """
    lines = infile.readlines()
    return load_heck(lines)
def loads(ins: str) -> HeckElement:
    """
    Load a HECKformat document from a string.

    The string is split into lines on CRLF, LF or CR line endings and the
    resulting list is passed to load_heck, whose result is returned.
    """
    # Regex alternation is tried left to right, so '\r\n' must come before
    # the single-character alternatives. With '\r' first, a CRLF pair is
    # split twice and an empty string is inserted between real lines.
    return load_heck(re.split(r'\r\n|\n|\r', ins))
TEST_HECK = """ TEST_HECK = """
%%% heck %%% heck

View File

@ -1,6 +1,10 @@
import ply.yacc as yacc import ply.yacc as yacc
from lexer import tokens """
Parser for HECKformat lines using PLY Parser.
"""
from .lexer import tokens
def p_value(p): def p_value(p):
""" """