Organize and clean up Python implementation.

This commit is contained in:
Cassowary 2024-01-31 09:13:52 -08:00
parent 048898566b
commit 4693e341e1
3 changed files with 42 additions and 36 deletions

View File

@ -1,5 +1,11 @@
import ply.lex as lex
"""
Lexical analyzer for HECKformat lines using PLY Lex.
"""
from .exceptions import HeckLexException
from typing import List, Optional
import string
@ -25,6 +31,7 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
# PLY token rules: each module-level t_<NAME> string is the regex for token NAME.
# Characters the lexer skips between tokens: everything in string.whitespace.
t_ignore = string.whitespace
# DEEP: one or more '>' characters; the pattern is anchored with '^'.
t_DEEP = r'^(>)+'
# ATOM: a letter, '_' or '$', then any number of letters, digits, '_', '.' or '-'.
t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
# BASE16: the prefix '0x' followed by one or more hexadecimal digits.
t_BASE16 = r'0x[0-9A-Fa-f]+'
@ -60,7 +67,11 @@ def t_error(token: lex.LexToken):
lexer = lex.lex()
def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]:
def lex_line(line: str, lineno: int=0) -> List[lex.LexToken]:
"""
Return a list of tokens for a particular HECKformat file line.
"""
lexer.lineno = lineno
try:
lexer.input(line)
@ -74,7 +85,7 @@ def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]:
return tokens
except lex.LexError as inst:
# fixme raise a HeckFormat exception
raise inst
raise HeckLexException from inst
TEST_STRINGS = [
'"hi yo123 123xyz #foo" 123xyz 123.223 1f abcd123 123abc $foo "hello world" #foo',

View File

@ -1,24 +1,24 @@
from typing import Iterable, Union, Mapping, TypeVar, List
from typing import Iterable, Union, Mapping, TypeVar, List, TextIO
import re
from parser import parser
class HeckException(Exception):
    """Common base class for HECKformat errors."""
class HeckParseException(HeckException):
    """Raised when HECKformat input cannot be turned into elements."""
from .parser import parser
from .exceptions import HeckParseException
# A value carried by an element: a nested HeckElement, or a plain str/int/float.
# HeckElement is named through a string forward reference rather than a TypeVar:
# a TypeVar declares a generic type parameter, not a reference to a class, and
# the `TypeVar(...) | str` spelling only works on Python 3.10 and later.
HeckValue = Union["HeckElement", str, int, float]
class HeckElement:
"""
Container for a tree of HECKformat elements.
"""
name: str
"""The name of the element, either __ROOT__ for top level or whatever is specified in file."""
children: Iterable[TypeVar]
"""The children of the element."""
values: Iterable[HeckValue]
"""One or more values associated with the element."""
attributes: Mapping[str, HeckValue]
"""Zero or more attributes associated with the element as a key-value pair."""
def __init__(self):
self.children = []
@ -36,28 +36,10 @@ class HeckElement:
def __repr__(self):
    """Return the same text as ``__str__`` so both renderings agree."""
    rendered = self.__str__()
    return rendered
# COMMENT ::= # .*$
# ATOM ::= [A-Za-z_][A-Za-z0-9_-]*
# BASE10NUMBER ::= (-)?[0-9]+(\.)?[0-9]+([FLUIDCfluidc])?
# BASE16NUMBER ::= 0x[0-9A-Fa-f]+
# NUMBER ::= (<BASE10NUMBER|BASE16NUMBER>)
# STRING ::= "([^\"]*|(\\)|(\"))"
# VALUE ::= (<ATOM>|<STRING>|<NUMBER>)
# VALUES ::= <VALUE>(\s+<VALUES>)?
# ATTRIBUTENAME ::= <ATOM>
# ATTRIBUTE ::= <ATTRIBUTENAME>=<VALUE>
# ATTRIBUTES ::= <ATTRIBUTE>(\s+<ATTRIBUTES>)?
# SECTIONLABEL ::= <ATOM>
# SECTION ::= %%%\s+<SECTIONLABEL>\s+<ATTRIBUTES>
# ELEMENTLABEL ::= [A-Za-z_][A-Za-z0-9!@#$%^&*()_+/\\-]*
# ELEMENT ::= <ELEMENTLABEL>\s+(<VALUES>|<ATTRIBUTES>)
# LINE ::= ^(((>)*<ELEMENT>) | <SECTION> | <COMMENT>) (<COMMENT>|$)
# ATOM = re.compile(r'[A-Za-z_][A-Za-z0-9_-]*')
def get_element(ast: List) -> HeckElement:
def _get_element(ast: List) -> HeckElement:
"""
Get an element from an element AST from the parser.
"""
if not (ast[0] == 'element'):
raise HeckParseException("Found a non-element where an element was expected.")
elm = HeckElement()
@ -70,6 +52,9 @@ def get_element(ast: List) -> HeckElement:
return elm
def load_heck(inp: Iterable[str]) -> HeckElement:
"""
Load a HECKformat into a tree of HeckElements from a list of lines from the file.
"""
MODE_INIT = 0
MODE_ELM = 1
MODE_UNPARSE = 2
@ -102,10 +87,16 @@ def load_heck(inp: Iterable[str]) -> HeckElement:
if not mode == MODE_ELM:
raise HeckParseException("Didn't find heck preamble, line {idx}")
else:
pelm.children.append(get_element(ast))
pelm.children.append(_get_element(ast))
return rootelm
def load(infile: TextIO) -> HeckElement:
    """
    Load a HECKformat tree from an open text file.

    Every line is read from ``infile`` and the resulting list is handed to
    ``load_heck``.
    """
    lines = infile.readlines()
    return load_heck(lines)
def loads(ins: str) -> HeckElement:
    """
    Load a HECKformat tree from a string holding the whole document.

    The text is split into lines on CRLF, LF or CR line endings and the
    resulting list is handed to ``load_heck``.
    """
    # CRLF must be tried first: regex alternation takes the first alternative
    # that matches, so with a bare CR listed earlier every Windows line ending
    # was split twice and yielded a spurious empty line (shifting line numbers).
    return load_heck(re.split(r'\r\n|\n|\r', ins))
TEST_HECK = """
%%% heck

View File

@ -1,6 +1,10 @@
import ply.yacc as yacc
from lexer import tokens
"""
Parser for HECKformat lines using PLY Parser.
"""
from .lexer import tokens
def p_value(p):
"""