Compare commits

...

10 Commits

Author SHA1 Message Date
277fddc699 minor changes to parsing process, and map file content to structure.
- Move magic marker for unparsed sections to a constant to match
  against.
- Add functions to flatten a heckelement tree into a python structure
2024-02-10 20:59:25 -08:00
9b2f9b706c Remove old version. 2024-02-08 21:14:21 -08:00
7c98e6e895 Reorganize project layout. Add buildability. 2024-02-08 21:14:00 -08:00
49ddb152ec Fix element labels to be in line with original design. 2024-02-03 10:13:14 -08:00
71e7cd3f1b Add LICENSE 2024-02-03 09:50:37 -08:00
6a478aa877 Implement subelements. Add allowing valueless elements. Start to design the native structure interface. 2024-02-03 09:42:20 -08:00
4693e341e1 Organize and clean up Python implementation. 2024-01-31 09:13:52 -08:00
048898566b Organize and clean up Python implementation. 2024-01-31 09:13:38 -08:00
fdc0e876ed Add .gitignore 2024-01-31 08:32:46 -08:00
cd4fc75356 Remove some PLY cruft 2024-01-31 08:30:46 -08:00
14 changed files with 587 additions and 522 deletions

214
.gitignore vendored Normal file
View File

@ -0,0 +1,214 @@
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
# flymake-mode
*_flymake.*
# eshell files
/eshell/history
/eshell/lastdir
# elpa packages
/elpa/
# reftex files
.rel
# AUCTeX auto folder
/auto/
# cask packages
.cask/
dist/
# Flycheck
flycheck_*.el
# server auth directory
/server/
# projectiles files
.projectile
# directory configuration
.dir-locals.el
# network security
/network-security.data
#### PYTHON
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
parser.out

10
LICENSE Normal file
View File

@ -0,0 +1,10 @@
No Nazis, otherwise:
Copyright (c) 2024, Aldercone Studio Collective
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

7
TODO Normal file
View File

@ -0,0 +1,7 @@
Python:
- More options for accessing a heck tree.
- automated testing
Guile, C (or D), and Haxe:
- Initial implementation

21
pyproject.toml Normal file
View File

@ -0,0 +1,21 @@
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[project]
name = "heckformat"
# The version is not written here; it is resolved by the [tool.pdm.version] table.
dynamic = ["version"]
description = "A simple format for configuration and content storage."
authors = [{name = "Cassowary", email="cassowary@aldercone.studio"}]
dependencies = ["ply>=3.1"]
requires-python = ">=3.8"
readme = "README.md"
# Point at the LICENSE file; `text = "LICENSE"` would publish the literal
# word "LICENSE" as the licence text instead of the file's contents.
license = {file = "LICENSE"}
[tool.pdm.build]
package-dir = "python"
[tool.pdm.version]
source = "file"
path = "python/heckformat/__init__.py"

View File

@ -1,127 +0,0 @@
from typing import Iterable, Union, Mapping, TypeVar, List
import re
from parser import parser
class HeckException(Exception):
    """Common ancestor of the errors raised by this module."""


class HeckParseException(HeckException):
    """Raised when the input cannot be turned into a HeckElement tree."""
# Type of anything stored as an element value or attribute value.
# HeckElement is named as a string because the class is defined further down.
# typing.Union is used instead of `TypeVar(...) | str`: TypeVar only gained
# `|` support in Python 3.10, and a TypeVar is not a forward reference.
HeckValue = Union["HeckElement", str, int, float]
class HeckElement:
    """
    One node in a tree of HECK elements.

    Every instance starts out empty (no name, children, values or
    attributes) and is filled in by the loader after construction.
    """
    # Element label; the loader names the top-level node "__ROOT__".
    name: str
    # Nested elements. Annotated by name because the class refers to itself.
    children: List["HeckElement"]
    # Values attached to the element, in file order.
    values: List["HeckValue"]
    # Attribute name -> attribute value (stored as a plain dict).
    attributes: Mapping[str, "HeckValue"]
    # True when the node holds the raw lines of a non-heck section.
    unparsed: bool

    def __init__(self):
        self.children = []
        self.values = []
        self.attributes = dict()
        self.name = ""
        self.unparsed = False

    def __str__(self):
        """Return a one-line debugging representation of the node."""
        prefix = 'Unparsed ' if self.unparsed else ''
        return f"<HeckElement {prefix}{self.name} c={self.children} v={self.values} a={self.attributes}>"

    def __repr__(self):
        """Use the same text as ``str()`` so nested children print readably."""
        return self.__str__()
# COMMENT ::= # .*$
# ATOM ::= [A-Za-z_][A-Za-z0-9_-]*
# BASE10NUMBER ::= (-)?[0-9]+(\.)?[0-9]+([FLUIDCfluidc])?
# BASE16NUMBER ::= 0x[0-9A-Fa-f]+
# NUMBER ::= (<BASE10NUMBER|BASE16NUMBER>)
# STRING ::= "([^\"]*|(\\)|(\"))"
# VALUE ::= (<ATOM>|<STRING>|<NUMBER>)
# VALUES ::= <VALUE>(\s+<VALUES>)?
# ATTRIBUTENAME ::= <ATOM>
# ATTRIBUTE ::= <ATTRIBUTENAME>=<VALUE>
# ATTRIBUTES ::= <ATTRIBUTE>(\s+<ATTRIBUTES>)?
# SECTIONLABEL ::= <ATOM>
# SECTION ::= %%%\s+<SECTIONLABEL>\s+<ATTRIBUTES>
# ELEMENTLABEL ::= [A-Za-z_][A-Za-z0-9!@#$%^&*()_+/\\-]*
# ELEMENT ::= <ELEMENTLABEL>\s+(<VALUES>|<ATTRIBUTES>)
# LINE ::= ^(((>)*<ELEMENT>) | <SECTION> | <COMMENT>) (<COMMENT>|$)
# ATOM = re.compile(r'[A-Za-z_][A-Za-z0-9_-]*')
def get_element(ast: List) -> HeckElement:
    """
    Build a HeckElement from an 'element' AST node.

    ``ast[0]`` must be the tag 'element' and ``ast[1]`` is used as the
    name. Each following item is inspected by its own tag: a 'values'
    item replaces the element's values with the second field of every
    entry, and an 'attributes' item adds entry[1] -> entry[2][1] pairs.
    Items with any other tag are ignored.

    Raises HeckParseException when the node is not tagged 'element'.
    """
    if ast[0] != 'element':
        raise HeckParseException("Found a non-element where an element was expected.")

    node = HeckElement()
    node.name = ast[1]
    for part in ast[2:]:
        tag = part[0]
        if tag == 'values':
            node.values = [entry[1] for entry in part[1:]]
        elif tag == 'attributes':
            parsed = {entry[1]: entry[2][1] for entry in part[1:]}
            node.attributes.update(parsed)
    return node
def load_heck(inp: Iterable[str]) -> HeckElement:
    """
    Build a HeckElement tree from an iterable of HECK lines.

    A ``%%% heck`` section switches to element mode, where every parsed
    line becomes a child of the root. Any other section creates an
    unparsed child of the root whose values collect the following lines
    verbatim, until the next line starting with ``%%%``.

    Returns the root element, named "__ROOT__".

    Raises HeckParseException when an element line appears before any
    ``%%% heck`` section; the message carries the 1-based line number.
    """
    MODE_INIT = 0     # no usable section header seen yet
    MODE_ELM = 1      # inside a "%%% heck" section
    MODE_UNPARSE = 2  # inside any other section: lines are kept verbatim

    rootelm = HeckElement()
    rootelm.name = "__ROOT__"
    pelm = rootelm  # element currently receiving children / raw lines
    mode = MODE_INIT

    for idx, line in enumerate(inp, start=1):
        if mode == MODE_UNPARSE:
            if not line.startswith('%%%'):
                pelm.values.append(line)
                continue
            # A section marker ends the raw block; fall through so this
            # same line is parsed as the header of the next section.
            mode = MODE_INIT

        ast = parser.parse(line)
        if not ast:
            # Nothing to act on for this line.
            continue

        if ast[0] == 'section':
            if ast[1] == 'heck':
                mode = MODE_ELM
                pelm = rootelm
            else:
                mode = MODE_UNPARSE
                pelm = HeckElement()
                rootelm.children.append(pelm)
                pelm.name = ast[1]
                pelm.unparsed = True
        elif mode != MODE_ELM:
            # The f-prefix was missing, so "{idx}" was printed literally.
            raise HeckParseException(f"Didn't find heck preamble, line {idx}")
        else:
            pelm.children.append(get_element(ast))
    return rootelm
# Sample document for the manual smoke test below: a "heck" section with a
# comment line, valued elements and an attribute, followed by a "markdown"
# section whose body is passed to load_heck like any other line.
TEST_HECK = """
%%% heck
# Website!
title "My Website" bold=True
subtitle "Yep it's a website"
scale 3.72
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
tags hey man what are you doin
%%% markdown
# Some cheeky markdown to confuse our processing.
All my page content goes here.
"""
if __name__ == "__main__":
    # Running the module directly loads the sample and prints the tree.
    result = load_heck(TEST_HECK.split('\n'))
    print(result)

View File

@ -1,333 +0,0 @@
Created by PLY version 3.11 (http://www.dabeaz.com/ply)
Unused terminals:
COMMENT
Grammar
Rule 0 S' -> statement
Rule 1 value -> BASE16
Rule 2 value -> BASE10
Rule 3 value -> STRING
Rule 4 value -> ATOM
Rule 5 attribute -> ATOM ATTRIB value
Rule 6 attributes -> attributes attribute
Rule 7 attributes -> attribute
Rule 8 section -> SECTION ATOM
Rule 9 section -> SECTION ATOM attributes
Rule 10 values -> values value
Rule 11 values -> value
Rule 12 element -> ATOM values
Rule 13 element -> ATOM values attributes
Rule 14 element -> ATOM attributes
Rule 15 statement -> element
Rule 16 statement -> DEEP element
Rule 17 statement -> section
Terminals, with rules where they appear
ATOM : 4 5 8 9 12 13 14
ATTRIB : 5
BASE10 : 2
BASE16 : 1
COMMENT :
DEEP : 16
SECTION : 8 9
STRING : 3
error :
Nonterminals, with rules where they appear
attribute : 6 7
attributes : 6 9 13 14
element : 15 16
section : 17
statement : 0
value : 5 10 11
values : 10 12 13
Parsing method: LALR
state 0
(0) S' -> . statement
(15) statement -> . element
(16) statement -> . DEEP element
(17) statement -> . section
(12) element -> . ATOM values
(13) element -> . ATOM values attributes
(14) element -> . ATOM attributes
(8) section -> . SECTION ATOM
(9) section -> . SECTION ATOM attributes
DEEP shift and go to state 3
ATOM shift and go to state 5
SECTION shift and go to state 6
statement shift and go to state 1
element shift and go to state 2
section shift and go to state 4
state 1
(0) S' -> statement .
state 2
(15) statement -> element .
$end reduce using rule 15 (statement -> element .)
state 3
(16) statement -> DEEP . element
(12) element -> . ATOM values
(13) element -> . ATOM values attributes
(14) element -> . ATOM attributes
ATOM shift and go to state 5
element shift and go to state 7
state 4
(17) statement -> section .
$end reduce using rule 17 (statement -> section .)
state 5
(12) element -> ATOM . values
(13) element -> ATOM . values attributes
(14) element -> ATOM . attributes
(10) values -> . values value
(11) values -> . value
(6) attributes -> . attributes attribute
(7) attributes -> . attribute
(1) value -> . BASE16
(2) value -> . BASE10
(3) value -> . STRING
(4) value -> . ATOM
(5) attribute -> . ATOM ATTRIB value
BASE16 shift and go to state 13
BASE10 shift and go to state 14
STRING shift and go to state 15
ATOM shift and go to state 8
values shift and go to state 9
attributes shift and go to state 10
value shift and go to state 11
attribute shift and go to state 12
state 6
(8) section -> SECTION . ATOM
(9) section -> SECTION . ATOM attributes
ATOM shift and go to state 16
state 7
(16) statement -> DEEP element .
$end reduce using rule 16 (statement -> DEEP element .)
state 8
(4) value -> ATOM .
(5) attribute -> ATOM . ATTRIB value
BASE16 reduce using rule 4 (value -> ATOM .)
BASE10 reduce using rule 4 (value -> ATOM .)
STRING reduce using rule 4 (value -> ATOM .)
ATOM reduce using rule 4 (value -> ATOM .)
$end reduce using rule 4 (value -> ATOM .)
ATTRIB shift and go to state 17
state 9
(12) element -> ATOM values .
(13) element -> ATOM values . attributes
(10) values -> values . value
(6) attributes -> . attributes attribute
(7) attributes -> . attribute
(1) value -> . BASE16
(2) value -> . BASE10
(3) value -> . STRING
(4) value -> . ATOM
(5) attribute -> . ATOM ATTRIB value
$end reduce using rule 12 (element -> ATOM values .)
BASE16 shift and go to state 13
BASE10 shift and go to state 14
STRING shift and go to state 15
ATOM shift and go to state 8
attributes shift and go to state 18
value shift and go to state 19
attribute shift and go to state 12
state 10
(14) element -> ATOM attributes .
(6) attributes -> attributes . attribute
(5) attribute -> . ATOM ATTRIB value
$end reduce using rule 14 (element -> ATOM attributes .)
ATOM shift and go to state 20
attribute shift and go to state 21
state 11
(11) values -> value .
BASE16 reduce using rule 11 (values -> value .)
BASE10 reduce using rule 11 (values -> value .)
STRING reduce using rule 11 (values -> value .)
ATOM reduce using rule 11 (values -> value .)
$end reduce using rule 11 (values -> value .)
state 12
(7) attributes -> attribute .
ATOM reduce using rule 7 (attributes -> attribute .)
$end reduce using rule 7 (attributes -> attribute .)
state 13
(1) value -> BASE16 .
BASE16 reduce using rule 1 (value -> BASE16 .)
BASE10 reduce using rule 1 (value -> BASE16 .)
STRING reduce using rule 1 (value -> BASE16 .)
ATOM reduce using rule 1 (value -> BASE16 .)
$end reduce using rule 1 (value -> BASE16 .)
state 14
(2) value -> BASE10 .
BASE16 reduce using rule 2 (value -> BASE10 .)
BASE10 reduce using rule 2 (value -> BASE10 .)
STRING reduce using rule 2 (value -> BASE10 .)
ATOM reduce using rule 2 (value -> BASE10 .)
$end reduce using rule 2 (value -> BASE10 .)
state 15
(3) value -> STRING .
BASE16 reduce using rule 3 (value -> STRING .)
BASE10 reduce using rule 3 (value -> STRING .)
STRING reduce using rule 3 (value -> STRING .)
ATOM reduce using rule 3 (value -> STRING .)
$end reduce using rule 3 (value -> STRING .)
state 16
(8) section -> SECTION ATOM .
(9) section -> SECTION ATOM . attributes
(6) attributes -> . attributes attribute
(7) attributes -> . attribute
(5) attribute -> . ATOM ATTRIB value
$end reduce using rule 8 (section -> SECTION ATOM .)
ATOM shift and go to state 20
attributes shift and go to state 22
attribute shift and go to state 12
state 17
(5) attribute -> ATOM ATTRIB . value
(1) value -> . BASE16
(2) value -> . BASE10
(3) value -> . STRING
(4) value -> . ATOM
BASE16 shift and go to state 13
BASE10 shift and go to state 14
STRING shift and go to state 15
ATOM shift and go to state 23
value shift and go to state 24
state 18
(13) element -> ATOM values attributes .
(6) attributes -> attributes . attribute
(5) attribute -> . ATOM ATTRIB value
$end reduce using rule 13 (element -> ATOM values attributes .)
ATOM shift and go to state 20
attribute shift and go to state 21
state 19
(10) values -> values value .
BASE16 reduce using rule 10 (values -> values value .)
BASE10 reduce using rule 10 (values -> values value .)
STRING reduce using rule 10 (values -> values value .)
ATOM reduce using rule 10 (values -> values value .)
$end reduce using rule 10 (values -> values value .)
state 20
(5) attribute -> ATOM . ATTRIB value
ATTRIB shift and go to state 17
state 21
(6) attributes -> attributes attribute .
ATOM reduce using rule 6 (attributes -> attributes attribute .)
$end reduce using rule 6 (attributes -> attributes attribute .)
state 22
(9) section -> SECTION ATOM attributes .
(6) attributes -> attributes . attribute
(5) attribute -> . ATOM ATTRIB value
$end reduce using rule 9 (section -> SECTION ATOM attributes .)
ATOM shift and go to state 20
attribute shift and go to state 21
state 23
(4) value -> ATOM .
ATOM reduce using rule 4 (value -> ATOM .)
$end reduce using rule 4 (value -> ATOM .)
state 24
(5) attribute -> ATOM ATTRIB value .
ATOM reduce using rule 5 (attribute -> ATOM ATTRIB value .)
$end reduce using rule 5 (attribute -> ATOM ATTRIB value .)

View File

@ -1,47 +0,0 @@
# parsetab.py
# This file is automatically generated. Do not edit.
# pylint: disable=W,C,R
_tabversion = '3.10'
_lr_method = 'LALR'
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION ATOM\n | SECTION ATOM attributes\n \n values : values value\n values : value\n \n element : ATOM values\n | ATOM values attributes\n | ATOM attributes\n \n statement : element\n | DEEP element\n | section\n '
_lr_action_items = {'DEEP':([0,],[3,]),'ATOM':([0,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,21,22,23,24,],[5,5,8,16,-4,8,20,-11,-7,-1,-2,-3,20,23,20,-10,-6,20,-4,-5,]),'SECTION':([0,],[6,]),'$end':([1,2,4,7,8,9,10,11,12,13,14,15,16,18,19,21,22,23,24,],[0,-15,-17,-16,-4,-12,-14,-11,-7,-1,-2,-3,-8,-13,-10,-6,-9,-4,-5,]),'BASE16':([5,8,9,11,13,14,15,17,19,],[13,-4,13,-11,-1,-2,-3,13,-10,]),'BASE10':([5,8,9,11,13,14,15,17,19,],[14,-4,14,-11,-1,-2,-3,14,-10,]),'STRING':([5,8,9,11,13,14,15,17,19,],[15,-4,15,-11,-1,-2,-3,15,-10,]),'ATTRIB':([8,20,],[17,17,]),}
_lr_action = {}
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = {}
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'statement':([0,],[1,]),'element':([0,3,],[2,7,]),'section':([0,],[4,]),'values':([5,],[9,]),'attributes':([5,9,16,],[10,18,22,]),'value':([5,9,17,],[11,19,24,]),'attribute':([5,9,10,16,18,22,],[12,12,21,12,21,21,]),}
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
for _x, _y in zip(_v[0], _v[1]):
if not _x in _lr_goto: _lr_goto[_x] = {}
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> statement","S'",1,None,None,None),
('value -> BASE16','value',1,'p_value','parser.py',7),
('value -> BASE10','value',1,'p_value','parser.py',8),
('value -> STRING','value',1,'p_value','parser.py',9),
('value -> ATOM','value',1,'p_value','parser.py',10),
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',17),
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',24),
('attributes -> attribute','attributes',1,'p_attributes','parser.py',25),
('section -> SECTION ATOM','section',2,'p_section','parser.py',36),
('section -> SECTION ATOM attributes','section',3,'p_section','parser.py',37),
('values -> values value','values',2,'p_values','parser.py',46),
('values -> value','values',1,'p_values','parser.py',47),
('element -> ATOM values','element',2,'p_element','parser.py',58),
('element -> ATOM values attributes','element',3,'p_element','parser.py',59),
('element -> ATOM attributes','element',2,'p_element','parser.py',60),
('statement -> element','statement',1,'p_statement','parser.py',70),
('statement -> DEEP element','statement',2,'p_statement','parser.py',71),
('statement -> section','statement',1,'p_statement','parser.py',72),
]

View File

@ -0,0 +1,2 @@
# Package version string.
# NOTE(review): presumably the value read by [tool.pdm.version] in
# pyproject.toml (source = "file") - confirm this is python/heckformat/__init__.py.
__version__ = "0.0.1"

View File

@ -0,0 +1,20 @@
"""
Exceptions for HECKfile processing.
"""
class HeckException(Exception):
    """
    Base exception for HECKfile processing.

    Derives from Exception, not BaseException, so that ordinary
    ``except Exception`` handlers catch HECK errors like any other
    application error.
    """


class HeckParseException(HeckException):
    """
    Raised for parse errors specifically.
    """


class HeckLexException(HeckException):
    """
    Raised for lex errors specifically.
    """

View File

@ -1,10 +1,16 @@
import ply.lex as lex
"""
Lexical analyzer for HECKformat lines using PLY Lex.
"""
from .exceptions import HeckLexException
from typing import List, Optional
import string
tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB', 'DEEP')
tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB', 'DEEP', 'ELEMENT')
# COMMENT ::= # .*$
# ATOM ::= [A-Za-z_][A-Za-z0-9_-]*
@ -25,11 +31,23 @@ tokens = ('ATOM', 'BASE10', 'BASE16', 'COMMENT', 'STRING', 'SECTION', 'ATTRIB',
t_ignore = string.whitespace
t_DEEP = r'^(>)+'
t_ATOM = r'[A-Za-z_$][A-Za-z0-9_.-]*'
t_BASE16 = r'0x[0-9A-Fa-f]+'
t_SECTION = r'^%%%\s'
t_ATTRIB = '='
t_ELEMENT = r'[A-Za-z_.][A-Za-z0-9.!@\$%^&*()_+/\\-]*'
def t_ATOM(token: lex.LexToken):
    r'[A-Za-z_$][A-Za-z0-9_.-]*'
    # The raw string above is the token's pattern, so it must stay the
    # first statement. Atoms spelled true/True or false/False are turned
    # into Python booleans; every other atom keeps its matched text.
    text = token.value
    if text == 'true' or text == 'True':
        token.value = True
    elif text == 'false' or text == 'False':
        token.value = False
    return token
def t_BASE10(token: lex.LexToken):
r'(-)?[0-9]+(\.?[0-9]+)?([FLUIDCfluidc])?(\b|$)'
@ -60,7 +78,11 @@ def t_error(token: lex.LexToken):
lexer = lex.lex()
def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]:
def lex_line(line: str, lineno: int=0) -> List[lex.LexToken]:
"""
Return a list of tokens for a particular HECKformat file line.
"""
lexer.lineno = lineno
try:
lexer.input(line)
@ -74,7 +96,7 @@ def lex_line(line, lineno=0) -> Optional[List[lex.LexToken]]:
return tokens
except lex.LexError as inst:
# fixme raise a HeckFormat exception
raise inst
raise HeckLexException from inst
TEST_STRINGS = [
'"hi yo123 123xyz #foo" 123xyz 123.223 1f abcd123 123abc $foo "hello world" #foo',

210
python/heckformat/parse.py Normal file
View File

@ -0,0 +1,210 @@
from typing import Iterable, Union, Mapping, TypeVar, List, TextIO, Any
import collections.abc
import re
from .parser import parser
from .exceptions import HeckParseException
# Type of anything stored as an element value or attribute value.
# HeckElement is named as a string because the class is defined further down.
# typing.Union is used instead of `TypeVar(...) | str`: TypeVar only gained
# `|` support in Python 3.10, and a TypeVar is not a forward reference.
HeckValue = Union["HeckElement", str, int, float]

# Key prefix that the flatten methods put in front of the name of an
# unparsed section, so such entries can be matched against.
UNPARSED_MARKER = "%%% UNPARSED %%% "
class HeckElement:
    """
    Container for a tree of HECKformat elements.
    """
    name: str
    """The name of the element, either __ROOT__ for top level or whatever is specified in file."""

    children: List["HeckElement"]
    """The children of the element."""

    values: List["HeckValue"]
    """One or more values associated with the element."""

    attributes: Mapping[str, "HeckValue"]
    """Zero or more attributes associated with the element as a key-value pair."""

    unparsed: bool
    """True when the element holds the raw lines of a non-heck section in ``values``."""

    def __init__(self):
        self.children = []
        self.values = []
        self.attributes = dict()
        self.name = ""
        self.unparsed = False

    def flatten(self) -> Mapping:
        """
        Convert a hecktree element into a dictionary.

        Each child contributes one key. A parsed child maps its name to a
        list: first the flattened dictionary of its own children (only
        when it has any), then its values. Children sharing a name are
        merged into one list. An unparsed child maps
        ``UNPARSED_MARKER + name`` to its lines joined with newlines;
        repeated unparsed names are concatenated with a newline.
        Attributes are not included in the result.
        """
        output = {}
        for elm in self.children:
            if elm.unparsed:
                nam = UNPARSED_MARKER + elm.name
                val = '\n'.join(elm.values)
                if nam in output:
                    output[nam] = '\n'.join([output[nam], val])
                else:
                    output[nam] = val
                continue

            elmval = []
            if elm.children:
                elmval.append(elm.flatten())
            elmval.extend(elm.values)
            # Same-named siblings accumulate into a single list.
            output.setdefault(elm.name, []).extend(elmval)
        return output

    def get_flat_value(self) -> Union[List["HeckValue"], "HeckValue", None]:
        """
        Return the element's values in their simplest form.

        None when there are no values, the bare value when there is
        exactly one, and a new list when there are several.
        """
        if not self.values:
            return None
        if len(self.values) > 1:
            return list(self.values)
        return self.values[0]

    def flatten_replace(self) -> Mapping:
        """
        Convert a hecktree element into a dictionary (but don't try to merge values, just pretend each key is unique)

        A later child replaces an earlier one of the same name. A parsed
        child without children maps to ``get_flat_value()``; one with
        children maps to ``{'children': <flattened children>, 'value':
        <flat value>}``. Unparsed children map ``UNPARSED_MARKER + name``
        to their lines joined with newlines.
        """
        output = {}
        for elm in self.children:
            if elm.unparsed:
                output[UNPARSED_MARKER + elm.name] = '\n'.join(elm.values)
            elif elm.children:
                output[elm.name] = {
                    'children': elm.flatten_replace(),
                    'value': elm.get_flat_value(),
                }
            else:
                output[elm.name] = elm.get_flat_value()
        return output

    def __str__(self):
        """Return a one-line debugging representation of the node."""
        k = 'Unparsed ' if self.unparsed else ''
        return f"<HeckElement {k}{self.name} c={self.children} v={self.values} a={self.attributes}>"

    def __repr__(self):
        """Use the same text as ``str()`` so nested children print readably."""
        return self.__str__()
def _make_element(ast: List) -> HeckElement:
    """
    Turn an 'element' AST node from the parser into a HeckElement.

    ``ast[0]`` must be the tag 'element' and ``ast[1]`` is used as the
    name. Each following item is inspected by its own tag: a 'values'
    item replaces the element's values with the second field of every
    entry, and an 'attributes' item adds entry[1] -> entry[2][1] pairs.
    Items with any other tag are ignored, and a node with nothing after
    the name yields an element without values or attributes.

    Raises HeckParseException when the node is not tagged 'element'.
    """
    if ast[0] != 'element':
        raise HeckParseException(f"Found a non-element where an element was expected. {ast}")

    node = HeckElement()
    node.name = ast[1]
    for part in ast[2:]:
        tag = part[0]
        if tag == 'values':
            node.values = [entry[1] for entry in part[1:]]
        elif tag == 'attributes':
            parsed = {entry[1]: entry[2][1] for entry in part[1:]}
            node.attributes.update(parsed)
    return node
def load_heck(inp: Iterable[str]) -> HeckElement:
    """
    Load a HECKformat into a tree of HeckElements from a list of lines from the file.

    A ``%%% heck`` section switches to element mode. Any other section
    creates an unparsed child of the root whose values collect the
    following lines verbatim, until the next line starting with ``%%%``.

    In element mode a line without a depth prefix becomes a child of the
    root. A line parsed as ('deep', depth, element) becomes a child of
    the most recent element one level above it; when the depth drops,
    the parent stack is cut back to that depth in one step.

    Returns the root element, named "__ROOT__".

    Raises HeckParseException (messages carry the 1-based line number)
    when an element appears before any ``%%% heck`` section, or when a
    deeper line has no preceding element to nest under.
    """
    MODE_INIT = 0     # no usable section header seen yet
    MODE_ELM = 1      # inside a "%%% heck" section
    MODE_UNPARSE = 2  # inside any other section: lines are kept verbatim

    rootelm = HeckElement()
    rootelm.name = "__ROOT__"
    # Stack of parents: pelm[0] is the section's top element and pelm[-1]
    # is the element that receives the next child (or raw line).
    pelm = [rootelm]
    pdepth = 0  # depth of the previously handled element line
    mode = MODE_INIT

    for idx, line in enumerate(inp, start=1):
        if mode == MODE_UNPARSE:
            if not line.startswith('%%%'):
                pelm[-1].values.append(line)
                continue
            # A section marker ends the raw block; fall through so this
            # same line is parsed as the header of the next section.
            mode = MODE_INIT

        ast = parser.parse(line)
        if not ast:
            # Nothing to act on for this line.
            continue

        if ast[0] == 'section':
            # Nesting never carries over from one section to the next.
            pdepth = 0
            if ast[1] == 'heck':
                mode = MODE_ELM
                pelm = [rootelm]
            else:
                mode = MODE_UNPARSE
                section = HeckElement()
                section.name = ast[1]
                section.unparsed = True
                rootelm.children.append(section)
                pelm = [section]
            continue

        if mode != MODE_ELM:
            raise HeckParseException(f"Didn't find heck preamble, line {idx}")

        if ast[0] == 'deep':
            # we're in a subitem
            depth = ast[1]
            if depth > pdepth:
                # Deeper than last time: the latest sibling becomes the parent.
                try:
                    pelm.append(pelm[-1].children[-1])
                except IndexError as err:
                    raise HeckParseException(
                        f"Tried to go deeper without a previous element, line {idx}"
                    ) from err
            elif depth < pdepth:
                # Shallower than last time: drop every level below the new
                # depth, not just one. Index 0 is never removed, so the
                # stack cannot become empty.
                del pelm[depth + 1:]
            ast = ast[2]
            pdepth = depth
        elif pdepth > 0:
            # we're no longer deep, just pop up to the top
            pdepth = 0
            pelm = [rootelm]

        pelm[-1].children.append(_make_element(ast))

    return rootelm
def load(infile: TextIO) -> HeckElement:
    """
    Load a HECK document from an open text file.

    Line terminators are stripped before the lines reach load_heck, so
    the raw lines of unparsed sections are the same as those produced by
    ``loads`` and are not doubled when later joined with newlines.
    """
    return load_heck(line.rstrip('\r\n') for line in infile)
def loads(ins: str) -> HeckElement:
    """
    Load a HECK document from a string.

    The text is split on CRLF, LF or CR. The two-character CRLF branch
    must come first: alternatives are tried left to right, so with a
    bare CR listed earlier every CRLF was split twice and produced a
    spurious empty line.
    """
    return load_heck(re.split(r'\r\n|\n|\r', ins))
# Sample document for the manual smoke test below: a "heck" section with a
# comment line, valued and valueless elements, an attribute, ">"-prefixed
# sub-elements at several depths and labels containing "." and "_",
# followed by a "markdown" section that is kept as raw lines.
TEST_HECK = """
%%% heck
# Website!
title "My Website" bold=True
subtitle "Yep it's a website"
scale 3.72
matrix 0 0 0 0 1 2 3 1 2 3 4 29394.2
tags hey man what are you doin
> more tag tag tag 1 2 3
>> we can go deeper
>>> we can go even deeper
test
> _val 1
> _val 2
> _val 3
valueless
_more.orless complexelement
.yooooo
boolean True
%%% markdown
# Some cheeky markdown to confuse our processing.
All my page content goes here.
"""
if __name__ == "__main__":
    # Running the module directly loads the sample and prints the tree.
    result = load_heck(TEST_HECK.split('\n'))
    print(result)

View File

@ -1,6 +1,10 @@
import ply.yacc as yacc
from lexer import tokens
"""
Parser for HECKformat lines using PLY Parser.
"""
from .lexer import tokens
def p_value(p):
"""
@ -13,6 +17,13 @@ def p_value(p):
p[0] = ("value", p[1])
def p_elm(p):
    """
    elm : ATOM
        | ELEMENT
    """
    # The docstring is the grammar rule: an element or section label may
    # be either an ATOM or an ELEMENT token. The label is passed through
    # unchanged as the rule's result.
    p[0] = p[1]
def p_attribute(p):
"""attribute : ATOM ATTRIB value"""
# print(p[0], p[1])
@ -33,8 +44,8 @@ def p_attributes(p):
def p_section(p):
"""
section : SECTION ATOM
| SECTION ATOM attributes
section : SECTION elm
| SECTION elm attributes
"""
if (len(p) == 3):
p[0] = ("section", p[2])
@ -55,14 +66,18 @@ def p_values(p):
def p_element(p):
"""
element : ATOM values
| ATOM values attributes
| ATOM attributes
element : elm values
| elm values attributes
| elm attributes
| elm
"""
# print(len(p))
p[0] = ["element", p[1], p[2]]
if (len(p) == 4):
p[0].append(p[3])
if len(p) <= 2:
p[0] = ["element", p[1]]
else:
p[0] = ["element", p[1], p[2]]
if (len(p) == 4):
p[0].append(p[3])
def p_statement(p):
@ -72,7 +87,7 @@ def p_statement(p):
| section
"""
if (len(p) > 2):
p[0] = ('deep', p[2])
p[0] = ('deep', len(p[1]), p[2])
else:
p[0] = p[1]
@ -81,7 +96,7 @@ def p_error(p):
if not p:
return
else:
print("Syntax error {p}")
print(f"Syntax error {p}")
parser = yacc.yacc(start="statement")
@ -95,6 +110,7 @@ TEST_STRING = [
'tags yo fresh',
'dumper 1 2 3 4 5 6 7 8 9 dumpped=True',
'> big_dumper 32 23 384848',
'>> deep_dumper 1 2 3 a=false'
]
if __name__ == "__main__":

View File

@ -0,0 +1,50 @@
# parsetab.py
# This file is automatically generated. Do not edit.
# pylint: disable=W,C,R
_tabversion = '3.10'
_lr_method = 'LALR'
_lr_signature = 'statementATOM ATTRIB BASE10 BASE16 COMMENT DEEP ELEMENT SECTION STRING\n value : BASE16\n | BASE10\n | STRING\n | ATOM\n \n elm : ATOM\n | ELEMENT\n attribute : ATOM ATTRIB value\n attributes : attributes attribute\n attributes : attribute\n \n section : SECTION elm\n | SECTION elm attributes\n \n values : values value\n values : value\n \n element : elm values\n | elm values attributes\n | elm attributes\n | elm\n \n statement : element\n | DEEP element\n | section\n '
_lr_action_items = {'DEEP':([0,],[3,]),'SECTION':([0,],[6,]),'ATOM':([0,3,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,23,24,25,26,],[7,7,17,7,-5,-6,17,22,-13,-9,-1,-2,-3,-4,22,22,-12,-8,25,22,-4,-7,]),'ELEMENT':([0,3,6,],[8,8,8,]),'$end':([1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,24,25,26,],[0,-18,-20,-17,-5,-6,-19,-14,-16,-13,-9,-1,-2,-3,-4,-10,-15,-12,-8,-11,-4,-7,]),'BASE16':([5,7,8,10,12,14,15,16,17,20,23,],[14,-5,-6,14,-13,-1,-2,-3,-4,-12,14,]),'BASE10':([5,7,8,10,12,14,15,16,17,20,23,],[15,-5,-6,15,-13,-1,-2,-3,-4,-12,15,]),'STRING':([5,7,8,10,12,14,15,16,17,20,23,],[16,-5,-6,16,-13,-1,-2,-3,-4,-12,16,]),'ATTRIB':([17,22,],[23,23,]),}
_lr_action = {}
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = {}
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'statement':([0,],[1,]),'element':([0,3,],[2,9,]),'section':([0,],[4,]),'elm':([0,3,6,],[5,5,18,]),'values':([5,],[10,]),'attributes':([5,10,18,],[11,19,24,]),'value':([5,10,23,],[12,20,26,]),'attribute':([5,10,11,18,19,24,],[13,13,21,13,21,21,]),}
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
for _x, _y in zip(_v[0], _v[1]):
if not _x in _lr_goto: _lr_goto[_x] = {}
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> statement","S'",1,None,None,None),
('value -> BASE16','value',1,'p_value','parser.py',11),
('value -> BASE10','value',1,'p_value','parser.py',12),
('value -> STRING','value',1,'p_value','parser.py',13),
('value -> ATOM','value',1,'p_value','parser.py',14),
('elm -> ATOM','elm',1,'p_elm','parser.py',22),
('elm -> ELEMENT','elm',1,'p_elm','parser.py',23),
('attribute -> ATOM ATTRIB value','attribute',3,'p_attribute','parser.py',27),
('attributes -> attributes attribute','attributes',2,'p_attributes','parser.py',34),
('attributes -> attribute','attributes',1,'p_attributes','parser.py',35),
('section -> SECTION elm','section',2,'p_section','parser.py',46),
('section -> SECTION elm attributes','section',3,'p_section','parser.py',47),
('values -> values value','values',2,'p_values','parser.py',56),
('values -> value','values',1,'p_values','parser.py',57),
('element -> elm values','element',2,'p_element','parser.py',68),
('element -> elm values attributes','element',3,'p_element','parser.py',69),
('element -> elm attributes','element',2,'p_element','parser.py',70),
('element -> elm','element',1,'p_element','parser.py',71),
('statement -> element','statement',1,'p_statement','parser.py',84),
('statement -> DEEP element','statement',2,'p_statement','parser.py',85),
('statement -> section','statement',1,'p_statement','parser.py',86),
]