Updates to support HECKformat documents, and minor changes.

- Update copyright.
- Remove manifest.in since we're switching to PDM (and defaults moved
  to the module).
- Remove setup.py since we're switching to PDM.
- Remove chains.yaml, move data to the processor module.
- Fix passthrough in __main__
- Move main function to separate function to support PDM entrypoint.
- metadata.py: Extensive rework
  * Add heck support (lots of little changes to support it). (.heck files can
    replace .meta files)
  * Add yaml metadata support (.meta files can be yaml)
  * Some formatting changes.
  * Make metatree be a little easier to read by separating out
    functionality into extra functions
- processchain.py: Move the chains.yaml data into an internal structure.
- Add processors/process_heck.py to support the document side of HECKformat
- add pyproject.toml and embrace PDM.
This commit is contained in:
Cassowary 2024-02-10 20:49:52 -08:00
parent 694acf8599
commit b389506b4b
12 changed files with 205 additions and 194 deletions

View File

@ -1,6 +1,8 @@
No Nazis, otherwise:
MIT License
Copyright (c) 2023 Cas Rusnov
Copyright (c) 2023-2024 Aldercone Studio Collective
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1 +0,0 @@
include heckweasel/defaults/*.yaml

View File

@ -20,5 +20,6 @@
* Run commands as part of processing chains
* Project level processing chain overrides in the .meta or whatever.
* Project settings in separate file from .meta that would basically do .meta stuff. Like global meta + config in a top.heck file by default and overridable by a parameter.

View File

@ -15,7 +15,7 @@ from typing import Dict, List, cast
from .metadata import MetaTree
from .processchain import ProcessorChains
from .processors.processors import PassthroughException
from .processors.processors import PassthroughException, NoOutputException
from .pygments import pygments_get_css, pygments_markup_contents_html
from .template_tools import (
date_iso8601,
@ -144,14 +144,23 @@ def main() -> int:
print("process {} -> {} -> {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename)))
if not args.dry_run:
try:
# normal output
# FIXME support binary streams
collected_output = [line for line in chain.output]
with open(os.path.join(target_dir, chain.output_filename), "w") as outfile:
for line in chain.output:
outfile.write(line)
outfile.writelines(collected_output)
except PassthroughException:
# write output from input
shutil.copyfile(os.path.join(root, f), os.path.join(target_dir, chain.output_filename))
except NoOutputException:
print("skip output {} -> {}".format(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)))
# don't write any output
pass
return 0
def do_main():
    """Run ``main`` and terminate the interpreter with its return value.

    Kept as a separate callable so it can be referenced as a console-script
    entry point.
    """
    exit_status = main()
    sys.exit(exit_status)
if __name__ == "__main__":
sys.exit(main())
do_main()

View File

@ -1,111 +0,0 @@
# Default: output == input
default:
extension: default
chain:
- passthrough
# Any object that needs jinja scripts but no other explicit processing
templatable:
extension: null
chain:
- jinja2
# Any object that needs jinja and to be embedded in a parent template
tembed:
extension: null
chain:
- jinja2
- jinja2_page_embed
# Markdown, BBCode and RST are first run through the templater, and then
# they are processed into HTML, and finally embedded in a page template.
markdown:
extension:
- md
chain:
- jinja2
- process_md
- jinja2_page_embed
bbcode:
extension:
- bb
- pp
chain:
- jinja2
- process_pp
- jinja2_page_embed
# FIXME implement RST processor
# restructured:
# extension:
# - rst
# chain:
# - jinja2
# - process_rst
# - jinja2_page_embed
# # JSON and YAML are split, passed through a pretty printer, and then output
# FIXME implement split chain processor, implement processor arguments
# json:
# extension:
# - json
# chain:
# - split (passthrough)
# - pp_json
# yaml:
# extension:
# - yml
# - yaml
# chain:
# - split (passthrough)
# - pp_yaml
# Template-html is first passed through the templater, and then embedded
# in a page template
template-html:
extension:
- thtml
- cont
chain:
- jinja2
- jinja2_page_embed
# # Smart CSS are simply converted to CSS.
# sass:
# extension:
# - sass
# - scss
# chain:
# - process_sass
# less:
# extension:
# - less
# chain:
# - process_less
# stylus:
# extension:
# - styl
# chain:
# - process_styl
# # Images are processed into thumbnails and sized in addition to being retained as their original
# FIXME implement split chain processor, implement processor arguments,
# image:
# extension:
# - jpg
# - jpeg
# - png
# chain:
# - split (image_bigthumb)
# - split (image_smallthumb)
# - passthrough
# image_bigthumb:
# extension:
# chain:
# - smart_resize (big)
# image_smallthumb:
# extension:
# chain:
# - smart_resize (small)

View File

@ -6,15 +6,19 @@ import mimetypes
import os
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import yaml
import jstyleson
import heckformat.parse
from .utils import guess_mime
# setup mimetypes with some extra ones
mimetypes.init()
mimetypes.add_type("text/html", "thtml")
mimetypes.add_type("text/html", "cont")
mimetypes.add_type("text/x-heckformat", "heck")
logger = logging.getLogger(__name__)
@ -22,6 +26,8 @@ logger = logging.getLogger(__name__)
class MetaCacheMiss(Exception):
"""Raised on cache miss."""
class MetaLoadError(Exception):
    """Raised when metadata fails to load."""
class MetaCache:
"""This class provides an in-memory cache for metadata tree."""
@ -50,10 +56,10 @@ class MetaCache:
MetaCacheMiss: on missing key, or on aged out
"""
if key not in self._cache:
if (key not in self._cache):
raise MetaCacheMiss("no item for key {}".format(key))
if self._cache[key][0] + self._max_age <= new_time_stamp:
if ((self._cache[key][0] + self._max_age) <= new_time_stamp):
return self._cache[key][1]
raise MetaCacheMiss("cache expired for key {}".format(key))
@ -82,13 +88,47 @@ class MetaTree:
"""
self._cache = MetaCache()
if default_metadata is None:
if (default_metadata is None):
default_metadata = {}
self._default_metadata = default_metadata
if root[-1] != "/":
if (root[-1] != "/"):
root += "/"
self._root = root
def _get_cache_key(self, fullpath: str):
cachekey = fullpath + '.meta'
if fullpath.endswith(".heck"):
cachekey = fullpath
elif os.path.isdir(fullpath):
cachekey = os.path.join(fullpath, ".meta")
if (not os.path.exists(cachekey)):
cachekey = os.path.join(fullpath, ".heck")
return cachekey
def _load_metadata(self, cachekey: str) -> Dict:
    """Load and parse the metadata file at ``cachekey``.

    Files ending in ``.heck`` are parsed as HECKformat. Any other file is
    tried as JSON (via jstyleson) first and as YAML second.

    Arguments:
        cachekey (str): path of the metadata file to read.

    Returns:
        dict: the parsed metadata; an empty dict when the file holds no data.

    Raises:
        MetaLoadError: if the file parses as neither JSON nor YAML.

    """
    with open(cachekey, "r") as inf:
        if cachekey.endswith(".heck"):
            # NOTE(review): the HECK document processor calls
            # heckformat.parse.load_heck for what looks like the same job;
            # confirm that load is the intended entry point here.
            parsed = heckformat.parse.load(inf)
            meta = parsed.flatten_replace()
        else:
            # Read the text once so both parsers see the whole document; a
            # stream already consumed by the JSON attempt would look empty
            # to the YAML attempt.
            text = inf.read()
            try:
                meta = jstyleson.loads(text)
            except ValueError as json_exc:
                # ValueError is the base class of the standard library's JSON
                # decode error, so this does not depend on jstyleson
                # re-exporting that exception name.
                logger.debug("%s is not JSON (%s), trying YAML", cachekey, json_exc)
                try:
                    # safe_load builds plain data types only and needs no
                    # explicit Loader argument.
                    meta = yaml.safe_load(text)
                except yaml.YAMLError as yaml_exc:
                    # Either the JSON or the YAML is malformed. The JSON
                    # failure stays attached as the context of yaml_exc.
                    raise MetaLoadError(
                        "could not parse {} as JSON or YAML".format(cachekey)
                    ) from yaml_exc
    # an empty YAML document parses to None; callers expect a mapping
    return meta if meta is not None else {}
def get_metadata(self, rel_path: str) -> Dict:
"""Retrieve the metadata for a given path
@ -113,11 +153,10 @@ class MetaTree:
fullpath = os.path.join(fullpath, pth)
st = os.stat(fullpath)
if os.path.isdir(fullpath):
cachekey = os.path.join(fullpath, ".meta")
else:
cachekey = fullpath + ".meta"
cachekey = self._get_cache_key(fullpath)
meta = cast(Dict, {})
try:
st_meta = os.stat(cachekey)
meta = self._cache.get(cachekey, st_meta.st_mtime)
@ -126,28 +165,40 @@ class MetaTree:
except MetaCacheMiss:
meta = {}
if not meta and st_meta:
meta = jstyleson.load(open(cachekey, "r"))
# if we didn't get any meta from the cache, but the metafile exists, try loading it
if ((not meta) and st_meta):
meta = self._load_metadata(cachekey)
self._cache.put(cachekey, meta, st_meta.st_mtime)
if fullpath == ospath and "wildcard_metadata" in metablob:
# add whatever is in the metablob as 'wildcard_metadata' to the metadata if the filename
# matches the wildcards
if ((fullpath == ospath) and ("wildcard_metadata" in metablob)):
for wild in metablob["wildcard_metadata"]:
if fnmatch.fnmatch(pth, wild[0]):
metablob.update(wild[1])
metablob.update(meta)
# return final dict
### fill in all objective metadata
# containing directory and filename
metablob["dir"], metablob["file_name"] = os.path.split(rel_path)
# path within the source tree
metablob["file_path"] = rel_path
# the path relative to the output tree
metablob["relpath"] = os.path.relpath("/", "/" + metablob["dir"])
# the UUID for this file
metablob["uuid"] = uuid.uuid3(uuid.NAMESPACE_OID, metablob["uuid-oid-root"] + ospath)
# the pre-split components of the full path
metablob["os-path"], _ = os.path.split(fullpath)
# the mime type we guessed for this file
metablob["guessed-type"] = guess_mime(ospath)
# if the mime-type isn't overridden in the explicit metadata, we make it equal to the guessed type
if "mime-type" not in metablob:
metablob["mime-type"] = metablob["guessed-type"]
# the `stat` components
metablob["stat"] = {}
for stk in ("st_mtime", "st_ctime", "st_atime", "st_mode", "st_size", "st_ino"):
metablob["stat"][stk.replace("st_", "")] = getattr(st, stk)
# return final dict
return metablob

View File

@ -9,6 +9,26 @@ import yaml
from .processors.processors import Processor
# Built-in processing chains, keyed by chain name. Each entry gives the file
# extension(s) the chain is registered for and the ordered processor names.
PROCESS_CHAIN_DEFAULT = {
    # Default: output == input
    "default": {
        "extension": "default",
        "chain": ["passthrough"],
    },
    # Any object that needs jinja scripts but no other explicit processing
    "templatable": {
        "extension": None,
        "chain": ["jinja2"],
    },
    # Any object that needs jinja and to be embedded in a parent template
    "tembed": {
        "extension": None,
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    # Markdown and BBCode are first run through the templater, then processed
    # into HTML, and finally embedded in a page template.
    "markdown": {
        "extension": ["md"],
        "chain": ["jinja2", "process_md", "jinja2_page_embed"],
    },
    "bbcode": {
        "extension": ["bb", "pp"],
        "chain": ["jinja2", "process_pp", "jinja2_page_embed"],
    },
    # Template-html is first passed through the templater, and then embedded
    # in a page template
    "template-html": {
        "extension": ["thtml", "cont"],
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    # HECKformat documents go through process_heck first and then follow the
    # same steps as the markdown chain.
    "heckformat": {
        "extension": ["heck"],
        "chain": ["process_heck", "jinja2", "process_md", "jinja2_page_embed"],
    },
}
class ProcessorChain:
"""This implements a wrapper for an arbitrary set of processors and an associated file stream."""
@ -107,9 +127,9 @@ class ProcessorChains:
"""
if config is None: # pragma: no coverage
config = os.path.join(os.path.dirname(__file__), "defaults", "chains.yaml")
self.chainconfig = yaml.load(open(config, "r"))
self.chainconfig = PROCESS_CHAIN_DEFAULT
else:
self.chainconfig = yaml.full_load(open(config, "r"))
self.extensionmap: Dict[str, Any] = {}
self.processors: Dict[str, Type[Processor]] = {}
for ch, conf in self.chainconfig.items():

View File

@ -0,0 +1,76 @@
"""Convert a HECKformat file to a markdown stream."""
import io
import os
from typing import Dict, Iterable, Optional
import heckformat.parse
from .processors import Processor, NoOutputException
class HECKformatProcessor(Processor):
    """Convert a HECKformat file to a markdown stream."""

    def filename(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the filename of the post-processed file.

        Arguments:
            oldname (str): the previous name for the file.
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new name for the file

        """
        # swap whatever extension the input had for .md
        return os.path.splitext(oldname)[0] + ".md"

    def mime_type(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the mimetype of the post-processed file.

        Arguments:
            oldname (str): the input filename
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new mimetype of the file after processing

        """
        return "text/x-markdown"

    def extension(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the extension of the post-processed file.

        Arguments:
            oldname (str): the input filename
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new extension of the file after processing

        """
        return "md"

    def process(self, input_file: Iterable, ctx: Optional[Dict] = None) -> Iterable:
        """Return an iterable object of the post-processed file.

        Arguments:
            input_file (iterable): An input stream
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            iterable: The post-processed output stream

        Raises:
            NoOutputException: if the input contains no unparsed document section

        """
        elm = heckformat.parse.load_heck(input_file).flatten_replace()
        for key in elm:
            if key.startswith(heckformat.parse.UNPARSED_MARKER):
                # FIXME: later the document label (the last space-separated
                # token of the key) should choose which output processor to use.
                # For now we just assume the first unparsed part of the document
                # is the page.
                return elm[key]
        # No documents in the input heck, we just prevent output
        raise NoOutputException()
processor = HECKformatProcessor # pylint: disable=invalid-name

View File

@ -5,6 +5,8 @@ from typing import Dict, Iterable, Optional
class PassthroughException(Exception):
"""Raised when the processor would like the file to pass through unchanged."""
class NoOutputException(Exception):
    """Raised by a processor to signal that the processing chain should write no output."""
class ProcessorException(Exception): # pragma: no cover
"""A base exception class to be used by processor objects."""

View File

@ -61,6 +61,9 @@ def guess_mime(path: str) -> Optional[str]:
str: the guessed mime-type
"""
# if path.endswith('.heck'):
# return "text/x-heckformat"
mtypes = mimetypes.guess_type(path)
ftype = None
if os.path.isdir(path):

View File

@ -0,0 +1,21 @@
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[project]
name = "heckweasel"
dynamic = ["version"]
description = "A metadata based static site compiler with CMS-like features."
authors = [{name = "Cassowary", email="cassowary@aldercone.studio"}]
dependencies = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments", "heckformat"]
requires-python = ">=3.8"
readme = "README.md"
# `text` would publish the literal word "LICENSE" as the license text, so
# reference the license file instead.
# NOTE(review): assumes the license file at the project root is named LICENSE; confirm.
license = {file = "LICENSE"}
[tool.pdm.version]
source = "file"
path = "heckweasel/__init__.py"
[project.scripts]
heckweasel = "heckweasel.__main__:do_main"

View File

@ -1,62 +0,0 @@
"""Package configuration."""
from setuptools import find_packages, setup
from heckweasel import __version__
LONG_DESCRIPTION = """Heckweasel is a filesystem based static site generator."""
INSTALL_REQUIRES = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments"]
# Extra dependencies
EXTRAS_REQUIRE = {
# Test dependencies
"tests": [
"black",
"bandit>=1.1.0",
"flake8>=3.2.1",
"mypy>=0.470",
"prospector[with_everything]>=0.12.4",
"pytest-cov>=1.8.0",
"pytest-xdist>=1.15.0",
"pytest>=3.0.3",
"sphinx_rtd_theme>=0.1.6",
"sphinx-argparse>=0.1.15",
"Sphinx>=1.4.9",
]
}
SETUP_REQUIRES = ["pytest-runner>=2.7.1", "setuptools_scm>=1.15.0"]
setup(
author="Cassowary Rusnov",
author_email="alderconestudio@gmail.com",
classifiers=[
"Development Status :: 1 - Pre-alpha",
"Environment :: Console",
"License :: OSI Approved :: MIT",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3 :: Only",
"Topic :: Software Development :: Libraries :: Python Modules",
],
description="A filesystem-based website generator / CMS",
# entry_points={
# 'console_scripts': [
# 'cookbook = spicerack.cookbook:main',
# ],
# },
include_package_data=True,
extras_require=EXTRAS_REQUIRE,
install_requires=INSTALL_REQUIRES,
keywords=["cms", "website", "compiler"],
license="MIT",
long_description=LONG_DESCRIPTION,
name="heckweasel",
packages=find_packages(exclude=["*.tests", "*.tests.*"]),
platforms=["GNU/Linux"],
setup_requires=SETUP_REQUIRES,
use_scm_version=True,
url="https://git.aldercone.studio/aldercone/heckweasel",
zip_safe=False,
version=__version__,
)