Updates to support HECKformat documents, and minor changes.

- Update copyright.
- Remove manifest.in since we're switching to PDM (and defaults moved
  to the module).
- Remove setup.py since we're switching to PDM.
- Remove chains.yaml, move data to the processor module.
- Fix passthrough in __main__
- Move main function to separate function to support PDM entrypoint.
- metadata.py: Extensive rework
  * Add heck support (lots of little changes to support it). (.heck files can
    replace .meta files)
  * Add yaml metadata support (.meta files can be yaml)
  * Some formatting changes.
  * Make metatree be a little easier to read by separating out
    functionality into extra functions
- processchain.py: Move the chains.yaml data into an internal structure.
- Add processors/process_heck.py to support the document side of HECKformat
- Add pyproject.toml and embrace PDM.
This commit is contained in:
Cassowary 2024-02-10 20:49:52 -08:00
parent 694acf8599
commit b389506b4b
12 changed files with 205 additions and 194 deletions

View File

@ -1,6 +1,8 @@
No Nazis, otherwise:
MIT License MIT License
Copyright (c) 2023 Cas Rusnov Copyright (c) 2023-2024 Aldercone Studio Collective
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@ -1 +0,0 @@
include heckweasel/defaults/*.yaml

View File

@ -20,5 +20,6 @@
* Run commands as part of processing chains * Run commands as part of processing chains
* Project level processing chain overrides in the .meta or whatever. * Project level processing chain overrides in the .meta or whatever.
* Project settings in separate file from .meta that would basically do .meta stuff. Like global meta + config in a top.heck file by default and overridable by a parameter.

View File

@ -15,7 +15,7 @@ from typing import Dict, List, cast
from .metadata import MetaTree from .metadata import MetaTree
from .processchain import ProcessorChains from .processchain import ProcessorChains
from .processors.processors import PassthroughException from .processors.processors import PassthroughException, NoOutputException
from .pygments import pygments_get_css, pygments_markup_contents_html from .pygments import pygments_get_css, pygments_markup_contents_html
from .template_tools import ( from .template_tools import (
date_iso8601, date_iso8601,
@ -144,14 +144,23 @@ def main() -> int:
print("process {} -> {} -> {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename))) print("process {} -> {} -> {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename)))
if not args.dry_run: if not args.dry_run:
try: try:
# normal output
# FIXME support binary streams
collected_output = [line for line in chain.output]
with open(os.path.join(target_dir, chain.output_filename), "w") as outfile: with open(os.path.join(target_dir, chain.output_filename), "w") as outfile:
for line in chain.output: outfile.writelines(collected_output)
outfile.write(line)
except PassthroughException: except PassthroughException:
# write output from input
shutil.copyfile(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)) shutil.copyfile(os.path.join(root, f), os.path.join(target_dir, chain.output_filename))
except NoOutputException:
print("skip output {} -> {}".format(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)))
# don't write any output
pass
return 0 return 0
# Console-script entry point: pyproject.toml's [project.scripts] table names this function.
# Runs main() and exits the interpreter with the integer status that main() returns.
def do_main():
sys.exit(main())
if __name__ == "__main__": if __name__ == "__main__":
sys.exit(main()) do_main()

View File

@ -1,111 +0,0 @@
# Default: output == input
default:
extension: default
chain:
- passthrough
# Any object that needs jinja scripts but no other explicit processing
templatable:
extension: null
chain:
- jinja2
# Any object that needs jinja and to be embedded in a parent template
tembed:
extension: null
chain:
- jinja2
- jinja2_page_embed
# Markdown, BBCode and RST are first run through the templater, and then
# they are processed into HTML, and finally embedded in a page template.
markdown:
extension:
- md
chain:
- jinja2
- process_md
- jinja2_page_embed
bbcode:
extension:
- bb
- pp
chain:
- jinja2
- process_pp
- jinja2_page_embed
# FIXME implement RST processor
# restructured:
# extension:
# - rst
# chain:
# - jinja2
# - process_rst
# - jinja2_page_embed
# # JSON and YAML are split, passed through a pretty printer, and then output
# FIXME implement split chain processor, implement processor arguments
# json:
# extension:
# - json
# chain:
# - split (passthrough)
# - pp_json
# yaml:
# extension:
# - yml
# - yaml
# chain:
# - split (passthrough)
# - pp_yaml
# Template-html is first passed through the templater, and then embedded
# in a page template
template-html:
extension:
- thtml
- cont
chain:
- jinja2
- jinja2_page_embed
# # Smart CSS are simply converted to CSS.
# sass:
# extension:
# - sass
# - scss
# chain:
# - process_sass
# less:
# extension:
# - less
# chain:
# - process_less
# stylus:
# extension:
# - styl
# chain:
# - process_styl
# # Images are processed into thumbnails and sized in addition to being retained as their original
# FIXME implement split chain processor, implement processor arguments,
# image:
# extension:
# - jpg
# - jpeg
# - png
# chain:
# - split (image_bigthumb)
# - split (image_smallthumb)
# - passthrough
# image_bigthumb:
# extension:
# chain:
# - smart_resize (big)
# image_smallthumb:
# extension:
# chain:
# - smart_resize (small)

View File

@ -6,15 +6,19 @@ import mimetypes
import os import os
import uuid import uuid
from typing import Any, Dict, List, Optional, Tuple, Union, cast from typing import Any, Dict, List, Optional, Tuple, Union, cast
import yaml
import jstyleson import jstyleson
import heckformat.parse
from .utils import guess_mime from .utils import guess_mime
# setup mimetypes with some extra ones # setup mimetypes with some extra ones
mimetypes.init() mimetypes.init()
mimetypes.add_type("text/html", "thtml") mimetypes.add_type("text/html", "thtml")
mimetypes.add_type("text/html", "cont") mimetypes.add_type("text/html", "cont")
mimetypes.add_type("text/x-heckformat", "heck")
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -22,6 +26,8 @@ logger = logging.getLogger(__name__)
class MetaCacheMiss(Exception): class MetaCacheMiss(Exception):
"""Raised on cache miss.""" """Raised on cache miss."""
class MetaLoadError(Exception):
"""Raised when metadata fails to load (the file parses as neither JSON nor YAML)."""
class MetaCache: class MetaCache:
"""This class provides an in-memory cache for metadata tree.""" """This class provides an in-memory cache for metadata tree."""
@ -50,10 +56,10 @@ class MetaCache:
MetaCacheMiss: on missing key, or on aged out MetaCacheMiss: on missing key, or on aged out
""" """
if key not in self._cache: if (key not in self._cache):
raise MetaCacheMiss("no item for key {}".format(key)) raise MetaCacheMiss("no item for key {}".format(key))
if self._cache[key][0] + self._max_age <= new_time_stamp: if ((self._cache[key][0] + self._max_age) <= new_time_stamp):
return self._cache[key][1] return self._cache[key][1]
raise MetaCacheMiss("cache expired for key {}".format(key)) raise MetaCacheMiss("cache expired for key {}".format(key))
@ -82,13 +88,47 @@ class MetaTree:
""" """
self._cache = MetaCache() self._cache = MetaCache()
if default_metadata is None: if (default_metadata is None):
default_metadata = {} default_metadata = {}
self._default_metadata = default_metadata self._default_metadata = default_metadata
if root[-1] != "/": if (root[-1] != "/"):
root += "/" root += "/"
self._root = root self._root = root
# Return the path of the metadata file that describes ``fullpath``; the same path is
# used as the key into the metadata cache.
#   - default: the sidecar file ``<fullpath>.meta``
#   - paths ending in ``.heck``: the path itself (the .heck file carries its own metadata)
#   - directories: ``<fullpath>/.meta``, with ``<fullpath>/.heck`` as the alternative
# NOTE(review): indentation is not visible in this view; presumably the os.path.exists
# fallback to ".heck" belongs to the directory branch only -- confirm against the file.
def _get_cache_key(self, fullpath: str):
cachekey = fullpath + '.meta'
if fullpath.endswith(".heck"):
cachekey = fullpath
elif os.path.isdir(fullpath):
cachekey = os.path.join(fullpath, ".meta")
if (not os.path.exists(cachekey)):
cachekey = os.path.join(fullpath, ".heck")
return cachekey
def _load_metadata(self, cachekey: str) -> Dict:
    """Read and parse the metadata file at ``cachekey``.

    Files ending in ``.heck`` are parsed with heckformat. Any other file is
    tried as JSON (via jstyleson) first and as YAML second.

    Arguments:
        cachekey (str): path of the metadata file to load.

    Returns:
        dict: the parsed metadata; empty if the file contains no data.

    Raises:
        MetaLoadError: if the file parses as neither JSON nor YAML, or if its
            top-level value is not a mapping.
    """
    with open(cachekey, "r") as inf:
        if cachekey.endswith(".heck"):
            return heckformat.parse.load(inf).flatten_replace()

        try:
            meta = jstyleson.load(inf)
        except jstyleson.JSONDecodeError as json_exc:
            # The JSON attempt consumed the stream; rewind so the YAML parser
            # sees the whole document instead of an empty one.
            inf.seek(0)
            try:
                # Metadata is plain data, so never construct arbitrary objects.
                meta = yaml.safe_load(inf)
            except yaml.YAMLError as yaml_exc:
                # The JSON failure stays reachable through the implicit
                # exception context of yaml_exc.
                raise MetaLoadError(
                    "{} is neither valid JSON ({}) nor valid YAML ({})".format(cachekey, json_exc, yaml_exc)
                ) from yaml_exc

    if meta is None:
        # An empty YAML document loads as None; callers expect a dict.
        return {}
    if not isinstance(meta, dict):
        raise MetaLoadError(
            "{} must contain a mapping at the top level, got {}".format(cachekey, type(meta).__name__)
        )
    return meta
def get_metadata(self, rel_path: str) -> Dict: def get_metadata(self, rel_path: str) -> Dict:
"""Retrieve the metadata for a given path """Retrieve the metadata for a given path
@ -113,11 +153,10 @@ class MetaTree:
fullpath = os.path.join(fullpath, pth) fullpath = os.path.join(fullpath, pth)
st = os.stat(fullpath) st = os.stat(fullpath)
if os.path.isdir(fullpath): cachekey = self._get_cache_key(fullpath)
cachekey = os.path.join(fullpath, ".meta")
else:
cachekey = fullpath + ".meta"
meta = cast(Dict, {}) meta = cast(Dict, {})
try: try:
st_meta = os.stat(cachekey) st_meta = os.stat(cachekey)
meta = self._cache.get(cachekey, st_meta.st_mtime) meta = self._cache.get(cachekey, st_meta.st_mtime)
@ -126,28 +165,40 @@ class MetaTree:
except MetaCacheMiss: except MetaCacheMiss:
meta = {} meta = {}
if not meta and st_meta: # if we didn't get any meta from the cache, but the metafile exists, try loading it
meta = jstyleson.load(open(cachekey, "r")) if ((not meta) and st_meta):
meta = self._load_metadata(cachekey)
self._cache.put(cachekey, meta, st_meta.st_mtime) self._cache.put(cachekey, meta, st_meta.st_mtime)
if fullpath == ospath and "wildcard_metadata" in metablob: # add whatever is in the metablob as 'wildcard_metadata' to the metadata if the filename
# matches the wildcards
if ((fullpath == ospath) and ("wildcard_metadata" in metablob)):
for wild in metablob["wildcard_metadata"]: for wild in metablob["wildcard_metadata"]:
if fnmatch.fnmatch(pth, wild[0]): if fnmatch.fnmatch(pth, wild[0]):
metablob.update(wild[1]) metablob.update(wild[1])
metablob.update(meta) metablob.update(meta)
# return final dict ### fill in all objective metadata
# containing directory and filename
metablob["dir"], metablob["file_name"] = os.path.split(rel_path) metablob["dir"], metablob["file_name"] = os.path.split(rel_path)
# path within the source tree
metablob["file_path"] = rel_path metablob["file_path"] = rel_path
# the path relative to the output tree
metablob["relpath"] = os.path.relpath("/", "/" + metablob["dir"]) metablob["relpath"] = os.path.relpath("/", "/" + metablob["dir"])
# the UUID for this file
metablob["uuid"] = uuid.uuid3(uuid.NAMESPACE_OID, metablob["uuid-oid-root"] + ospath) metablob["uuid"] = uuid.uuid3(uuid.NAMESPACE_OID, metablob["uuid-oid-root"] + ospath)
# the pre-split components of the full path
metablob["os-path"], _ = os.path.split(fullpath) metablob["os-path"], _ = os.path.split(fullpath)
# the mime type we guessed for this file
metablob["guessed-type"] = guess_mime(ospath) metablob["guessed-type"] = guess_mime(ospath)
# if the mime-type isn't overridden in the explicit metadata, we make it equal to the guessed type
if "mime-type" not in metablob: if "mime-type" not in metablob:
metablob["mime-type"] = metablob["guessed-type"] metablob["mime-type"] = metablob["guessed-type"]
# the `stat` components
metablob["stat"] = {} metablob["stat"] = {}
for stk in ("st_mtime", "st_ctime", "st_atime", "st_mode", "st_size", "st_ino"): for stk in ("st_mtime", "st_ctime", "st_atime", "st_mode", "st_size", "st_ino"):
metablob["stat"][stk.replace("st_", "")] = getattr(st, stk) metablob["stat"][stk.replace("st_", "")] = getattr(st, stk)
# return final dict
return metablob return metablob

View File

@ -9,6 +9,26 @@ import yaml
from .processors.processors import Processor from .processors.processors import Processor
# Built-in processing chains, used when no chain configuration file is supplied.
# Each entry maps a chain name to the file extensions it handles and the ordered
# list of processor names that make up the chain.
PROCESS_CHAIN_DEFAULT = {
    # Default: output == input
    "default": {
        "extension": "default",
        "chain": ["passthrough"],
    },
    # Any object that needs jinja scripts but no other explicit processing
    "templatable": {
        "extension": None,
        "chain": ["jinja2"],
    },
    # Any object that needs jinja and to be embedded in a parent template
    "tembed": {
        "extension": None,
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    # Markup formats are templated first, converted, then embedded in a page template
    "markdown": {
        "extension": ["md"],
        "chain": ["jinja2", "process_md", "jinja2_page_embed"],
    },
    "bbcode": {
        "extension": ["bb", "pp"],
        "chain": ["jinja2", "process_pp", "jinja2_page_embed"],
    },
    # Template-html is templated and then embedded in a page template
    "template-html": {
        "extension": ["thtml", "cont"],
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    # HECKformat documents go through process_heck first, then follow the markdown chain
    "heckformat": {
        "extension": ["heck"],
        "chain": ["process_heck", "jinja2", "process_md", "jinja2_page_embed"],
    },
}
class ProcessorChain: class ProcessorChain:
"""This implements a wrapper for an arbitrary set of processors and an associated file stream.""" """This implements a wrapper for an arbitrary set of processors and an associated file stream."""
@ -107,9 +127,9 @@ class ProcessorChains:
""" """
if config is None: # pragma: no coverage if config is None: # pragma: no coverage
config = os.path.join(os.path.dirname(__file__), "defaults", "chains.yaml") self.chainconfig = PROCESS_CHAIN_DEFAULT
else:
self.chainconfig = yaml.load(open(config, "r")) self.chainconfig = yaml.full_load(open(config, "r"))
self.extensionmap: Dict[str, Any] = {} self.extensionmap: Dict[str, Any] = {}
self.processors: Dict[str, Type[Processor]] = {} self.processors: Dict[str, Type[Processor]] = {}
for ch, conf in self.chainconfig.items(): for ch, conf in self.chainconfig.items():

View File

@ -0,0 +1,76 @@
"""Convert a HECKformat file to a markdown stream."""
import io
import os
from typing import Dict, Iterable, Optional
import heckformat.parse
from .processors import Processor, NoOutputException
class HECKformatProcessor(Processor):
    """Convert a HECKformat file to a markdown stream."""

    def filename(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the filename of the post-processed file.

        Arguments:
            oldname (str): the previous name for the file.
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new name for the file (the old name with its extension replaced by ``.md``)

        """
        return os.path.splitext(oldname)[0] + ".md"

    def mime_type(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the mimetype of the post-processed file.

        Arguments:
            oldname (str): the input filename
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new mimetype of the file after processing

        """
        return "text/x-markdown"

    def extension(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the extension of the post-processed file.

        Arguments:
            oldname (str): the input filename
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new extension of the file after processing

        """
        return "md"

    def process(self, input_file: Iterable, ctx: Optional[Dict] = None) -> Iterable:
        """Return an iterable object of the post-processed file.

        Arguments:
            input_file (iterable): An input stream
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            iterable: The content of the first unparsed section of the HECKformat document

        Raises:
            NoOutputException: if the document contains no unparsed section, so that no
                output is written for this file

        """
        elm = heckformat.parse.load_heck(input_file).flatten_replace()

        for key in elm:
            if key.startswith(heckformat.parse.UNPARSED_MARKER):
                # FIXME: later the document label (the last space-separated token of the
                # key) should be used to choose which output processor to run.
                # For now assume the first unparsed part of the document is the page.
                return elm[key]

        # No documents in the input heck, we just prevent output
        raise NoOutputException()
processor = HECKformatProcessor # pylint: disable=invalid-name

View File

@ -5,6 +5,8 @@ from typing import Dict, Iterable, Optional
class PassthroughException(Exception): class PassthroughException(Exception):
"""Raised when the processor would like the file to pass through unchanged.""" """Raised when the processor would like the file to pass through unchanged."""
class NoOutputException(Exception):
"""Raised when the processor would like no output to be written from the processing chain.

main() catches this, prints a "skip output" notice, and writes no output file.
"""
class ProcessorException(Exception): # pragma: no cover class ProcessorException(Exception): # pragma: no cover
"""A base exception class to be used by processor objects.""" """A base exception class to be used by processor objects."""

View File

@ -61,6 +61,9 @@ def guess_mime(path: str) -> Optional[str]:
str: the guessed mime-type str: the guessed mime-type
""" """
# if path.endswith('.heck'):
# return "text/x-heckformat"
mtypes = mimetypes.guess_type(path) mtypes = mimetypes.guess_type(path)
ftype = None ftype = None
if os.path.isdir(path): if os.path.isdir(path):

View File

@ -0,0 +1,21 @@
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[project]
name = "heckweasel"
# The version is not declared here; [tool.pdm.version] reads it from heckweasel/__init__.py.
dynamic = ["version"]
description = "A metadata based static site compiler with CMS-like features."
authors = [{name = "Cassowary", email="cassowary@aldercone.studio"}]
dependencies = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments", "heckformat"]
requires-python = ">=3.8"
readme = "README.md"
# NOTE(review): in the pyproject metadata spec `text` is the license text itself, not a
# path; if the intent is to point at the license file this should presumably be
# {file = "LICENSE"} -- confirm the file name before changing.
license = {text = "LICENSE"}
[tool.pdm.version]
source = "file"
path = "heckweasel/__init__.py"
[project.scripts]
heckweasel = "heckweasel.__main__:do_main"

View File

@ -1,62 +0,0 @@
"""Package configuration."""
from setuptools import find_packages, setup
from heckweasel import __version__
LONG_DESCRIPTION = """Heckweasel is a filesystem based static site generator."""
INSTALL_REQUIRES = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments"]
# Extra dependencies
EXTRAS_REQUIRE = {
# Test dependencies
"tests": [
"black",
"bandit>=1.1.0",
"flake8>=3.2.1",
"mypy>=0.470",
"prospector[with_everything]>=0.12.4",
"pytest-cov>=1.8.0",
"pytest-xdist>=1.15.0",
"pytest>=3.0.3",
"sphinx_rtd_theme>=0.1.6",
"sphinx-argparse>=0.1.15",
"Sphinx>=1.4.9",
]
}
SETUP_REQUIRES = ["pytest-runner>=2.7.1", "setuptools_scm>=1.15.0"]
setup(
author="Cassowary Rusnov",
author_email="alderconestudio@gmail.com",
classifiers=[
"Development Status :: 1 - Pre-alpha",
"Environment :: Console",
"License :: OSI Approved :: MIT",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3 :: Only",
"Topic :: Software Development :: Libraries :: Python Modules",
],
description="A filesystem-based website generator / CMS",
# entry_points={
# 'console_scripts': [
# 'cookbook = spicerack.cookbook:main',
# ],
# },
include_package_data=True,
extras_require=EXTRAS_REQUIRE,
install_requires=INSTALL_REQUIRES,
keywords=["cms", "website", "compiler"],
license="MIT",
long_description=LONG_DESCRIPTION,
name="heckweasel",
packages=find_packages(exclude=["*.tests", "*.tests.*"]),
platforms=["GNU/Linux"],
setup_requires=SETUP_REQUIRES,
use_scm_version=True,
url="https://git.aldercone.studio/aldercone/heckweasel",
zip_safe=False,
version=__version__,
)