From b389506b4b7af14462206a4f59d24f8c18ec48a4 Mon Sep 17 00:00:00 2001 From: Cassowary Date: Sat, 10 Feb 2024 20:49:52 -0800 Subject: [PATCH] Updates to support HECKformat documents, and minor changes. - Update copyright. - Remove manifest.in since we're switching to PDM (and defaults moved to the module). - Remove setup.py since we're switching to PDM. - Remove chains.yaml, move data to the processor module. - Fix passthrough in __main__ - Move main function to separate function to support PDM entrypoint. - metadata.py: Extensive rework * Add heck support (lots of little changes to support it). (.heck files can replace .meta files) * Add yaml metadata support (.meta files can be yaml) * Some formatting changes. * Make metatree be a little easier to read by separating out functionality into extra functions - processchain.py: Move chains.yaml to a structure internal. - Add processors/process_heck.py to support the document side of HECKformat - add pyproject.toml and embrace PDM. --- LICENSE | 4 +- MANIFEST.in | 1 - TODO.md | 1 + heckweasel/__main__.py | 17 +++- heckweasel/defaults/chains.yaml | 111 -------------------------- heckweasel/metadata.py | 75 ++++++++++++++--- heckweasel/processchain.py | 26 +++++- heckweasel/processors/process_heck.py | 76 ++++++++++++++++++ heckweasel/processors/processors.py | 2 + heckweasel/utils.py | 3 + pyproject.toml | 21 +++++ setup.py | 62 -------------- 12 files changed, 205 insertions(+), 194 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 heckweasel/defaults/chains.yaml create mode 100644 heckweasel/processors/process_heck.py delete mode 100644 setup.py diff --git a/LICENSE b/LICENSE index cc948be..364161f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,8 @@ +No Nazis, otherwise: + MIT License -Copyright (c) 2023 Cas Rusnov +Copyright (c) 2023-2024 Aldercone Studio Collective Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 
"Software"), to deal diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 3e854e2..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include heckweasel/defaults/*.yaml \ No newline at end of file diff --git a/TODO.md b/TODO.md index ccb1d2e..db42d2c 100644 --- a/TODO.md +++ b/TODO.md @@ -20,5 +20,6 @@ * Run commands as part of processing chains * Project level processing chain overrides in the .meta or whatever. +* Project settings in separate file from .meta that would basically do .meta stuff. Like global meta + config in a top.heck file by default and overridable by a parameter. diff --git a/heckweasel/__main__.py b/heckweasel/__main__.py index a93a418..a065bfd 100644 --- a/heckweasel/__main__.py +++ b/heckweasel/__main__.py @@ -15,7 +15,7 @@ from typing import Dict, List, cast from .metadata import MetaTree from .processchain import ProcessorChains -from .processors.processors import PassthroughException +from .processors.processors import PassthroughException, NoOutputException from .pygments import pygments_get_css, pygments_markup_contents_html from .template_tools import ( date_iso8601, @@ -144,14 +144,23 @@ def main() -> int: print("process {} -> {} -> {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename))) if not args.dry_run: try: + # normal output + # FIXME support binary streams + collected_output = [line for line in chain.output] with open(os.path.join(target_dir, chain.output_filename), "w") as outfile: - for line in chain.output: - outfile.write(line) + outfile.writelines(collected_output) except PassthroughException: + # write output from input shutil.copyfile(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)) + except NoOutputException: + print("skip output {} -> {}".format(os.path.join(root, f), os.path.join(target_dir, chain.output_filename))) + # don't write anyp output + pass return 0 +def do_main(): + sys.exit(main()) if __name__ == "__main__": - 
sys.exit(main()) + do_main() diff --git a/heckweasel/defaults/chains.yaml b/heckweasel/defaults/chains.yaml deleted file mode 100644 index 459eae0..0000000 --- a/heckweasel/defaults/chains.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# Default: output == input -default: - extension: default - chain: - - passthrough - -# Any object that needs jinja scripts but no other explicit processing -templatable: - extension: null - chain: - - jinja2 - -# Any object that needs jinja and to be embedded in a parent template -tembed: - extension: null - chain: - - jinja2 - - jinja2_page_embed - -# Markdown, BBCode and RST are first run through the templater, and then -# they are processed into HTML, and finally embedded in a page template. -markdown: - extension: - - md - chain: - - jinja2 - - process_md - - jinja2_page_embed -bbcode: - extension: - - bb - - pp - chain: - - jinja2 - - process_pp - - jinja2_page_embed -# FIXME implement RST processor -# restructured: -# extension: -# - rst -# chain: -# - jinja2 -# - process_rst -# - jinja2_page_embed - -# # JSON and YAML are split, passed through a pretty printer, and then output -# FIXME implement split chain processor, implement processor arguments -# json: -# extension: -# - json -# chain: -# - split (passthrough) -# - pp_json -# yaml: -# extension: -# - yml -# - yaml -# chain: -# - split (passthrough) -# - pp_yaml - -# Template-html is first passed through the templater, and then embedded -# in a page template -template-html: - extension: - - thtml - - cont - chain: - - jinja2 - - jinja2_page_embed - -# # Smart CSS are simply converted to CSS. 
-# sass: -# extension: -# - sass -# - scss -# chain: -# - process_sass -# less: -# extension: -# - less -# chain: -# - process_less - -# stylus: -# extension: -# - styl -# chain: -# - process_styl - -# # Images are processed into thumbnails and sized in addition to being retained as their original -# FIXME implement split chain processor, implement processor arguments, -# image: -# extension: -# - jpg -# - jpeg -# - png -# chain: -# - split (image_bigthumb) -# - split (image_smallthumb) -# - passthrough - -# image_bigthumb: -# extension: -# chain: -# - smart_resize (big) - -# image_smallthumb: -# extension: -# chain: -# - smart_resize (small) diff --git a/heckweasel/metadata.py b/heckweasel/metadata.py index f92e534..25799ed 100644 --- a/heckweasel/metadata.py +++ b/heckweasel/metadata.py @@ -6,15 +6,19 @@ import mimetypes import os import uuid from typing import Any, Dict, List, Optional, Tuple, Union, cast +import yaml import jstyleson +import heckformat.parse + from .utils import guess_mime # setup mimetypes with some extra ones mimetypes.init() mimetypes.add_type("text/html", "thtml") mimetypes.add_type("text/html", "cont") +mimetypes.add_type("text/x-heckformat", "heck") logger = logging.getLogger(__name__) @@ -22,6 +26,8 @@ logger = logging.getLogger(__name__) class MetaCacheMiss(Exception): """Raised on cache miss.""" +class MetaLoadError(Exception): + "Raised when metadata fails to load." 
class MetaCache: """This class provides an in-memory cache for metadata tree.""" @@ -50,10 +56,10 @@ class MetaCache: MetaCacheMiss: on missing key, or on aged out """ - if key not in self._cache: + if (key not in self._cache): raise MetaCacheMiss("no item for key {}".format(key)) - if self._cache[key][0] + self._max_age <= new_time_stamp: + if ((self._cache[key][0] + self._max_age) <= new_time_stamp): return self._cache[key][1] raise MetaCacheMiss("cache expired for key {}".format(key)) @@ -82,13 +88,47 @@ class MetaTree: """ self._cache = MetaCache() - if default_metadata is None: + if (default_metadata is None): default_metadata = {} self._default_metadata = default_metadata - if root[-1] != "/": + if (root[-1] != "/"): root += "/" self._root = root + def _get_cache_key(self, fullpath: str): + cachekey = fullpath + '.meta' + if fullpath.endswith(".heck"): + cachekey = fullpath + elif os.path.isdir(fullpath): + cachekey = os.path.join(fullpath, ".meta") + if (not os.path.exists(cachekey)): + cachekey = os.path.join(fullpath, ".heck") + + return cachekey + + def _load_metadata(self, cachekey: str) -> Dict: + meta = {} + with open(cachekey, "r") as inf: + if cachekey.endswith(".heck"): + # raise NotImplemented("We don't yet support HECKformat") + with open(cachekey) as cachefile: + h = heckformat.parse.load(cachefile) + meta = h.flatten_replace() + else: + try: + # try json load + meta = jstyleson.load(inf) + except jstyleson.JSONDecodeError as exc: + # try yaml load + try: + meta = yaml.load(inf) + except yaml.parser.ParserError as exc2: + # else either the yaml or json has an error + me = MetaLoadError() + exc2.__context__ = exc + raise me from exc2 + return meta + def get_metadata(self, rel_path: str) -> Dict: """Retrieve the metadata for a given path @@ -113,11 +153,10 @@ class MetaTree: fullpath = os.path.join(fullpath, pth) st = os.stat(fullpath) - if os.path.isdir(fullpath): - cachekey = os.path.join(fullpath, ".meta") - else: - cachekey = fullpath + ".meta" 
+ cachekey = self._get_cache_key(fullpath) + meta = cast(Dict, {}) + try: st_meta = os.stat(cachekey) meta = self._cache.get(cachekey, st_meta.st_mtime) @@ -126,28 +165,40 @@ class MetaTree: except MetaCacheMiss: meta = {} - if not meta and st_meta: - meta = jstyleson.load(open(cachekey, "r")) + # if we didn't get any meta from the cache, but the metafile exists, try loading it + if ((not meta) and st_meta): + meta = self._load_metadata(cachekey) self._cache.put(cachekey, meta, st_meta.st_mtime) - if fullpath == ospath and "wildcard_metadata" in metablob: + # add whatever is in the metablob as 'wildcard_metadata' to the metadata if the filename + # matches the wildcards + if ((fullpath == ospath) and ("wildcard_metadata" in metablob)): for wild in metablob["wildcard_metadata"]: if fnmatch.fnmatch(pth, wild[0]): metablob.update(wild[1]) metablob.update(meta) - # return final dict + ### fill in all objective metadata + # containing directory and filename metablob["dir"], metablob["file_name"] = os.path.split(rel_path) + # path within the source tree metablob["file_path"] = rel_path + # the path relative to the output tree metablob["relpath"] = os.path.relpath("/", "/" + metablob["dir"]) + # the UUID for this file metablob["uuid"] = uuid.uuid3(uuid.NAMESPACE_OID, metablob["uuid-oid-root"] + ospath) + # the pre-split components of the full path metablob["os-path"], _ = os.path.split(fullpath) + # the mime type we guessed for this file metablob["guessed-type"] = guess_mime(ospath) + # if the mime-type isn't overridden in the explicit metadata, we make it equal to the guessed type if "mime-type" not in metablob: metablob["mime-type"] = metablob["guessed-type"] + # the `stat` components metablob["stat"] = {} for stk in ("st_mtime", "st_ctime", "st_atime", "st_mode", "st_size", "st_ino"): metablob["stat"][stk.replace("st_", "")] = getattr(st, stk) + # return final dict return metablob diff --git a/heckweasel/processchain.py b/heckweasel/processchain.py index 
cc1e98c..c10dfa7 100644 --- a/heckweasel/processchain.py +++ b/heckweasel/processchain.py @@ -9,6 +9,26 @@ import yaml from .processors.processors import Processor +PROCESS_CHAIN_DEFAULT = { + 'default': {'extension': 'default', + 'chain': ['passthrough'] + }, + 'templatable': {'extension': None, + 'chain': ['jinja2'] + }, + 'tembed': {'extension': None, + 'chain': ['jinja2', 'jinja2_page_embed'] + }, + 'markdown': {'extension': ['md'], + 'chain': ['jinja2', 'process_md', 'jinja2_page_embed']}, + 'bbcode': {'extension': ['bb', 'pp'], + 'chain': ['jinja2', 'process_pp', 'jinja2_page_embed']}, + 'template-html': {'extension': ['thtml', 'cont'], + 'chain': ['jinja2', 'jinja2_page_embed']}, + 'heckformat': {'extension': ['heck'], + 'chain': ['process_heck', 'jinja2', 'process_md', 'jinja2_page_embed']} +} + class ProcessorChain: """This implements a wrapper for an arbitrary set of processors and an associated file stream.""" @@ -107,9 +127,9 @@ class ProcessorChains: """ if config is None: # pragma: no coverage - config = os.path.join(os.path.dirname(__file__), "defaults", "chains.yaml") - - self.chainconfig = yaml.load(open(config, "r")) + self.chainconfig = PROCESS_CHAIN_DEFAULT + else: + self.chainconfig = yaml.full_load(open(config, "r")) self.extensionmap: Dict[str, Any] = {} self.processors: Dict[str, Type[Processor]] = {} for ch, conf in self.chainconfig.items(): diff --git a/heckweasel/processors/process_heck.py b/heckweasel/processors/process_heck.py new file mode 100644 index 0000000..3de76e6 --- /dev/null +++ b/heckweasel/processors/process_heck.py @@ -0,0 +1,76 @@ +"""Convert a HECKformat file to a markdown stream.""" + +import io +import os + +from typing import Dict, Iterable, Optional + +import heckformat.parse + +from .processors import Processor, NoOutputException + + +class HECKformatProcessor(Processor): + """Convert a HECKformat file to a markdown stream.""" + + def filename(self, oldname: str, ctx: Optional[Dict] = None) -> str: + """Return the 
filename of the post-processed file. + + Arguments: + oldname (str): the previous name for the file. + ctx (dict, optional): A context object generated from the processor configuration + + Returns: + str: the new name for the file + + """ + return os.path.splitext(oldname)[0] + ".md" + + def mime_type(self, oldname: str, ctx: Optional[Dict] = None) -> str: + """Return the mimetype of the post-processed file. + + Arguments: + oldname (str): the input filename + ctx (dict, optional): A context object generated from the processor configuration + + Returns: + str: the new mimetype of the file after processing + + """ + return "text/x-markdown" + + def extension(self, oldname: str, ctx: Optional[Dict] = None) -> str: + """Return the extension of the post-processed file. + + Arguments: + oldname (str): the input filename + ctx (dict, optional): A context object generated from the processor configuration + + Returns: + str: the new extension of the file after processing + + """ + return "md" + + def process(self, input_file: Iterable, ctx: Optional[Dict] = None) -> Iterable: + """Return an iterable object of the post-processed file. 
+ + Arguments: + input_file (iterable): An input stream + ctx (dict, optional): A context object generated from the processor configuration + + Returns: + iterable: The post-processed output stream + """ + + elm = heckformat.parse.load_heck(input_file).flatten_replace() + for key in elm: + if key.startswith(heckformat.parse.UNPARSED_MARKER): + # fixme later we should use the doclabel to choose which output processor somehow~ + doclabel = key.split(' ')[-1] + # we'll just assume the first unparsed part of the document is the page + return elm[key] + # No documents in the input heck, we just prevent output + raise NoOutputException() + +processor = HECKformatProcessor # pylint: disable=invalid-name diff --git a/heckweasel/processors/processors.py b/heckweasel/processors/processors.py index f3312e7..fd431d5 100644 --- a/heckweasel/processors/processors.py +++ b/heckweasel/processors/processors.py @@ -5,6 +5,8 @@ from typing import Dict, Iterable, Optional class PassthroughException(Exception): """Raised when the processor would like the file to pass through unchanged.""" +class NoOutputException(Exception): + """Raised when the processor would like no output to be written from the processing chain.""" class ProcessorException(Exception): # pragma: no cover """A base exception class to be used by processor objects.""" diff --git a/heckweasel/utils.py b/heckweasel/utils.py index d12c490..6796787 100644 --- a/heckweasel/utils.py +++ b/heckweasel/utils.py @@ -61,6 +61,9 @@ def guess_mime(path: str) -> Optional[str]: str: the guessed mime-type """ + # if path.endswith('.heck'): + # return "text/x-heckformat" + mtypes = mimetypes.guess_type(path) ftype = None if os.path.isdir(path): diff --git a/pyproject.toml b/pyproject.toml index e69de29..4b6da64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" + + +[project] +name = "heckweasel" +dynamic = ["version"] +description = "A metadata 
based static site compiler with CMS-like features." +authors = [{name = "Cassowary", email="cassowary@aldercone.studio"}] +dependencies = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments", "heckformat"] +requires-python = ">=3.8" +readme = "README.md" +license = {text = "LICENSE"} + +[tool.pdm.version] +source = "file" +path = "heckweasel/__init__.py" + +[project.scripts] +heckweasel = "heckweasel.__main__:do_main" diff --git a/setup.py b/setup.py deleted file mode 100644 index 4d08ac5..0000000 --- a/setup.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Package configuration.""" -from setuptools import find_packages, setup - -from heckweasel import __version__ - -LONG_DESCRIPTION = """Heckweasel is a filesystem based static site generator.""" - -INSTALL_REQUIRES = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments"] - -# Extra dependencies -EXTRAS_REQUIRE = { - # Test dependencies - "tests": [ - "black", - "bandit>=1.1.0", - "flake8>=3.2.1", - "mypy>=0.470", - "prospector[with_everything]>=0.12.4", - "pytest-cov>=1.8.0", - "pytest-xdist>=1.15.0", - "pytest>=3.0.3", - "sphinx_rtd_theme>=0.1.6", - "sphinx-argparse>=0.1.15", - "Sphinx>=1.4.9", - ] -} - -SETUP_REQUIRES = ["pytest-runner>=2.7.1", "setuptools_scm>=1.15.0"] -setup( - author="Cassowary Rusnov", - author_email="alderconestudio@gmail.com", - classifiers=[ - "Development Status :: 1 - Pre-alpha", - "Environment :: Console", - "License :: OSI Approved :: MIT", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3 :: Only", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - description="A filesystem-based website generator / CMS", - # entry_points={ - # 'console_scripts': [ - # 'cookbook = spicerack.cookbook:main', - # ], - # }, - include_package_data=True, - extras_require=EXTRAS_REQUIRE, - install_requires=INSTALL_REQUIRES, - keywords=["cms", "website", "compiler"], - 
license="MIT", - long_description=LONG_DESCRIPTION, - name="heckweasel", - packages=find_packages(exclude=["*.tests", "*.tests.*"]), - platforms=["GNU/Linux"], - setup_requires=SETUP_REQUIRES, - use_scm_version=True, - url="https://git.aldercone.studio/aldercone/heckweasel", - zip_safe=False, - version=__version__, -)