Updates to support HECKformat documents, and minor changes.

- Update copyright.
- Remove MANIFEST.in since we're switching to PDM (and defaults moved to the module).
- Remove setup.py since we're switching to PDM.
- Remove chains.yaml, move data to the processor module.
- Fix passthrough in __main__.
- Move main function to separate function to support PDM entrypoint.
- metadata.py: Extensive rework
  * Add heck support (lots of little changes to support it). (.heck files can replace .meta files)
  * Add yaml metadata support (.meta files can be yaml)
  * Some formatting changes.
  * Make metatree be a little easier to read by separating out functionality into extra functions
- processchain.py: Move chains.yaml into an internal structure.
- Add processors/process_heck.py to support the document side of HECKformat.
- Add pyproject.toml and embrace PDM.
2024-02-10 20:49:52 -08:00
parent 694acf8599
commit b389506b4b
12 changed files with 205 additions and 194 deletions
--- a/4
+++ b/4
@ -1,6 +1,8 @@
+No Nazis, otherwise:
+
 MIT License

-Copyright (c) 2023 Cas Rusnov
+Copyright (c) 2023-2024 Aldercone Studio Collective

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1 +0,0 @@
-include heckweasel/defaults/*.yaml
--- a/TODO.md
+++ b/TODO.md
@ -20,5 +20,6 @@
 * Run commands as part of processing chains

 * Project level processing chain overrides in the .meta or whatever.
+* Project settings in a file separate from .meta that would basically do .meta stuff, e.g. global meta + config in a top.heck file by default, overridable by a parameter.


--- a/heckweasel/main.py
+++ b/heckweasel/main.py
@ -15,7 +15,7 @@ from typing import Dict, List, cast

 from .metadata import MetaTree
 from .processchain import ProcessorChains
-from .processors.processors import PassthroughException
+from .processors.processors import PassthroughException, NoOutputException
 from .pygments import pygments_get_css, pygments_markup_contents_html
 from .template_tools import (
    date_iso8601,
@ -144,14 +144,23 @@ def main() -> int:
            print("process {} -> {} -> {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename)))
            if not args.dry_run:
                try:
+                    # normal output
+                    # FIXME support binary streams
+                    collected_output = [line for line in chain.output]
                    with open(os.path.join(target_dir, chain.output_filename), "w") as outfile:
-                        for line in chain.output:
-                            outfile.write(line)
+                        outfile.writelines(collected_output)
                except PassthroughException:
+                    # write output from input
                    shutil.copyfile(os.path.join(root, f), os.path.join(target_dir, chain.output_filename))
+                except NoOutputException:
+                    print("skip output {} -> {}".format(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)))
+                    # don't write any output
+                    pass

    return 0

+def do_main():
+    sys.exit(main())

 if __name__ == "__main__":
-    sys.exit(main())
+    do_main()
--- a/heckweasel/defaults/chains.yaml
+++ b/heckweasel/defaults/chains.yaml
@ -1,111 +0,0 @@
-# Default: output == input
-default:
-    extension: default
-    chain:
-        - passthrough
-
-# Any object that needs jinja scripts but no other explicit processing
-templatable:
-    extension: null
-    chain:
-      - jinja2
-
-# Any object that needs jinja and to be embedded in a parent template
-tembed:
-  extension: null
-  chain:
-    - jinja2
-    - jinja2_page_embed
-
-# Markdown, BBCode and RST are first run through the templater, and then
-# they are processed into HTML, and finally embedded in a page template.
-markdown:
-    extension:
-        - md
-    chain:
-        - jinja2
-        - process_md
-        - jinja2_page_embed
-bbcode:
-    extension:
-        - bb
-        - pp
-    chain:
-        - jinja2
-        - process_pp
-        - jinja2_page_embed
-# FIXME implement RST processor
-# restructured:
-#     extension:
-#         - rst
-#     chain:
-#         - jinja2
-#         - process_rst
-#         - jinja2_page_embed
-
-# # JSON and YAML are split, passed through a pretty printer, and then output
-# FIXME implement split chain processor, implement processor arguments
-# json:
-#     extension:
-#         - json
-#     chain:
-#         - split (passthrough)
-#         - pp_json
-# yaml:
-#     extension:
-#         - yml
-#         - yaml
-#     chain:
-#         - split (passthrough)
-#         - pp_yaml
-
-# Template-html is first passed through the templater, and then embedded
-# in a page template
-template-html:
-    extension:
-        - thtml
-        - cont
-    chain:
-        - jinja2
-        - jinja2_page_embed
-
-# # Smart CSS are simply converted to CSS.
-# sass:
-#     extension:
-#         - sass
-#         - scss
-#     chain:
-#         - process_sass
-# less:
-#     extension:
-#         - less
-#     chain:
-#         - process_less
-
-# stylus:
-#     extension:
-#         - styl
-#     chain:
-#         - process_styl
-
-# # Images are processed into thumbnails and sized in addition to being retained as their original
-# FIXME implement split chain processor, implement processor arguments,
-# image:
-#     extension:
-#         - jpg
-#         - jpeg
-#         - png
-#     chain:
-#         - split (image_bigthumb)
-#         - split (image_smallthumb)
-#         - passthrough
-
-# image_bigthumb:
-#     extension:
-#     chain:
-#         - smart_resize (big)
-
-# image_smallthumb:
-#     extension:
-#     chain:
-#         - smart_resize (small)
--- a/heckweasel/metadata.py
+++ b/heckweasel/metadata.py
@ -6,15 +6,19 @@ import mimetypes
 import os
 import uuid
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
+import yaml

 import jstyleson

+import heckformat.parse
+
 from .utils import guess_mime

 # setup mimetypes with some extra ones
 mimetypes.init()
 mimetypes.add_type("text/html", "thtml")
 mimetypes.add_type("text/html", "cont")
+mimetypes.add_type("text/x-heckformat", "heck")

 logger = logging.getLogger(__name__)

@ -22,6 +26,8 @@ logger = logging.getLogger(__name__)
 class MetaCacheMiss(Exception):
    """Raised on cache miss."""

+class MetaLoadError(Exception):
+    "Raised when metadata fails to load."

 class MetaCache:
    """This class provides an in-memory cache for metadata tree."""
@ -50,10 +56,10 @@ class MetaCache:
            MetaCacheMiss: on missing key, or on aged out

        """
-        if key not in self._cache:
+        if (key not in self._cache):
            raise MetaCacheMiss("no item for key {}".format(key))

-        if self._cache[key][0] + self._max_age <= new_time_stamp:
+        if ((self._cache[key][0] + self._max_age) <= new_time_stamp):
            return self._cache[key][1]

        raise MetaCacheMiss("cache expired for key {}".format(key))
@ -82,13 +88,47 @@ class MetaTree:

        """
        self._cache = MetaCache()
-        if default_metadata is None:
+        if (default_metadata is None):
            default_metadata = {}
        self._default_metadata = default_metadata
-        if root[-1] != "/":
+        if (root[-1] != "/"):
            root += "/"
        self._root = root

+    def _get_cache_key(self, fullpath: str):
+        cachekey = fullpath + '.meta'
+        if fullpath.endswith(".heck"):
+            cachekey = fullpath
+        elif os.path.isdir(fullpath):
+            cachekey = os.path.join(fullpath, ".meta")
+            if (not os.path.exists(cachekey)):
+                cachekey = os.path.join(fullpath, ".heck")
+
+        return cachekey
+
+    def _load_metadata(self, cachekey: str) -> Dict:
+        meta = {}
+        with open(cachekey, "r") as inf:
+            if cachekey.endswith(".heck"):
+                # raise NotImplemented("We don't yet support HECKformat")
+                with open(cachekey) as cachefile:
+                    h = heckformat.parse.load(cachefile)
+                    meta = h.flatten_replace()
+            else:
+                try:
+                    # try json load
+                    meta = jstyleson.load(inf)
+                except jstyleson.JSONDecodeError as exc:
+                    # try yaml load
+                    try:
+                        meta = yaml.load(inf)
+                    except yaml.parser.ParserError as exc2:
+                        # else either the yaml or json has an error
+                        me = MetaLoadError()
+                        exc2.__context__ = exc
+                        raise me from exc2
+            return meta
+
    def get_metadata(self, rel_path: str) -> Dict:
        """Retrieve the metadata for a given path

@ -113,11 +153,10 @@ class MetaTree:
            fullpath = os.path.join(fullpath, pth)
            st = os.stat(fullpath)

-            if os.path.isdir(fullpath):
-                cachekey = os.path.join(fullpath, ".meta")
-            else:
-                cachekey = fullpath + ".meta"
+            cachekey = self._get_cache_key(fullpath)
+
            meta = cast(Dict, {})
+
            try:
                st_meta = os.stat(cachekey)
                meta = self._cache.get(cachekey, st_meta.st_mtime)
@ -126,28 +165,40 @@ class MetaTree:
            except MetaCacheMiss:
                meta = {}

-            if not meta and st_meta:
-                meta = jstyleson.load(open(cachekey, "r"))
+            # if we didn't get any meta from the cache, but the metafile exists, try loading it
+            if ((not meta) and st_meta):
+                meta = self._load_metadata(cachekey)
                self._cache.put(cachekey, meta, st_meta.st_mtime)

-            if fullpath == ospath and "wildcard_metadata" in metablob:
+            # add whatever is in the metablob as 'wildcard_metadata' to the metadata if the filename
+            # matches the wildcards
+            if ((fullpath == ospath) and ("wildcard_metadata" in metablob)):
                for wild in metablob["wildcard_metadata"]:
                    if fnmatch.fnmatch(pth, wild[0]):
                        metablob.update(wild[1])

            metablob.update(meta)

-        # return final dict
+        ### fill in all objective metadata
+        # containing directory and filename
        metablob["dir"], metablob["file_name"] = os.path.split(rel_path)
+        # path within the source tree
        metablob["file_path"] = rel_path
+        # the path relative to the output tree
        metablob["relpath"] = os.path.relpath("/", "/" + metablob["dir"])
+        # the UUID for this file
        metablob["uuid"] = uuid.uuid3(uuid.NAMESPACE_OID, metablob["uuid-oid-root"] + ospath)
+        # the pre-split components of the full path
        metablob["os-path"], _ = os.path.split(fullpath)
+        # the mime type we guessed for this file
        metablob["guessed-type"] = guess_mime(ospath)
+        # if the mime-type isn't overridden in the explicit metadata, we make it equal to the guessed type
        if "mime-type" not in metablob:
            metablob["mime-type"] = metablob["guessed-type"]
+        # the `stat` components
        metablob["stat"] = {}
        for stk in ("st_mtime", "st_ctime", "st_atime", "st_mode", "st_size", "st_ino"):
            metablob["stat"][stk.replace("st_", "")] = getattr(st, stk)

+        # return final dict
        return metablob
--- a/heckweasel/processchain.py
+++ b/heckweasel/processchain.py
@ -9,6 +9,26 @@ import yaml

 from .processors.processors import Processor

+PROCESS_CHAIN_DEFAULT = {
+    'default': {'extension': 'default',
+                'chain': ['passthrough']
+                },
+    'templatable': {'extension': None,
+                    'chain': ['jinja2']
+                    },
+    'tembed': {'extension': None,
+               'chain': ['jinja2', 'jinja2_page_embed']
+               },
+    'markdown': {'extension': ['md'],
+                 'chain': ['jinja2', 'process_md', 'jinja2_page_embed']},
+    'bbcode': {'extension': ['bb', 'pp'],
+               'chain': ['jinja2', 'process_pp', 'jinja2_page_embed']},
+    'template-html': {'extension': ['thtml', 'cont'],
+                      'chain': ['jinja2', 'jinja2_page_embed']},
+    'heckformat': {'extension': ['heck'],
+                   'chain': ['process_heck', 'jinja2', 'process_md', 'jinja2_page_embed']}
+}
+

 class ProcessorChain:
    """This implements a wrapper for an arbitrary set of processors and an associated file stream."""
@ -107,9 +127,9 @@ class ProcessorChains:

        """
        if config is None:  # pragma: no coverage
-            config = os.path.join(os.path.dirname(__file__), "defaults", "chains.yaml")
-
-        self.chainconfig = yaml.load(open(config, "r"))
+            self.chainconfig = PROCESS_CHAIN_DEFAULT
+        else:
+            self.chainconfig = yaml.full_load(open(config, "r"))
        self.extensionmap: Dict[str, Any] = {}
        self.processors: Dict[str, Type[Processor]] = {}
        for ch, conf in self.chainconfig.items():
--- a/heckweasel/processors/process_heck.py
+++ b/heckweasel/processors/process_heck.py
@ -0,0 +1,76 @@
+"""Convert a HECKformat file to a markdown stream."""
+
+import io
+import os
+
+from typing import Dict, Iterable, Optional
+
+import heckformat.parse
+
+from .processors import Processor, NoOutputException
+
+
+class HECKformatProcessor(Processor):
+    """Convert a HECKformat file to a markdown stream."""
+
+    def filename(self, oldname: str, ctx: Optional[Dict] = None) -> str:
+        """Return the filename of the post-processed file.
+
+        Arguments:
+            oldname (str): the previous name for the file.
+            ctx (dict, optional): A context object generated from the processor configuration
+
+        Returns:
+            str: the new name for the file
+
+        """
+        return os.path.splitext(oldname)[0] + ".md"
+
+    def mime_type(self, oldname: str, ctx: Optional[Dict] = None) -> str:
+        """Return the mimetype of the post-processed file.
+
+        Arguments:
+            oldname (str): the input filename
+            ctx (dict, optional): A context object generated from the processor configuration
+
+        Returns:
+            str: the new mimetype of the file after processing
+
+        """
+        return "text/x-markdown"
+
+    def extension(self, oldname: str, ctx: Optional[Dict] = None) -> str:
+        """Return the extension of the post-processed file.
+
+        Arguments:
+            oldname (str): the input filename
+            ctx (dict, optional): A context object generated from the processor configuration
+
+        Returns:
+            str: the new extension of the file after processing
+
+        """
+        return "md"
+
+    def process(self, input_file: Iterable, ctx: Optional[Dict] = None) -> Iterable:
+        """Return an iterable object of the post-processed file.
+
+        Arguments:
+            input_file (iterable): An input stream
+            ctx (dict, optional): A context object generated from the processor configuration
+
+        Returns:
+            iterable: The post-processed output stream
+        """
+
+        elm = heckformat.parse.load_heck(input_file).flatten_replace()
+        for key in elm:
+            if key.startswith(heckformat.parse.UNPARSED_MARKER):
+                # fixme later we should use the doclabel to choose which output processor somehow~
+                doclabel = key.split(' ')[-1]
+                # we'll just assume the first unparsed part of the document is the page
+                return elm[key]
+        # No documents in the input heck, we just prevent output
+        raise NoOutputException()
+
+processor = HECKformatProcessor  # pylint: disable=invalid-name
--- a/heckweasel/processors/processors.py
+++ b/heckweasel/processors/processors.py
@ -5,6 +5,8 @@ from typing import Dict, Iterable, Optional
 class PassthroughException(Exception):
    """Raised when the processor would like the file to pass through unchanged."""

+class NoOutputException(Exception):
+    """Raised when the processor would like no output to be written from the processing chain."""

 class ProcessorException(Exception):  # pragma: no cover
    """A base exception class to be used by processor objects."""
--- a/heckweasel/utils.py
+++ b/heckweasel/utils.py
@ -61,6 +61,9 @@ def guess_mime(path: str) -> Optional[str]:
        str: the guessed mime-type

    """
+    # if path.endswith('.heck'):
+    #     return "text/x-heckformat"
+
    mtypes = mimetypes.guess_type(path)
    ftype = None
    if os.path.isdir(path):
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,21 @@
+[build-system]
+requires      = ["pdm-backend"]
+build-backend = "pdm.backend"
+
+
+[project]
+name = "heckweasel"
+dynamic = ["version"]
+description = "A metadata based static site compiler with CMS-like features."
+authors = [{name = "Cassowary", email="cassowary@aldercone.studio"}]
+dependencies = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments", "heckformat"]
+requires-python = ">=3.8"
+readme = "README.md"
+license = {text = "LICENSE"}
+
+[tool.pdm.version]
+source = "file"
+path = "heckweasel/__init__.py"
+
+[project.scripts]
+heckweasel = "heckweasel.__main__:do_main"
--- a/setup.py
+++ b/setup.py
@ -1,62 +0,0 @@
-"""Package configuration."""
-from setuptools import find_packages, setup
-
-from heckweasel import __version__
-
-LONG_DESCRIPTION = """Heckweasel is a filesystem based static site generator."""
-
-INSTALL_REQUIRES = ["yaml-1.3", "markdown", "jstyleson", "jinja2", "pygments"]
-
-# Extra dependencies
-EXTRAS_REQUIRE = {
-    # Test dependencies
-    "tests": [
-        "black",
-        "bandit>=1.1.0",
-        "flake8>=3.2.1",
-        "mypy>=0.470",
-        "prospector[with_everything]>=0.12.4",
-        "pytest-cov>=1.8.0",
-        "pytest-xdist>=1.15.0",
-        "pytest>=3.0.3",
-        "sphinx_rtd_theme>=0.1.6",
-        "sphinx-argparse>=0.1.15",
-        "Sphinx>=1.4.9",
-    ]
-}
-
-SETUP_REQUIRES = ["pytest-runner>=2.7.1", "setuptools_scm>=1.15.0"]
-setup(
-    author="Cassowary Rusnov",
-    author_email="alderconestudio@gmail.com",
-    classifiers=[
-        "Development Status :: 1 - Pre-alpha",
-        "Environment :: Console",
-        "License :: OSI Approved :: MIT",
-        "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3 :: Only",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    description="A filesystem-based website generator / CMS",
-    # entry_points={
-    #    'console_scripts': [
-    #        'cookbook = spicerack.cookbook:main',
-    #    ],
-    # },
-    include_package_data=True,
-    extras_require=EXTRAS_REQUIRE,
-    install_requires=INSTALL_REQUIRES,
-    keywords=["cms", "website", "compiler"],
-    license="MIT",
-    long_description=LONG_DESCRIPTION,
-    name="heckweasel",
-    packages=find_packages(exclude=["*.tests", "*.tests.*"]),
-    platforms=["GNU/Linux"],
-    setup_requires=SETUP_REQUIRES,
-    use_scm_version=True,
-    url="https://git.aldercone.studio/aldercone/heckweasel",
-    zip_safe=False,
-    version=__version__,
-)