Updates to support HECKformat documents, and minor changes.

- Update copyright.
- Remove manifest.in since we're switching to PDM (and defaults moved
  to the module).
- Remove setup.py since we're switching to PDM.
- Remove chains.yaml, move data to the processor module.
- Fix passthrough in __main__
- Move main function to separate function to support PDM entrypoint.
- metadata.py: Extensive rework
  * Add heck support (lots of little changes to support it). (.heck files can
    replace .meta files)
  * Add yaml metadata support (.meta files can be yaml)
  * Some formatting changes.
  * Make metatree be a little easier to read by separating out
    functionality into extra functions
- processchain.py: Move the chains.yaml data into an internal structure.
- Add processors/process_heck.py to support the document side of HECKformat
- Add pyproject.toml and embrace PDM.
This commit is contained in:
2024-02-10 20:49:52 -08:00
parent 694acf8599
commit b389506b4b
12 changed files with 205 additions and 194 deletions

View File

@ -15,7 +15,7 @@ from typing import Dict, List, cast
from .metadata import MetaTree
from .processchain import ProcessorChains
from .processors.processors import PassthroughException
from .processors.processors import PassthroughException, NoOutputException
from .pygments import pygments_get_css, pygments_markup_contents_html
from .template_tools import (
date_iso8601,
@ -144,14 +144,23 @@ def main() -> int:
print("process {} -> {} -> {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename)))
if not args.dry_run:
try:
# normal output
# FIXME support binary streams
collected_output = [line for line in chain.output]
with open(os.path.join(target_dir, chain.output_filename), "w") as outfile:
for line in chain.output:
outfile.write(line)
outfile.writelines(collected_output)
except PassthroughException:
# write output from input
shutil.copyfile(os.path.join(root, f), os.path.join(target_dir, chain.output_filename))
except NoOutputException:
print("skip output {} -> {}".format(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)))
# don't write any output
pass
return 0
def do_main():
    """Run main() and exit the interpreter using its return value as the exit status."""
    sys.exit(main())
if __name__ == "__main__":
sys.exit(main())
do_main()

View File

@ -1,111 +0,0 @@
# Default: output == input
default:
extension: default
chain:
- passthrough
# Any object that needs jinja scripts but no other explicit processing
templatable:
extension: null
chain:
- jinja2
# Any object that needs jinja and to be embedded in a parent template
tembed:
extension: null
chain:
- jinja2
- jinja2_page_embed
# Markdown, BBCode and RST are first run through the templater, and then
# they are processed into HTML, and finally embedded in a page template.
markdown:
extension:
- md
chain:
- jinja2
- process_md
- jinja2_page_embed
bbcode:
extension:
- bb
- pp
chain:
- jinja2
- process_pp
- jinja2_page_embed
# FIXME implement RST processor
# restructured:
# extension:
# - rst
# chain:
# - jinja2
# - process_rst
# - jinja2_page_embed
# # JSON and YAML are split, passed through a pretty printer, and then output
# FIXME implement split chain processor, implement processor arguments
# json:
# extension:
# - json
# chain:
# - split (passthrough)
# - pp_json
# yaml:
# extension:
# - yml
# - yaml
# chain:
# - split (passthrough)
# - pp_yaml
# Template-html is first passed through the templater, and then embedded
# in a page template
template-html:
extension:
- thtml
- cont
chain:
- jinja2
- jinja2_page_embed
# # Smart CSS are simply converted to CSS.
# sass:
# extension:
# - sass
# - scss
# chain:
# - process_sass
# less:
# extension:
# - less
# chain:
# - process_less
# stylus:
# extension:
# - styl
# chain:
# - process_styl
# # Images are processed into thumbnails and sized in addition to being retained as their original
# FIXME implement split chain processor, implement processor arguments,
# image:
# extension:
# - jpg
# - jpeg
# - png
# chain:
# - split (image_bigthumb)
# - split (image_smallthumb)
# - passthrough
# image_bigthumb:
# extension:
# chain:
# - smart_resize (big)
# image_smallthumb:
# extension:
# chain:
# - smart_resize (small)

View File

@ -6,15 +6,19 @@ import mimetypes
import os
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import yaml
import jstyleson
import heckformat.parse
from .utils import guess_mime
# setup mimetypes with some extra ones
mimetypes.init()
mimetypes.add_type("text/html", "thtml")
mimetypes.add_type("text/html", "cont")
mimetypes.add_type("text/x-heckformat", "heck")
logger = logging.getLogger(__name__)
@ -22,6 +26,8 @@ logger = logging.getLogger(__name__)
class MetaCacheMiss(Exception):
"""Raised on cache miss."""
class MetaLoadError(Exception):
    """Raised when metadata fails to load.

    Signals that a metadata file could be opened but its contents could not be
    parsed by any of the supported metadata formats.
    """
class MetaCache:
"""This class provides an in-memory cache for metadata tree."""
@ -50,10 +56,10 @@ class MetaCache:
MetaCacheMiss: on missing key, or on aged out
"""
if key not in self._cache:
if (key not in self._cache):
raise MetaCacheMiss("no item for key {}".format(key))
if self._cache[key][0] + self._max_age <= new_time_stamp:
if ((self._cache[key][0] + self._max_age) <= new_time_stamp):
return self._cache[key][1]
raise MetaCacheMiss("cache expired for key {}".format(key))
@ -82,13 +88,47 @@ class MetaTree:
"""
self._cache = MetaCache()
if default_metadata is None:
if (default_metadata is None):
default_metadata = {}
self._default_metadata = default_metadata
if root[-1] != "/":
if (root[-1] != "/"):
root += "/"
self._root = root
def _get_cache_key(self, fullpath: str):
cachekey = fullpath + '.meta'
if fullpath.endswith(".heck"):
cachekey = fullpath
elif os.path.isdir(fullpath):
cachekey = os.path.join(fullpath, ".meta")
if (not os.path.exists(cachekey)):
cachekey = os.path.join(fullpath, ".heck")
return cachekey
def _load_metadata(self, cachekey: str) -> Dict:
    """Load a metadata file from disk and return its contents.

    ``.heck`` files are parsed with ``heckformat`` and flattened. Any other
    file is tried as JSON-with-comments first and, if that fails, as YAML.

    Arguments:
        cachekey (str): the path of the metadata file to load.

    Returns:
        dict: the parsed metadata; an empty dict if the file parsed to nothing.

    Raises:
        MetaLoadError: if the file is neither valid JSON nor valid YAML.
    """
    if cachekey.endswith(".heck"):
        # Open the file exactly once; the heck parser consumes the stream itself.
        with open(cachekey, "r") as cachefile:
            return heckformat.parse.load(cachefile).flatten_replace()

    # Read the text once so that both parsers see the complete content; a
    # stream already consumed by the failed JSON attempt would look empty to
    # the YAML parser.
    with open(cachekey, "r") as inf:
        text = inf.read()

    try:
        meta = jstyleson.loads(text)
    except ValueError as json_exc:
        # json.JSONDecodeError is a ValueError subclass, so this catches JSON
        # syntax errors without relying on jstyleson re-exporting the name.
        try:
            # safe_load: metadata is plain data, and yaml.load() without an
            # explicit Loader is rejected by PyYAML >= 6.
            meta = yaml.safe_load(text)
        except yaml.YAMLError as yaml_exc:
            # Neither parser accepted the file; keep both failures in the chain.
            yaml_exc.__context__ = json_exc
            raise MetaLoadError(
                "could not parse {} as JSON or YAML".format(cachekey)
            ) from yaml_exc

    # An empty YAML document parses to None; callers expect a mapping.
    return meta if meta is not None else {}
def get_metadata(self, rel_path: str) -> Dict:
"""Retrieve the metadata for a given path
@ -113,11 +153,10 @@ class MetaTree:
fullpath = os.path.join(fullpath, pth)
st = os.stat(fullpath)
if os.path.isdir(fullpath):
cachekey = os.path.join(fullpath, ".meta")
else:
cachekey = fullpath + ".meta"
cachekey = self._get_cache_key(fullpath)
meta = cast(Dict, {})
try:
st_meta = os.stat(cachekey)
meta = self._cache.get(cachekey, st_meta.st_mtime)
@ -126,28 +165,40 @@ class MetaTree:
except MetaCacheMiss:
meta = {}
if not meta and st_meta:
meta = jstyleson.load(open(cachekey, "r"))
# if we didn't get any meta from the cache, but the metafile exists, try loading it
if ((not meta) and st_meta):
meta = self._load_metadata(cachekey)
self._cache.put(cachekey, meta, st_meta.st_mtime)
if fullpath == ospath and "wildcard_metadata" in metablob:
# add whatever is in the metablob as 'wildcard_metadata' to the metadata if the filename
# matches the wildcards
if ((fullpath == ospath) and ("wildcard_metadata" in metablob)):
for wild in metablob["wildcard_metadata"]:
if fnmatch.fnmatch(pth, wild[0]):
metablob.update(wild[1])
metablob.update(meta)
# return final dict
### fill in all objective metadata
# containing directory and filename
metablob["dir"], metablob["file_name"] = os.path.split(rel_path)
# path within the source tree
metablob["file_path"] = rel_path
# the path relative to the output tree
metablob["relpath"] = os.path.relpath("/", "/" + metablob["dir"])
# the UUID for this file
metablob["uuid"] = uuid.uuid3(uuid.NAMESPACE_OID, metablob["uuid-oid-root"] + ospath)
# the pre-split components of the full path
metablob["os-path"], _ = os.path.split(fullpath)
# the mime type we guessed for this file
metablob["guessed-type"] = guess_mime(ospath)
# if the mime-type isn't overridden in the explicit metadata, we make it equal to the guessed type
if "mime-type" not in metablob:
metablob["mime-type"] = metablob["guessed-type"]
# the `stat` components
metablob["stat"] = {}
for stk in ("st_mtime", "st_ctime", "st_atime", "st_mode", "st_size", "st_ino"):
metablob["stat"][stk.replace("st_", "")] = getattr(st, stk)
# return final dict
return metablob

View File

@ -9,6 +9,26 @@ import yaml
from .processors.processors import Processor
# Built-in processor chain table. Each entry maps a chain name to the file
# extension(s) it is associated with and the ordered processor names to run.
PROCESS_CHAIN_DEFAULT = {
    # Default: output == input
    "default": {
        "extension": "default",
        "chain": ["passthrough"],
    },
    # Any object that needs jinja scripts but no other explicit processing
    "templatable": {
        "extension": None,
        "chain": ["jinja2"],
    },
    # Any object that needs jinja and to be embedded in a parent template
    "tembed": {
        "extension": None,
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    # Markup formats are templated first, then converted, then embedded in a page template
    "markdown": {
        "extension": ["md"],
        "chain": ["jinja2", "process_md", "jinja2_page_embed"],
    },
    "bbcode": {
        "extension": ["bb", "pp"],
        "chain": ["jinja2", "process_pp", "jinja2_page_embed"],
    },
    # Template-html is passed through the templater and then embedded in a page template
    "template-html": {
        "extension": ["thtml", "cont"],
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    # HECKformat documents are run through process_heck before the markdown steps
    "heckformat": {
        "extension": ["heck"],
        "chain": ["process_heck", "jinja2", "process_md", "jinja2_page_embed"],
    },
}
class ProcessorChain:
"""This implements a wrapper for an arbitrary set of processors and an associated file stream."""
@ -107,9 +127,9 @@ class ProcessorChains:
"""
if config is None: # pragma: no coverage
config = os.path.join(os.path.dirname(__file__), "defaults", "chains.yaml")
self.chainconfig = yaml.load(open(config, "r"))
self.chainconfig = PROCESS_CHAIN_DEFAULT
else:
self.chainconfig = yaml.full_load(open(config, "r"))
self.extensionmap: Dict[str, Any] = {}
self.processors: Dict[str, Type[Processor]] = {}
for ch, conf in self.chainconfig.items():

View File

@ -0,0 +1,76 @@
"""Convert a HECKformat file to a markdown stream."""
import io
import os
from typing import Dict, Iterable, Optional
import heckformat.parse
from .processors import Processor, NoOutputException
class HECKformatProcessor(Processor):
    """Convert a HECKformat file to a markdown stream."""

    def filename(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the filename of the post-processed file.

        Arguments:
            oldname (str): the previous name for the file.
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new name for the file (the old name with its extension replaced by ``.md``)
        """
        return os.path.splitext(oldname)[0] + ".md"

    def mime_type(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the mimetype of the post-processed file.

        Arguments:
            oldname (str): the input filename
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new mimetype of the file after processing
        """
        return "text/x-markdown"

    def extension(self, oldname: str, ctx: Optional[Dict] = None) -> str:
        """Return the extension of the post-processed file.

        Arguments:
            oldname (str): the input filename
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            str: the new extension of the file after processing
        """
        return "md"

    def process(self, input_file: Iterable, ctx: Optional[Dict] = None) -> Iterable:
        """Return an iterable object of the post-processed file.

        The input is parsed as HECKformat and flattened; the value stored under
        the first key that starts with the parser's unparsed-section marker is
        returned as the document body.

        Arguments:
            input_file (iterable): An input stream
            ctx (dict, optional): A context object generated from the processor configuration

        Returns:
            iterable: The post-processed output stream

        Raises:
            NoOutputException: if the input contains no unparsed document section.
        """
        elm = heckformat.parse.load_heck(input_file).flatten_replace()

        for key in elm:
            if key.startswith(heckformat.parse.UNPARSED_MARKER):
                # fixme later we should use the doclabel (the last space-separated
                # part of the key) to choose which output processor somehow~
                # we'll just assume the first unparsed part of the document is the page
                return elm[key]

        # No documents in the input heck, we just prevent output
        raise NoOutputException()


processor = HECKformatProcessor  # pylint: disable=invalid-name

View File

@ -5,6 +5,8 @@ from typing import Dict, Iterable, Optional
class PassthroughException(Exception):
"""Raised when the processor would like the file to pass through unchanged."""
class NoOutputException(Exception):
    """Raised when the processor would like no output to be written from the processing chain.

    Unlike PassthroughException, which asks for the file to pass through
    unchanged, this asks for nothing to be written for the file at all.
    """
class ProcessorException(Exception): # pragma: no cover
"""A base exception class to be used by processor objects."""

View File

@ -61,6 +61,9 @@ def guess_mime(path: str) -> Optional[str]:
str: the guessed mime-type
"""
# if path.endswith('.heck'):
# return "text/x-heckformat"
mtypes = mimetypes.guess_type(path)
ftype = None
if os.path.isdir(path):