Major cleanup and fixage for new metadata stuff and heckformat
- Clean up a ton of documentation. - Make the modules import nicely. - Add a cool logo to the command line tool - Make the command-line tool use tqdm - Make the command line tool load the metadata before processing the files in a separate loop. - Fix error handling in the command-line tool processing loops so they work correctly (and jinja errors are more useful) - Make command-line tool exit non-zero if there were errors. - Fix load metadata to handle formats and errors better (and return {} if it fails)
This commit is contained in:
parent
b389506b4b
commit
690f110bc5
8
TODO.md
8
TODO.md
|
@ -20,6 +20,12 @@
|
|||
* Run commands as part of processing chains
|
||||
|
||||
* Project level processing chain overrides in the .meta or whatever.
|
||||
* Project settings in separate file from .meta that would basically do .meta stuff. Like global meta + config in a top.heck file by default and overridable by a parameter.
|
||||
* Project settings in separate file from .meta that would basically do .meta stuff. Like global meta + config in a top.heck file by default and overridable by a parameter. Maybe
|
||||
a nice default filename that doesn't start with . (whereas .meta or .heck is the current base metadata)
|
||||
|
||||
* Handle the fact that HECKformat metadata is always a list in a more elegant way. We have some hacks in place where scalar values are expected, but if a project mixes
|
||||
metadata formats it gets a bit messy.
|
||||
|
||||
* Provide a database-like interface to the global metadata tree, so that metameta page templates could query the database to assemble indexes and whatnot without looping over the actual files.
|
||||
|
||||
|
||||
|
|
|
@ -1 +1,14 @@
|
|||
__version__ = '0.7.0'
|
||||
"""
|
||||
HeckWeasel: Metadata based static site compiler.
|
||||
"""
|
||||
__version__ = '0.7.1'
|
||||
__copyright__ = "©2023-2024 Aldercone Studio Collective"
|
||||
|
||||
from . import metadata
|
||||
from . import processors
|
||||
from . import __main__
|
||||
from . import processchain
|
||||
from . import processors
|
||||
from . import template_tools
|
||||
from . import pygments
|
||||
from . import utils
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
"""
|
||||
HeckWeasel command line interface.
|
||||
|
||||
Performs compilation step given an input directory. See --help for more information.
|
||||
|
||||
"""
|
||||
# iterate source tree
|
||||
# create directories in target tree
|
||||
# for each item:
|
||||
|
@ -11,7 +17,13 @@ import os
|
|||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from typing import Dict, List, cast
|
||||
|
||||
import jinja2.exceptions
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, cast, Union
|
||||
|
||||
import tqdm
|
||||
|
||||
from .metadata import MetaTree
|
||||
from .processchain import ProcessorChains
|
||||
|
@ -23,19 +35,69 @@ from .template_tools import (
|
|||
file_list,
|
||||
file_list_hier,
|
||||
file_json,
|
||||
file_heck,
|
||||
file_metadata,
|
||||
file_name,
|
||||
file_raw,
|
||||
time_iso8601,
|
||||
containsone,
|
||||
)
|
||||
from .utils import deep_merge_dicts
|
||||
from .__init__ import __version__, __copyright__
|
||||
|
||||
logger = logging.getLogger()
|
||||
logger = logging.getLogger('heckweasel')
|
||||
|
||||
logo = f"""
|
||||
Aldercone Studio Collective
|
||||
_ _ _
|
||||
| |_ ___ __| |____ __ _____ __ _ ___ ___| |
|
||||
| ' \/ -_) _| / /\ V V / -_) _` (_-</ -_) |
|
||||
|_||_\___\__|_\_\ \_/\_/\___\__,_/__/\___|_|
|
||||
{__version__}
|
||||
"""
|
||||
|
||||
def setup_logging(verbose: bool = False) -> None:
|
||||
pass
|
||||
class TqdmLoggingHandler(logging.Handler):
    """
    Logging handler that emits records through ``tqdm.tqdm.write``.

    Routing output through tqdm keeps log lines from clobbering an active
    progress bar.
    """

    def __init__(self, level=logging.NOTSET):
        super().__init__(level=level)

    def emit(self, record):
        """Format ``record`` and write it via tqdm, deferring failures to ``handleError``."""
        try:
            tqdm.tqdm.write(self.format(record))
            self.flush()
        except Exception:
            # Same contract as the stock logging handlers: never let a
            # logging failure propagate into the caller.
            self.handleError(record)
|
||||
|
||||
def setup_logging(verbose: bool = False, quiet: bool = False, logfile: Union[Path, str, None] = None) -> None:
    """
    Configure the module-level ``logger`` based on some flags.

    Args:
        verbose: Show DEBUG and above on the console, with timestamps and
            module names. Takes precedence over ``quiet``.
        quiet: Show only CRITICAL messages on the console.
        logfile: Optional path of a file that receives every record
            (DEBUG and above) in the detailed format.

    Calling this more than once replaces the console handler installed by the
    previous call instead of stacking a second one, so messages are not
    printed twice.
    """
    detailed_format = '%(asctime)s %(module)-12s %(levelname)-8s %(message)s'
    brief_format = '%(levelname)-8s %(message)s'

    if verbose:
        console_level, console_format = logging.DEBUG, detailed_format
    elif quiet:
        console_level, console_format = logging.CRITICAL, brief_format
    else:
        console_level, console_format = logging.INFO, brief_format

    # The logger itself passes everything; each handler filters by its own level.
    logger.setLevel(logging.DEBUG)

    # Remove console handlers left over from an earlier call.
    for stale in [h for h in logger.handlers if isinstance(h, TqdmLoggingHandler)]:
        logger.removeHandler(stale)

    # Setup Tqdm handler
    console = TqdmLoggingHandler()
    console.setLevel(console_level)
    console.setFormatter(logging.Formatter(console_format))
    logger.addHandler(console)

    # setup logfile if specified
    if logfile:
        # Explicit UTF-8 so non-ASCII file names in messages are written the
        # same way on every platform.
        lf = logging.FileHandler(logfile, encoding="utf-8")
        lf.setLevel(logging.DEBUG)
        lf.setFormatter(logging.Formatter(detailed_format))
        logger.addHandler(lf)
|
||||
|
||||
def parse_var(varspec: str) -> List:
|
||||
if (not ('=' in varspec)):
|
||||
|
@ -73,15 +135,17 @@ def get_args(args: List[str]) -> argparse.Namespace:
|
|||
|
||||
|
||||
def main() -> int:
|
||||
print(logo)
|
||||
|
||||
try:
|
||||
args = get_args(sys.argv[1:])
|
||||
except FileNotFoundError as ex:
|
||||
print("error finding arguments: {}".format(ex))
|
||||
logger.info("error finding arguments: {}".format(ex))
|
||||
return 1
|
||||
setup_logging(args.verbose)
|
||||
if os.path.exists(args.output) and args.clean:
|
||||
bak = "{}.bak-{}".format(args.output, int(time.time()))
|
||||
print("cleaning target {} -> {}".format(args.output, bak))
|
||||
logger.info("cleaning target {} -> {}".format(args.output, bak))
|
||||
os.rename(args.output, bak)
|
||||
|
||||
process_chains = ProcessorChains(args.processors)
|
||||
|
@ -98,21 +162,26 @@ def main() -> int:
|
|||
"author": "",
|
||||
"author_email": "",
|
||||
}
|
||||
|
||||
if args.define:
|
||||
for var in args.define:
|
||||
default_metadata[var[0]] = var[1]
|
||||
|
||||
|
||||
meta_tree = MetaTree(args.root, default_metadata)
|
||||
file_list_cache = cast(Dict, {})
|
||||
file_cont_cache = cast(Dict, {})
|
||||
file_name_cache = cast(Dict, {})
|
||||
file_raw_cache = cast(Dict, {})
|
||||
flist = file_list(args.root, file_list_cache)
|
||||
|
||||
default_metadata["globals"] = {
|
||||
"get_file_list": flist,
|
||||
"get_hier": file_list_hier(args.root, flist),
|
||||
"get_file_name": file_name(args.root, meta_tree, process_chains, file_name_cache),
|
||||
"get_file_content": file_content(args.root, meta_tree, process_chains, file_cont_cache),
|
||||
"get_json": file_json(args.root),
|
||||
"get_heck": file_heck(args.root),
|
||||
"get_raw": file_raw(args.root, file_raw_cache),
|
||||
"get_file_metadata": file_metadata(meta_tree),
|
||||
"get_time_iso8601": time_iso8601("UTC"),
|
||||
|
@ -120,28 +189,62 @@ def main() -> int:
|
|||
"pygments_get_css": pygments_get_css,
|
||||
"pygments_markup_contents_html": pygments_markup_contents_html,
|
||||
"merge_dicts": deep_merge_dicts,
|
||||
"containsone": containsone,
|
||||
}
|
||||
|
||||
# fixme add no-progress option for loop just to be the files
|
||||
|
||||
md = {}
|
||||
haderrors = False
|
||||
logger.info("Gathering all metadata")
|
||||
for root, _, files in os.walk(args.root, followlinks=args.follow_links):
|
||||
workroot = os.path.relpath(root, args.root)
|
||||
if workroot == ".":
|
||||
workroot = ""
|
||||
for f in tqdm.tqdm(files, desc="Gathering metadata", unit="files", dynamic_ncols=True, leave=False):
|
||||
try:
|
||||
# fixme global generic filters
|
||||
if f.endswith(".meta") or f.endswith("~"):
|
||||
continue
|
||||
pth = os.path.join(workroot, f)
|
||||
metadata = meta_tree.get_metadata(pth)
|
||||
if args.verbose:
|
||||
logger.debug(f"metadata: {metadata}")
|
||||
if pth in md:
|
||||
logger.error("[!] multiple meta? ", pth)
|
||||
haderrors = True
|
||||
md[pth] = metadata
|
||||
except BaseException as inst:
|
||||
# fixme optionally exit on error?
|
||||
logger.error(f"[S] Error loading metadata for {pth} Error was: {inst} (skipped)")
|
||||
|
||||
# technically metatree has all the md in its cache, but we also have md in a dictionary so who's to say. I guess
|
||||
# we should make a separate object that lets you query md.
|
||||
|
||||
logger.info("Building Webbed Site")
|
||||
for root, _, files in os.walk(args.root, followlinks=args.follow_links):
|
||||
workroot = os.path.relpath(root, args.root)
|
||||
if workroot == ".":
|
||||
workroot = ""
|
||||
target_dir = os.path.join(args.output, workroot)
|
||||
print("mkdir -> {}".format(target_dir))
|
||||
logger.info("[D] Make directory -> {}".format(target_dir))
|
||||
if not args.dry_run:
|
||||
try:
|
||||
os.mkdir(target_dir)
|
||||
except FileExistsError:
|
||||
if args.safe:
|
||||
print("error, target directory exists, aborting")
|
||||
logger.info("[A] Error, target directory exists and we are in safe mode, aborting")
|
||||
return 1
|
||||
for f in files:
|
||||
for f in tqdm.tqdm(files, desc="Building webbed site", unit="files", dynamic_ncols=True, leave=False):
|
||||
# fixme global generic filters
|
||||
try:
|
||||
if f.endswith(".meta") or f.endswith("~"):
|
||||
continue
|
||||
metadata = meta_tree.get_metadata(os.path.join(workroot, f))
|
||||
metadata = md[os.path.join(workroot, f)]
|
||||
chain = process_chains.get_chain_for_filename(os.path.join(root, f), ctx=metadata)
|
||||
print("process {} -> {} -> {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename)))
|
||||
if args.verbose:
|
||||
logger.debug(f"metadata: {metadata}")
|
||||
logger.info("[P] Processing {} -> chains: {} -> output: {}".format(os.path.join(root, f), repr(chain), os.path.join(target_dir, chain.output_filename)))
|
||||
if not args.dry_run:
|
||||
try:
|
||||
# normal output
|
||||
|
@ -153,10 +256,20 @@ def main() -> int:
|
|||
# write output from input
|
||||
shutil.copyfile(os.path.join(root, f), os.path.join(target_dir, chain.output_filename))
|
||||
except NoOutputException:
|
||||
print("skip output {} -> {}".format(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)))
|
||||
logger.warn("[S] No content or output prevented {}".format(os.path.join(root, f), os.path.join(target_dir, chain.output_filename)))
|
||||
# don't write any output
|
||||
pass
|
||||
except jinja2.exceptions.TemplateSyntaxError as inst:
|
||||
logger.error(f"[!][S] Template error processing {f} Error was: {inst.filename}:{inst.lineno} {inst.message} (skipped)")
|
||||
haderrors = True
|
||||
except BaseException as inst:
|
||||
# fixme optionally exit on error?
|
||||
logger.error(f"[!][S] General error processing {f} Error was: {inst} (skipped)")
|
||||
haderrors = True
|
||||
|
||||
if haderrors:
|
||||
logger.error("One or more errors in processing.")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
def do_main():
|
||||
|
|
|
@ -108,9 +108,9 @@ class MetaTree:
|
|||
|
||||
def _load_metadata(self, cachekey: str) -> Dict:
|
||||
meta = {}
|
||||
try:
|
||||
with open(cachekey, "r") as inf:
|
||||
if cachekey.endswith(".heck"):
|
||||
# raise NotImplemented("We don't yet support HECKformat")
|
||||
with open(cachekey) as cachefile:
|
||||
h = heckformat.parse.load(cachefile)
|
||||
meta = h.flatten_replace()
|
||||
|
@ -126,7 +126,9 @@ class MetaTree:
|
|||
# else either the yaml or json has an error
|
||||
me = MetaLoadError()
|
||||
exc2.__context__ = exc
|
||||
raise me from exc2
|
||||
except BaseException as inst:
|
||||
logger.error(f"Can't load any metadata for key {cachekey}: {inst}")
|
||||
|
||||
return meta
|
||||
|
||||
def get_metadata(self, rel_path: str) -> Dict:
|
||||
|
|
|
@ -166,7 +166,10 @@ class ProcessorChains:
|
|||
ftype = "default"
|
||||
|
||||
if ctx and "type" in ctx:
|
||||
if isinstance(ctx["type"], str):
|
||||
ftype = ctx["type"]
|
||||
else:
|
||||
ftype = ctx["type"][0]
|
||||
return self.get_chain_for_file(open(filename, "r"), ftype, filename, ctx)
|
||||
|
||||
def get_chain_for_file(
|
||||
|
|
|
@ -1 +1,12 @@
|
|||
# processors metadata here
|
||||
|
||||
from . import jinja2_page_embed
|
||||
from . import jinja2
|
||||
from . import passthrough
|
||||
from . import process_heck
|
||||
from . import process_less
|
||||
from . import process_md
|
||||
from . import processors
|
||||
from . import process_pp
|
||||
from . import process_sass
|
||||
from . import process_styl
|
||||
|
|
|
@ -21,7 +21,8 @@ class Jinja2(PassThrough):
|
|||
Returns:
|
||||
iterable: The post-processed output stream
|
||||
"""
|
||||
ctx = cast(Dict, ctx)
|
||||
if (ctx is None):
|
||||
ctx = {}
|
||||
template_env = Environment(loader=FileSystemLoader(ctx["templates"]), extensions=["jinja2.ext.do"])
|
||||
template_env.globals.update(ctx["globals"])
|
||||
template_env.filters.update(ctx["filters"])
|
||||
|
|
|
@ -53,7 +53,12 @@ class Jinja2PageEmbed(Processor):
|
|||
template_env = Environment(loader=FileSystemLoader(ctx["templates"]), extensions=["jinja2.ext.do"])
|
||||
template_env.globals.update(ctx["globals"])
|
||||
template_env.filters.update(ctx["filters"])
|
||||
if isinstance(ctx["template"], str):
|
||||
tmpl = template_env.get_template(ctx["template"])
|
||||
else:
|
||||
# we've got a heck
|
||||
tmpl = template_env.get_template(ctx["template"][0])
|
||||
# print(tmpl)
|
||||
content = "".join([x for x in input_file])
|
||||
return tmpl.render(content=content, metadata=ctx)
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
"""
|
||||
Provides various utility functions that are exposed to the templates.
|
||||
"""
|
||||
import copy
|
||||
import datetime
|
||||
import glob
|
||||
|
@ -15,6 +18,10 @@ from .utils import deep_merge_dicts
|
|||
|
||||
|
||||
def file_list(root: str, listcache: Dict) -> Callable:
|
||||
"""
|
||||
Return a function (memoized for the cache and root directory) which returns a list of files matching glob and
|
||||
sorted as required.
|
||||
"""
|
||||
def get_file_list(
|
||||
path_glob: Union[str, List[str], Tuple[str]],
|
||||
*,
|
||||
|
@ -54,7 +61,7 @@ def file_list(root: str, listcache: Dict) -> Callable:
|
|||
|
||||
|
||||
def file_list_hier(root: str, flist: Callable) -> Callable:
|
||||
"""Return a callable which, given a directory, will walk the directory and return the files within
|
||||
"""Return a function which, given a directory, will walk the directory and return the files within
|
||||
it that match the glob passed."""
|
||||
|
||||
def get_file_list_hier(path: str, glob: str, *, sort_order: str = "ctime", reverse: bool = False) -> Iterable:
|
||||
|
@ -75,6 +82,10 @@ def file_list_hier(root: str, flist: Callable) -> Callable:
|
|||
|
||||
|
||||
def file_name(root: str, metatree: MetaTree, processor_chains: ProcessorChains, namecache: Dict) -> Callable:
|
||||
"""
|
||||
Return a function (memoized for root directory, metatree and processor chains) which returns the output filename
|
||||
given an input filename based on metadata and said processing chains.
|
||||
"""
|
||||
def get_file_name(file_name: str) -> Dict:
|
||||
if file_name in namecache:
|
||||
return namecache[file_name]
|
||||
|
@ -87,6 +98,9 @@ def file_name(root: str, metatree: MetaTree, processor_chains: ProcessorChains,
|
|||
|
||||
|
||||
def file_raw(root: str, contcache: Dict) -> Callable:
|
||||
"""
|
||||
Return a function (memoized for the root directory) which returns the raw content of a file.
|
||||
"""
|
||||
def get_raw(file_name: str) -> str:
|
||||
if file_name in contcache:
|
||||
return contcache[file_name]
|
||||
|
@ -97,6 +111,10 @@ def file_raw(root: str, contcache: Dict) -> Callable:
|
|||
|
||||
|
||||
def file_json(root: str) -> Callable:
|
||||
"""
|
||||
Return a function (memoized for the root directory) which loads a file as json, merges it with an optional input dictionary
|
||||
and returns.
|
||||
"""
|
||||
def get_json(file_name: str, parent: Dict = None) -> Dict:
|
||||
outd = {}
|
||||
if parent is not None:
|
||||
|
@ -108,7 +126,27 @@ def file_json(root: str) -> Callable:
|
|||
return get_json
|
||||
|
||||
|
||||
def file_heck(root: str) -> Callable:
    """
    Build a HECKformat loader bound to ``root``.

    The returned function opens ``file_name`` relative to ``root``, parses it
    with ``heckformat.parse.load``, flattens the result with
    ``flatten_replace`` and passes it, together with a deep copy of the
    optional ``parent`` dictionary, to ``deep_merge_dicts``.
    """

    def get_heck(file_name: str, parent: Dict = None) -> Dict:
        # Work on a copy so the caller's dictionary is never the merge target.
        base = {} if parent is None else copy.deepcopy(parent)
        heck_path = os.path.join(root, file_name)

        with open(heck_path, "r", encoding="utf-8") as heck_file:
            parsed = heckformat.parse.load(heck_file)
            return deep_merge_dicts(base, parsed.flatten_replace())

    return get_heck
|
||||
|
||||
|
||||
def file_content(root: str, metatree: MetaTree, processor_chains: ProcessorChains, contcache: Dict) -> Callable:
|
||||
"""
|
||||
Return a function (memoized for the root directory, metatree, and processor chains) which returns the post-processed
|
||||
content of the input file.
|
||||
"""
|
||||
def get_file_content(file_name: str) -> Iterable:
|
||||
if file_name in contcache:
|
||||
return contcache[file_name]
|
||||
|
@ -121,13 +159,25 @@ def file_content(root: str, metatree: MetaTree, processor_chains: ProcessorChain
|
|||
|
||||
|
||||
def file_metadata(metatree: MetaTree) -> Callable:
    """Build a lookup function bound to ``metatree`` which returns the metadata for a given file."""

    def get_file_metadata(file_name: str) -> Dict:
        # Delegates straight to the tree.
        file_meta = metatree.get_metadata(file_name)
        return file_meta

    return get_file_metadata
|
||||
|
||||
|
||||
def containsone(needle: Iterable, haystack: Iterable) -> bool:
    """
    Return True if at least one of the items in ``needle`` is in ``haystack``.

    Args:
        needle: Items to look for.
        haystack: Container tested with ``in`` for each item.

    Returns:
        True as soon as any item of ``needle`` is found in ``haystack``;
        False otherwise, including when ``needle`` is empty.
    """
    return any(item in haystack for item in needle)
|
||||
|
||||
def time_iso8601(timezone: str) -> Callable:
|
||||
"""Returns a function (memoized for a particular timezone) which formats a time as ISO8601 standard. """
|
||||
tz = pytz.timezone(timezone)
|
||||
|
||||
def get_time_iso8601(time_t: Union[int, float]) -> str:
|
||||
|
@ -137,6 +187,7 @@ def time_iso8601(timezone: str) -> Callable:
|
|||
|
||||
|
||||
def date_iso8601(timezone: str) -> Callable:
|
||||
"""Returns a function (memoized for a particular timezone) which formats a date as ISO8601 standard. """
|
||||
tz = pytz.timezone(timezone)
|
||||
|
||||
def get_date_iso8601(time_t: Union[int, float]) -> str:
|
||||
|
|
Loading…
Reference in New Issue