Files
heckweasel/heckweasel/processchain.py
Cassowary 690f110bc5 Major cleanup and fixage for new metadata stuff and heckformat
- Clean up a ton of documentation.
- Make the modules import nicely.
- Add a cool logo to the command line tool
- Make the command-line tool use tqdm
- Make the command line tool load the metadata before processing the
  files in a separate loop.
- Fix error handling in the command-line tool processing loops so they
  work correctly (and jinja errors are more useful)
- Make command-line tool exit non-zero if there were errors.
- Fix load metadata to handle formats and errors better (and return {}
  if it fails)
2024-02-27 21:50:03 -08:00

206 lines
6.6 KiB
Python

"""Interface for chains of processors"""
import os
import os.path
import random
from typing import Any, Dict, Iterable, List, Optional, Type, cast
import yaml
from .processors.processors import Processor
# Built-in chain configuration, used when no configuration file is supplied.
#
# Each key is a chain name. "extension" is either the marker string "default"
# (this chain is the fallback), None (the chain is not bound to any extension
# and can only be selected by name), or a list of file extensions routed to the
# chain. "chain" lists the processor module names to run, in order.
PROCESS_CHAIN_DEFAULT = {
    "default": {
        "extension": "default",
        "chain": ["passthrough"],
    },
    "templatable": {
        "extension": None,
        "chain": ["jinja2"],
    },
    "tembed": {
        "extension": None,
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    "markdown": {
        "extension": ["md"],
        "chain": ["jinja2", "process_md", "jinja2_page_embed"],
    },
    "bbcode": {
        "extension": ["bb", "pp"],
        "chain": ["jinja2", "process_pp", "jinja2_page_embed"],
    },
    "template-html": {
        "extension": ["thtml", "cont"],
        "chain": ["jinja2", "jinja2_page_embed"],
    },
    "heckformat": {
        "extension": ["heck"],
        "chain": ["process_heck", "jinja2", "process_md", "jinja2_page_embed"],
    },
}
class ProcessorChain:
    """Wrap an ordered set of processors together with the input stream they consume.

    Construction only stores its arguments. The processors are invoked when one
    of the properties is read: ``output`` threads the data through every
    processor, while the name-related properties thread the file name through
    them instead.
    """

    def __init__(
        self,
        processors: List["Processor"],
        file_name: str,
        file_data: Iterable[str],
        file_type: str,
        ctx: Optional[Dict] = None,
    ):
        """Initialize the processing stream.

        Arguments:
            processors (list): A list of processor objects, applied in list order.
                Falsy entries (e.g. ``None``) are skipped.
            file_name (str): The name of the input file; it seeds the MIME,
                extension and filename properties.
            file_data (Iterable): An iterable from which to retrieve the input.
            file_type (str): The specified file type, for consumer information.
            ctx (dict, optional): Context passed to every processor call. An
                empty dict is used when omitted.
        """
        self._processors = processors
        self._file_data = file_data
        self._file_type = file_type
        self._file_name = file_name
        self._ctx: Dict = ctx if ctx is not None else {}

    def _active_processors(self) -> List["Processor"]:
        """Return the processors that take part in the chain, dropping falsy entries."""
        return [processor for processor in self._processors if processor]

    @property
    def output(self) -> Iterable:
        """Return an iterable for the output of the process chain.

        Each processor's ``process`` receives the previous processor's result,
        starting from the raw input data.

        Returns:
            :obj:'iterable': the iterable
        """
        data = self._file_data
        for processor in self._active_processors():
            data = processor.process(data, self._ctx)
        return data

    @property
    def output_mime(self) -> str:
        """Return the post-processed MIME value from the processing chain.

        The first processor's ``mime_type`` is given the input file name; every
        later one is given the previous processor's answer.

        Returns:
            str: the mime type
        """
        value = self._file_name
        for processor in self._active_processors():
            value = processor.mime_type(value, self._ctx)
        return value

    @property
    def output_ext(self) -> str:
        """Return the post-processed extension from the processing chain.

        The first processor's ``extension`` is given the input file name; every
        later one is given the previous processor's answer.

        Returns:
            str: the extension
        """
        value = self._file_name
        for processor in self._active_processors():
            value = processor.extension(value, self._ctx)
        return value

    @property
    def output_filename(self) -> str:
        """Return the post-processed filename from the processing chain.

        Only the base name of the input file is used; any directory part is
        dropped before the processors see it.

        Returns:
            str: the new filename
        """
        value = os.path.basename(self._file_name)
        for processor in self._active_processors():
            value = processor.filename(value, self._ctx)
        return value

    def __repr__(self) -> str:
        """Return the class names of all configured processors, e.g. ``[A,B]``."""
        return "[" + ",".join(x.__class__.__name__ for x in self._processors) + "]"
class ProcessorChains:
    """Load a configuration for processor chains, and build the chain for a given input file.

    The configuration maps chain names to an ``extension`` entry and a ``chain``
    list of processor module names. One chain may set ``extension`` to the
    string ``"default"`` to become the fallback for unrecognised file types.
    """

    def __init__(self, config: Optional[str] = None):
        """Initialize, with a specified configuration file.

        Arguments:
            config (str, optional): The path to a yaml formatted configuration file.
                ``PROCESS_CHAIN_DEFAULT`` is used when omitted.
        """
        if config is None:  # pragma: no coverage
            self.chainconfig = PROCESS_CHAIN_DEFAULT
        else:
            # NOTE(review): full_load accepts more than plain data; that is fine for a
            # trusted local config, but consider yaml.safe_load if the file can be untrusted.
            with open(config, "r") as config_file:
                self.chainconfig = yaml.full_load(config_file)
        self.extensionmap: Dict[str, Any] = {}
        self.processors: Dict[str, Type[Processor]] = {}
        # Name of the fallback chain; replaced below by whichever chain declares
        # ``extension: default``.
        self.default: str = "default"
        for chain_name, conf in self.chainconfig.items():
            extensions = conf["extension"]
            if extensions == "default":
                self.default = chain_name
            elif extensions:
                if isinstance(extensions, str):
                    # A single scalar extension; without this it would be
                    # iterated one character at a time.
                    extensions = [extensions]
                for ext in extensions:
                    # A repeated extension silently overrides the earlier mapping.
                    self.extensionmap[ext] = chain_name
            for proc_name in conf["chain"]:
                if proc_name in self.processors:
                    continue
                # Relative import of ``.processors.<proc_name>``; the module's
                # ``processor`` attribute is what gets instantiated per chain.
                processor_module = __import__("processors", globals(), locals(), [proc_name], 1)
                self.processors[proc_name] = processor_module.__dict__[proc_name].processor

    def get_chain_for_filename(self, filename: str, ctx: Optional[Dict] = None) -> ProcessorChain:
        """Get the ProcessorChain, as configured for a given file by extension.

        The file type is the text after the last ``.`` of the file's base name,
        or ``"default"`` when the base name has no dot. A ``"no-proc"`` pragma in
        ``ctx`` forces ``"default"``, and a ``"type"`` entry in ``ctx`` (a string,
        or a sequence whose first item is used) overrides both.

        Arguments:
            filename (str): The name of the file to get a chain for.
            ctx (dict, optional): Metadata for the file; also passed to the chain.

        Returns:
            ProcessorChain: the constructed processor chain.
        """
        # Only the base name is inspected so a dotted directory name cannot be
        # mistaken for an extension.
        _, dot, suffix = os.path.basename(filename).rpartition(".")
        ftype = suffix if dot else "default"
        if ctx and "pragma" in ctx and "no-proc" in ctx["pragma"]:
            ftype = "default"
        if ctx and "type" in ctx:
            declared = ctx["type"]
            ftype = declared if isinstance(declared, str) else declared[0]
        # The handle becomes the chain's input stream, so it cannot be closed
        # here on success; it is closed only if building the chain fails.
        file_obj = open(filename, "r")
        try:
            return self.get_chain_for_file(file_obj, ftype, filename, ctx)
        except Exception:
            file_obj.close()
            raise

    def get_chain_for_file(
        self, file_obj: Iterable, file_ext: str, file_name: Optional[str] = None, ctx: Optional[Dict] = None
    ) -> ProcessorChain:
        """Get the ProcessorChain for a given iterable object based on the specified file type.

        ``file_ext`` is looked up as a mapped extension first, then as a chain
        name, and finally falls back to the default chain.

        Arguments:
            file_obj (:obj:`iterable`): The input file stream
            file_ext (str): The type (extension) of the input stream
            file_name (str, optional): The name of the input. A random hex
                string is substituted when it is empty or omitted.
            ctx (dict, optional): Context passed through to the chain.

        Returns:
            ProcessorChain: the constructed processor chain.
        """
        file_type = self.extensionmap.get(file_ext)
        if not file_type:
            file_type = file_ext if file_ext in self.chainconfig else self.default
        if not file_name:
            file_name = hex(random.randint(0, 65536))
        return ProcessorChain(
            [self.processors[name]() for name in self.chainconfig[file_type]["chain"]],
            cast(str, file_name),
            file_obj,
            file_type,
            ctx,
        )