Make nltk a runtime requirement (imported on first use), bump patch version to 0.4.1

2026-03-24 20:03:22 -07:00
3 changed files with 16 additions and 14 deletions
--- a/carkov/__init__.py
+++ b/carkov/__init__.py
@@ -4,4 +4,4 @@
 # This is free software, see the included LICENSE for terms and conditions.
 #
-__version__ = '0.4.0'
+__version__ = '0.4.1'
--- a/carkov/analyze/utils.py
+++ b/carkov/analyze/utils.py
@@ -4,16 +4,6 @@
 # This is free software, see the included LICENSE for terms and conditions.
 #
-# for now we'll use nltk but this is here to make it so we implement our own
-import nltk
-try:
-    nltk.sent_tokenize("foo bar")
-except LookupError:
-    nltk.download('punkt')
-    nltk.download('punkt_tab')
 # this is surely incomplete, but good enough possibly.
 PUNC="!@#^&*():/\\.,+-?|;'\"<>“”‘’‽=_$"
 WS=" \t"
@@ -33,8 +23,21 @@ def english_sentence_split(corpus: str, quotations: str = '"\'', sent_endings: s
    # if there's no spaces around it, otherwise treat it as a quotation. We'll need to write a proper parser for it.
    # FIXME
-    return nltk.sent_tokenize(corpus)
+    # for now we'll use nltk but this is here to make it so we implement our own
+    try:
+        import nltk
+        try:
+            nltk.sent_tokenize("foo bar")
+        except LookupError:
+            nltk.download('punkt')
+            nltk.download('punkt_tab')
+        return nltk.sent_tokenize(corpus)
+    except ImportError:
+        import sys
+        print("Cannot import nltk, we require for the time being nltk to be installed to do english sentence splitting.")
+        sys.exit(-1)
 def english_sentence_tokenize(sentence: str) -> list[str]:
    """
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = carkov
-version = 0.4.0
+version = 0.4.1
 description = A markov chainer library
 author = Aldercone Studio
 author_email = alderconestudio@gmail.com
@@ -33,7 +33,6 @@ packages =
 zip_safe = true
 install_requires =
   unidecode
-  nltk
   msgpack
 [options.entry_points]