Make nltk a lazily-imported runtime requirement (no longer in install_requires), bump patch version 0.4.0 -> 0.4.1

2026-03-24 20:03:22 -07:00
3 changed files with 16 additions and 14 deletions
--- a/carkov/__init__.py
+++ b/carkov/__init__.py
@@ -4,4 +4,4 @@
 # This is free software, see the included LICENSE for terms and conditions.
 #

-version = '0.4.0'
+version = '0.4.1'
--- a/carkov/analyze/utils.py
+++ b/carkov/analyze/utils.py
@@ -4,16 +4,6 @@
 # This is free software, see the included LICENSE for terms and conditions.
 #

-# for now we'll use nltk but this is here to make it so we implement our own
-import nltk
-
-try:
-    nltk.sent_tokenize("foo bar")
-except LookupError:
-    nltk.download('punkt')
-    nltk.download('punkt_tab')
-
-
 # this is surely incomplete, but good enough possibly.
 PUNC="!@#^&*():/\\.,+-?|;'\"<>“”‘’‽=_$"
 WS=" \t"
@@ -33,8 +23,21 @@ def english_sentence_split(corpus: str, quotations: str = '"\'', sent_endings: s
    # if there's no spaces around it, otherwise treat it as a quotation. We'll need to write a proper parser for it.

    # FIXME
-    return nltk.sent_tokenize(corpus)
+    # for now we'll use nltk but this is here to make it so we implement our own
+    try:
+        import nltk

+        try:
+            nltk.sent_tokenize("foo bar")
+        except LookupError:
+            nltk.download('punkt')
+            nltk.download('punkt_tab')
+
+        return nltk.sent_tokenize(corpus)
+    except ImportError:
+        import sys
+        print("Cannot import nltk, we require for the time being nltk to be installed to do english sentence splitting.")
+        sys.exit(-1)

 def english_sentence_tokenize(sentence: str) -> list[str]:
    """
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = carkov
-version = 0.4.0
+version = 0.4.1
 description = A markov chainer library
 author = Aldercone Studio
 author_email = alderconestudio@gmail.com
@@ -33,7 +33,6 @@ packages =
 zip_safe = true
 install_requires =
   unidecode
-   nltk
   msgpack

 [options.entry_points]