1 Commits
0.4.0 ... 0.4.1

Author SHA1 Message Date
4c7c3d0d9b Make nltk a runtime requirement, bump patch version (0.4.0 to 0.4.1) 2026-03-24 20:03:22 -07:00
3 changed files with 16 additions and 14 deletions

View File

@ -4,4 +4,4 @@
# This is free software, see the included LICENSE for terms and conditions.
#
version = '0.4.0'
version = '0.4.1'

View File

@ -4,16 +4,6 @@
# This is free software, see the included LICENSE for terms and conditions.
#
# for now we'll use nltk but this is here to make it so we implement our own
import nltk
try:
nltk.sent_tokenize("foo bar")
except LookupError:
nltk.download('punkt')
nltk.download('punkt_tab')
# this is surely incomplete, but good enough possibly.
PUNC="!@#^&*():/\\.,+-?|;'\"<>“”‘’‽=_$"
WS=" \t"
@ -33,8 +23,21 @@ def english_sentence_split(corpus: str, quotations: str = '"\'', sent_endings: s
# if there's no spaces around it, otherwise treat it as a quotation. We'll need to write a proper parser for it.
# FIXME
return nltk.sent_tokenize(corpus)
# For now we rely on nltk; this wrapper exists so that we can later replace it with our own implementation.
try:
import nltk
try:
nltk.sent_tokenize("foo bar")
except LookupError:
nltk.download('punkt')
nltk.download('punkt_tab')
return nltk.sent_tokenize(corpus)
except ImportError:
import sys
print("Cannot import nltk, we require for the time being nltk to be installed to do english sentence splitting.")
sys.exit(-1)
def english_sentence_tokenize(sentence: str) -> list[str]:
"""

View File

@ -1,6 +1,6 @@
[metadata]
name = carkov
version = 0.4.0
version = 0.4.1
description = A markov chainer library
author = Aldercone Studio
author_email = alderconestudio@gmail.com
@ -33,7 +33,6 @@ packages =
zip_safe = true
install_requires =
unidecode
nltk
msgpack
[options.entry_points]