1 Commits
trunk ... 0.4.1

Author SHA1 Message Date
4c7c3d0d9b Make nltk a runtime requirement, bump version by one minor 2026-03-24 20:03:22 -07:00
3 changed files with 16 additions and 14 deletions

View File

@@ -4,4 +4,4 @@
# This is free software, see the included LICENSE for terms and conditions. # This is free software, see the included LICENSE for terms and conditions.
# #
version = '0.4.0' version = '0.4.1'

View File

@@ -4,16 +4,6 @@
# This is free software, see the included LICENSE for terms and conditions. # This is free software, see the included LICENSE for terms and conditions.
# #
# for now we'll use nltk but this is here to make it so we implement our own
import nltk
try:
nltk.sent_tokenize("foo bar")
except LookupError:
nltk.download('punkt')
nltk.download('punkt_tab')
# this is surely incomplete, but good enough possibly. # this is surely incomplete, but good enough possibly.
PUNC="!@#^&*():/\\.,+-?|;'\"<>“”‘’‽=_$" PUNC="!@#^&*():/\\.,+-?|;'\"<>“”‘’‽=_$"
WS=" \t" WS=" \t"
@@ -33,8 +23,21 @@ def english_sentence_split(corpus: str, quotations: str = '"\'', sent_endings: s
# if there's no spaces around it, otherwise treat it as a quotation. We'll need to write a proper parser for it. # if there's no spaces around it, otherwise treat it as a quotation. We'll need to write a proper parser for it.
# FIXME # FIXME
return nltk.sent_tokenize(corpus) # for now we'll use nltk but this is here to make it so we implement our own
try:
import nltk
try:
nltk.sent_tokenize("foo bar")
except LookupError:
nltk.download('punkt')
nltk.download('punkt_tab')
return nltk.sent_tokenize(corpus)
except ImportError:
import sys
print("Cannot import nltk, we require for the time being nltk to be installed to do english sentence splitting.")
sys.exit(-1)
def english_sentence_tokenize(sentence: str) -> list[str]: def english_sentence_tokenize(sentence: str) -> list[str]:
""" """

View File

@@ -1,6 +1,6 @@
[metadata] [metadata]
name = carkov name = carkov
version = 0.4.0 version = 0.4.1
description = A markov chainer library description = A markov chainer library
author = Aldercone Studio author = Aldercone Studio
author_email = alderconestudio@gmail.com author_email = alderconestudio@gmail.com
@@ -33,7 +33,6 @@ packages =
zip_safe = true zip_safe = true
install_requires = install_requires =
unidecode unidecode
nltk
msgpack msgpack
[options.entry_points] [options.entry_points]