Make nltk a runtime requirement (imported on first use), bump version from 0.4.0 to 0.4.1
This commit is contained in:
@ -4,4 +4,4 @@
|
||||
# This is free software, see the included LICENSE for terms and conditions.
|
||||
#
|
||||
|
||||
version = '0.4.0'
|
||||
version = '0.4.1'
|
||||
|
||||
@ -4,16 +4,6 @@
|
||||
# This is free software, see the included LICENSE for terms and conditions.
|
||||
#
|
||||
|
||||
# For now we rely on nltk; this wrapper exists so that we can later swap in our own implementation.
|
||||
import nltk
|
||||
|
||||
try:
|
||||
nltk.sent_tokenize("foo bar")
|
||||
except LookupError:
|
||||
nltk.download('punkt')
|
||||
nltk.download('punkt_tab')
|
||||
|
||||
|
||||
# this is surely incomplete, but good enough possibly.
|
||||
PUNC="!@#^&*():/\\.,+-?|;'\"<>“”‘’‽=_$"
|
||||
WS=" \t"
|
||||
@ -33,8 +23,21 @@ def english_sentence_split(corpus: str, quotations: str = '"\'', sent_endings: s
|
||||
# if there are no spaces around it, otherwise treat it as a quotation. We'll need to write a proper parser for it.
|
||||
|
||||
# FIXME
|
||||
return nltk.sent_tokenize(corpus)
|
||||
# For now we rely on nltk; this wrapper exists so that we can later swap in our own implementation.
|
||||
try:
|
||||
import nltk
|
||||
|
||||
try:
|
||||
nltk.sent_tokenize("foo bar")
|
||||
except LookupError:
|
||||
nltk.download('punkt')
|
||||
nltk.download('punkt_tab')
|
||||
|
||||
return nltk.sent_tokenize(corpus)
|
||||
except ImportError:
|
||||
import sys
|
||||
print("Cannot import nltk, we require for the time being nltk to be installed to do english sentence splitting.")
|
||||
sys.exit(-1)
|
||||
|
||||
def english_sentence_tokenize(sentence: str) -> list[str]:
|
||||
"""
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[metadata]
|
||||
name = carkov
|
||||
version = 0.4.0
|
||||
version = 0.4.1
|
||||
description = A markov chainer library
|
||||
author = Aldercone Studio
|
||||
author_email = alderconestudio@gmail.com
|
||||
@ -33,7 +33,6 @@ packages =
|
||||
zip_safe = true
|
||||
install_requires =
|
||||
unidecode
|
||||
nltk
|
||||
msgpack
|
||||
|
||||
[options.entry_points]
|
||||
|
||||
Reference in New Issue
Block a user