Bump version to 0.4; fix some changes to analyze/english

This commit is contained in:
2026-03-24 19:33:30 -07:00
parent d6e329ff39
commit 3fae360c2e
3 changed files with 5 additions and 13 deletions

View File

@@ -4,4 +4,4 @@
# This is free software, see the included LICENSE for terms and conditions.
#
-version = '0.2.0'
+version = '0.4.0'

View File

@@ -4,16 +4,8 @@
# This is free software, see the included LICENSE for terms and conditions.
#
-import nltk
from .abstract import AbstractAnalyzer
-try:
-nltk.sent_tokenize("foo bar")
-except LookupError:
-nltk.download('punkt')
+from .utils import english_sentence_split, english_sentence_tokenize
class English(AbstractAnalyzer):
def __init__(self, order, filters=None):
@@ -25,8 +17,8 @@ class English(AbstractAnalyzer):
chunks = corpus.split('\n\n')
ret = []
for chunk in chunks:
-ret = ret + nltk.sent_tokenize(chunk)
+ret = ret + english_sentence_split(chunk)
return ret
def tokenize_segment(self, segment):
-return list(nltk.word_tokenize(segment))
+return list(english_sentence_tokenize(segment))

View File

@@ -1,6 +1,6 @@
[metadata]
name = carkov
-version = 0.2.0
+version = 0.4.0
description = A markov chainer library
author = Aldercone Studio
author_email = alderconestudio@gmail.com