Bump version to 0.4.0; replace nltk in analyze/english with the sentence split/tokenize helpers from .utils
This commit is contained in:
@ -4,4 +4,4 @@
|
|||||||
# This is free software, see the included LICENSE for terms and conditions.
|
# This is free software, see the included LICENSE for terms and conditions.
|
||||||
#
|
#
|
||||||
|
|
||||||
version = '0.2.0'
|
version = '0.4.0'
|
||||||
|
|||||||
@ -4,16 +4,8 @@
|
|||||||
# This is free software, see the included LICENSE for terms and conditions.
|
# This is free software, see the included LICENSE for terms and conditions.
|
||||||
#
|
#
|
||||||
|
|
||||||
import nltk
|
|
||||||
|
|
||||||
from .abstract import AbstractAnalyzer
|
from .abstract import AbstractAnalyzer
|
||||||
|
from .utils import english_sentence_split, english_sentence_tokenize
|
||||||
|
|
||||||
try:
|
|
||||||
nltk.sent_tokenize("foo bar")
|
|
||||||
except LookupError:
|
|
||||||
nltk.download('punkt')
|
|
||||||
|
|
||||||
|
|
||||||
class English(AbstractAnalyzer):
|
class English(AbstractAnalyzer):
|
||||||
def __init__(self, order, filters=None):
|
def __init__(self, order, filters=None):
|
||||||
@ -25,8 +17,8 @@ class English(AbstractAnalyzer):
|
|||||||
chunks = corpus.split('\n\n')
|
chunks = corpus.split('\n\n')
|
||||||
ret = []
|
ret = []
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
ret = ret + nltk.sent_tokenize(chunk)
|
ret = ret + english_sentence_split(chunk)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def tokenize_segment(self, segment):
|
def tokenize_segment(self, segment):
|
||||||
return list(nltk.word_tokenize(segment))
|
return list(english_sentence_tokenize(segment))
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
name = carkov
|
name = carkov
|
||||||
version = 0.2.0
|
version = 0.4.0
|
||||||
description = A markov chainer library
|
description = A markov chainer library
|
||||||
author = Aldercone Studio
|
author = Aldercone Studio
|
||||||
author_email = alderconestudio@gmail.com
|
author_email = alderconestudio@gmail.com
|
||||||
|
|||||||
Reference in New Issue
Block a user