1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

fixed a bug in tokenizer under search mode, added more tests

This commit is contained in:
Wang Bin
2015-03-17 16:29:09 +08:00
parent 2c95c61d33
commit a14788addb
2 changed files with 5843 additions and 3 deletions

View File

@@ -43,11 +43,13 @@ func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
if width > step {
for i := 0; i < width-step+1; i++ {
gram = string(runes[i : i+step])
gramLen := len(gram)
if value, ok := jiebago.Trie.Freq[gram]; ok && value > 0 {
gramStart := start + len(string(runes[:i]))
token := analysis.Token{
Term: []byte(gram),
Start: start,
End: start + len(gram),
Start: gramStart,
End: gramStart + gramLen,
Position: pos,
Type: detectTokenType(gram),
}

File diff suppressed because it is too large Load Diff