mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-12 05:00:24 +08:00
fixed a bug in tokenizer under search mode, added more tests
This commit is contained in:
@@ -43,11 +43,13 @@ func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
|||||||
if width > step {
|
if width > step {
|
||||||
for i := 0; i < width-step+1; i++ {
|
for i := 0; i < width-step+1; i++ {
|
||||||
gram = string(runes[i : i+step])
|
gram = string(runes[i : i+step])
|
||||||
|
gramLen := len(gram)
|
||||||
if value, ok := jiebago.Trie.Freq[gram]; ok && value > 0 {
|
if value, ok := jiebago.Trie.Freq[gram]; ok && value > 0 {
|
||||||
|
gramStart := start + len(string(runes[:i]))
|
||||||
token := analysis.Token{
|
token := analysis.Token{
|
||||||
Term: []byte(gram),
|
Term: []byte(gram),
|
||||||
Start: start,
|
Start: gramStart,
|
||||||
End: start + len(gram),
|
End: gramStart + gramLen,
|
||||||
Position: pos,
|
Position: pos,
|
||||||
Type: detectTokenType(gram),
|
Type: detectTokenType(gram),
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user