mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-12 13:10:25 +08:00
5230 lines
100 KiB
Go
package tokenizers
|
||
|
||
import (
|
||
"github.com/blevesearch/bleve/analysis"
|
||
"reflect"
|
||
"testing"
|
||
)
|
||
|
||
func TestJiebaTokenizer(t *testing.T) {
|
||
tests := []struct {
|
||
input []byte
|
||
output analysis.TokenStream
|
||
}{
|
||
{
|
||
[]byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("这是"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("一个"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 30,
|
||
Term: []byte("伸手不见五指"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("的"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("黑夜"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("。"),
|
||
Position: 6,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("我"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 48,
|
||
Term: []byte("叫"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 48,
|
||
End: 57,
|
||
Term: []byte("孙悟空"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 57,
|
||
End: 60,
|
||
Term: []byte(","),
|
||
Position: 10,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 60,
|
||
End: 63,
|
||
Term: []byte("我"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 63,
|
||
End: 66,
|
||
Term: []byte("爱"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 66,
|
||
End: 72,
|
||
Term: []byte("北京"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 72,
|
||
End: 75,
|
||
Term: []byte(","),
|
||
Position: 14,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 75,
|
||
End: 78,
|
||
Term: []byte("我"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 78,
|
||
End: 81,
|
||
Term: []byte("爱"),
|
||
Position: 16,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 81,
|
||
End: 87,
|
||
Term: []byte("Python"),
|
||
Position: 17,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 87,
|
||
End: 90,
|
||
Term: []byte("和"),
|
||
Position: 18,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 90,
|
||
End: 93,
|
||
Term: []byte("C++"),
|
||
Position: 19,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 93,
|
||
End: 96,
|
||
Term: []byte("。"),
|
||
Position: 20,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("我不喜欢日本和服。"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("我"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("不"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("喜欢"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("日本"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("和服"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("。"),
|
||
Position: 6,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("雷猴回归人间。"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("雷猴"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("回归"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("人间"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("。"),
|
||
Position: 4,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("工信处"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 18,
|
||
Term: []byte("女干事"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("每月"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("经过"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 36,
|
||
Term: []byte("下属"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 42,
|
||
Term: []byte("科室"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("都"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 48,
|
||
Term: []byte("要"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 48,
|
||
End: 54,
|
||
Term: []byte("亲口"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 54,
|
||
End: 60,
|
||
Term: []byte("交代"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 60,
|
||
End: 62,
|
||
Term: []byte("24"),
|
||
Position: 11,
|
||
Type: analysis.Numeric,
|
||
},
|
||
{
|
||
Start: 62,
|
||
End: 65,
|
||
Term: []byte("口"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 65,
|
||
End: 74,
|
||
Term: []byte("交换机"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 74,
|
||
End: 77,
|
||
Term: []byte("等"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 77,
|
||
End: 86,
|
||
Term: []byte("技术性"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 86,
|
||
End: 92,
|
||
Term: []byte("器件"),
|
||
Position: 16,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 92,
|
||
End: 95,
|
||
Term: []byte("的"),
|
||
Position: 17,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 95,
|
||
End: 101,
|
||
Term: []byte("安装"),
|
||
Position: 18,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 101,
|
||
End: 107,
|
||
Term: []byte("工作"),
|
||
Position: 19,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("我需要廉租房"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("我"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("需要"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 18,
|
||
Term: []byte("廉租房"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("永和服装饰品有限公司"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("永和"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("服装"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("饰品"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 30,
|
||
Term: []byte("有限公司"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("我爱北京天安门"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("我"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("爱"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("北京"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 21,
|
||
Term: []byte("天安门"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("abc"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("abc"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("隐马尔可夫"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("隐"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 15,
|
||
Term: []byte("马尔可夫"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("雷猴是个好网站"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("雷猴"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("是"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("个"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("好"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("网站"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("“"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 12,
|
||
Term: []byte("Microsoft"),
|
||
Position: 2,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("”"),
|
||
Position: 3,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("一词"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("由"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("“"),
|
||
Position: 6,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 40,
|
||
Term: []byte("MICROcomputer"),
|
||
Position: 7,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 40,
|
||
End: 43,
|
||
Term: []byte("("),
|
||
Position: 8,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 43,
|
||
End: 49,
|
||
Term: []byte("微型"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 49,
|
||
End: 58,
|
||
Term: []byte("计算机"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 58,
|
||
End: 61,
|
||
Term: []byte(")"),
|
||
Position: 11,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 61,
|
||
End: 64,
|
||
Term: []byte("”"),
|
||
Position: 12,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 64,
|
||
End: 67,
|
||
Term: []byte("和"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 67,
|
||
End: 70,
|
||
Term: []byte("“"),
|
||
Position: 14,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 70,
|
||
End: 78,
|
||
Term: []byte("SOFTware"),
|
||
Position: 15,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 78,
|
||
End: 81,
|
||
Term: []byte("("),
|
||
Position: 16,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 81,
|
||
End: 87,
|
||
Term: []byte("软件"),
|
||
Position: 17,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 87,
|
||
End: 90,
|
||
Term: []byte(")"),
|
||
Position: 18,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 90,
|
||
End: 93,
|
||
Term: []byte("”"),
|
||
Position: 19,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 93,
|
||
End: 96,
|
||
Term: []byte("两"),
|
||
Position: 20,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 96,
|
||
End: 102,
|
||
Term: []byte("部分"),
|
||
Position: 21,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 102,
|
||
End: 108,
|
||
Term: []byte("组成"),
|
||
Position: 22,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("草泥马和欺实马是今年的流行词汇"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("草泥马"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("和"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("欺实"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("马"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("是"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("今年"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("的"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("流行"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 45,
|
||
Term: []byte("词汇"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("伊藤洋华堂总府店"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("伊藤"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 15,
|
||
Term: []byte("洋华堂"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("总府"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("店"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("中国科学院计算技术研究所"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 36,
|
||
Term: []byte("中国科学院计算技术研究所"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("罗密欧与朱丽叶"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("罗密欧"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("与"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 21,
|
||
Term: []byte("朱丽叶"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("我购买了道具和服装"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("我"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("购买"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("了"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("道具"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("和"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("服装"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 2,
|
||
Term: []byte("PS"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 2,
|
||
End: 3,
|
||
Term: []byte(":"),
|
||
Position: 2,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 4,
|
||
Term: []byte(" "),
|
||
Position: 3,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 4,
|
||
End: 7,
|
||
Term: []byte("我"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 7,
|
||
End: 13,
|
||
Term: []byte("觉得"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 13,
|
||
End: 19,
|
||
Term: []byte("开源"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 19,
|
||
End: 22,
|
||
Term: []byte("有"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 22,
|
||
End: 28,
|
||
Term: []byte("一个"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 28,
|
||
End: 34,
|
||
Term: []byte("好处"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 34,
|
||
End: 37,
|
||
Term: []byte(","),
|
||
Position: 10,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 37,
|
||
End: 43,
|
||
Term: []byte("就是"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 43,
|
||
End: 49,
|
||
Term: []byte("能够"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 49,
|
||
End: 55,
|
||
Term: []byte("敦促"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 55,
|
||
End: 61,
|
||
Term: []byte("自己"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 61,
|
||
End: 73,
|
||
Term: []byte("不断改进"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 73,
|
||
End: 76,
|
||
Term: []byte(","),
|
||
Position: 16,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 76,
|
||
End: 82,
|
||
Term: []byte("避免"),
|
||
Position: 17,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 82,
|
||
End: 88,
|
||
Term: []byte("敞帚"),
|
||
Position: 18,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 88,
|
||
End: 94,
|
||
Term: []byte("自珍"),
|
||
Position: 19,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("湖北省石首市"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("湖北省"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 18,
|
||
Term: []byte("石首市"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("湖北省十堰市"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("湖北省"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 18,
|
||
Term: []byte("十堰市"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("总经理完成了这件事情"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("总经理"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("完成"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("了"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("这件"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("事情"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("电脑修好了"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("电脑"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("修好"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("了"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("做好了这件事情就一了百了了"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("做好"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("了"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("这件"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("事情"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("就"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 36,
|
||
Term: []byte("一了百了"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 39,
|
||
Term: []byte("了"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("人们审美的观点是不同的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("人们"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("审美"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("的"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("观点"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("是"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("不同"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("的"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("我们买了一个美的空调"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("我们"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("买"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("了"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("一个"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("美的"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("空调"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("线程初始化时我们要注意"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("线程"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 15,
|
||
Term: []byte("初始化"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("时"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("我们"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("要"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("注意"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("一个分子是由好多原子组织成的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("一个"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("分子"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("是"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("由"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("好多"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("原子"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 36,
|
||
Term: []byte("组织"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 39,
|
||
Term: []byte("成"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("的"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("祝你马到功成"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("祝"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("你"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 18,
|
||
Term: []byte("马到功成"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("他掉进了无底洞里"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("他"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("掉"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("进"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("了"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 21,
|
||
Term: []byte("无底洞"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("里"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("中国的首都是北京"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("中国"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("的"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("首都"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("是"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("北京"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("孙君意"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("孙君意"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("外交部发言人马朝旭"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("外交部"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 18,
|
||
Term: []byte("发言人"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 27,
|
||
Term: []byte("马朝旭"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("领导人会议和第四届东亚峰会"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("领导人"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("会议"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("和"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 27,
|
||
Term: []byte("第四届"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("东亚"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("峰会"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("在过去的这五年"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("在"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("过去"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("的"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("这"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("五年"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("还需要很长的路要走"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("还"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("需要"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("很长"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("的"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("路"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("要"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("走"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("60周年首都阅兵"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 2,
|
||
Term: []byte("60"),
|
||
Position: 1,
|
||
Type: analysis.Numeric,
|
||
},
|
||
{
|
||
Start: 2,
|
||
End: 8,
|
||
Term: []byte("周年"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 8,
|
||
End: 14,
|
||
Term: []byte("首都"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 14,
|
||
End: 20,
|
||
Term: []byte("阅兵"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("你好人们审美的观点是不同的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("你好"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("人们"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("审美"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("的"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("观点"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte("是"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 36,
|
||
Term: []byte("不同"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 39,
|
||
Term: []byte("的"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("买水果然后来世博园"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("买"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("水果"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("然后"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("来"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 27,
|
||
Term: []byte("世博园"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("买水果然后去世博园"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("买"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("水果"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("然后"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("去"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 27,
|
||
Term: []byte("世博园"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("但是后来我才知道你是对的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("但是"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("后来"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("我"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("才"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("知道"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("你"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte("是"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("对"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 36,
|
||
Term: []byte("的"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("存在即合理"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("存在"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("即"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("合理"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("的的的的的在的的的的就以和和和"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("的"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("的"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("的"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("的"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("的"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("在"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("的"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("的"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("的"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte("的"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("就"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 36,
|
||
Term: []byte("以"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 39,
|
||
Term: []byte("和"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("和"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("和"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("I love你,不以为耻,反以为rong"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 1,
|
||
Term: []byte("I"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 1,
|
||
End: 2,
|
||
Term: []byte(" "),
|
||
Position: 2,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 2,
|
||
End: 6,
|
||
Term: []byte("love"),
|
||
Position: 3,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("你"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte(","),
|
||
Position: 5,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 24,
|
||
Term: []byte("不以为耻"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte(","),
|
||
Position: 7,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte("反"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 36,
|
||
Term: []byte("以为"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 40,
|
||
Term: []byte("rong"),
|
||
Position: 10,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("因"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("因"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte(""),
|
||
analysis.TokenStream{},
|
||
},
|
||
{
|
||
[]byte("hello你好人们审美的观点是不同的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 5,
|
||
Term: []byte("hello"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 5,
|
||
End: 11,
|
||
Term: []byte("你好"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 11,
|
||
End: 17,
|
||
Term: []byte("人们"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 17,
|
||
End: 23,
|
||
Term: []byte("审美"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 23,
|
||
End: 26,
|
||
Term: []byte("的"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 26,
|
||
End: 32,
|
||
Term: []byte("观点"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 32,
|
||
End: 35,
|
||
Term: []byte("是"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 35,
|
||
End: 41,
|
||
Term: []byte("不同"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 41,
|
||
End: 44,
|
||
Term: []byte("的"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("很好但主要是基于网页形式"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("很"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("好"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("但"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("主要"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("是"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("基于"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("网页"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 36,
|
||
Term: []byte("形式"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("hello你好人们审美的观点是不同的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 5,
|
||
Term: []byte("hello"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 5,
|
||
End: 11,
|
||
Term: []byte("你好"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 11,
|
||
End: 17,
|
||
Term: []byte("人们"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 17,
|
||
End: 23,
|
||
Term: []byte("审美"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 23,
|
||
End: 26,
|
||
Term: []byte("的"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 26,
|
||
End: 32,
|
||
Term: []byte("观点"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 32,
|
||
End: 35,
|
||
Term: []byte("是"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 35,
|
||
End: 41,
|
||
Term: []byte("不同"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 41,
|
||
End: 44,
|
||
Term: []byte("的"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("为什么我不能拥有想要的生活"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("为什么"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("我"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("不能"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("拥有"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("想要"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("的"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("生活"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("后来我才"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("后来"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("我"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("才"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("此次来中国是为了"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("此次"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("来"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("中国"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("是"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("为了"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("使用了它就可以解决一些问题"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("使用"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("了"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("它"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("就"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("可以"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("解决"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("一些"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("问题"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte(",使用了它就可以解决一些问题"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 1,
|
||
Term: []byte(","),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 1,
|
||
End: 7,
|
||
Term: []byte("使用"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 7,
|
||
End: 10,
|
||
Term: []byte("了"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 10,
|
||
End: 13,
|
||
Term: []byte("它"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 13,
|
||
End: 16,
|
||
Term: []byte("就"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 16,
|
||
End: 22,
|
||
Term: []byte("可以"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 22,
|
||
End: 28,
|
||
Term: []byte("解决"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 28,
|
||
End: 34,
|
||
Term: []byte("一些"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 34,
|
||
End: 40,
|
||
Term: []byte("问题"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("其实使用了它就可以解决一些问题"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("其实"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("使用"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("了"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("它"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("就"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("可以"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("解决"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("一些"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 45,
|
||
Term: []byte("问题"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("好人使用了它就可以解决一些问题"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("好人"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("使用"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("了"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("它"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("就"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("可以"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("解决"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("一些"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 45,
|
||
Term: []byte("问题"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("是因为和国家"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("是因为"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("和"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("国家"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("老年搜索还支持"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("老年"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("搜索"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("还"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("支持"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("干脆"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("就"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("把"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("那部"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("蒙人"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("的"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("闲法"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 36,
|
||
Term: []byte("给"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 39,
|
||
Term: []byte("废"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("了"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 48,
|
||
Term: []byte("拉倒"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 48,
|
||
End: 51,
|
||
Term: []byte("!"),
|
||
Position: 12,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 53,
|
||
Term: []byte("RT"),
|
||
Position: 13,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 53,
|
||
End: 54,
|
||
Term: []byte(" "),
|
||
Position: 14,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 54,
|
||
End: 55,
|
||
Term: []byte("@"),
|
||
Position: 15,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 55,
|
||
End: 67,
|
||
Term: []byte("laoshipukong"),
|
||
Position: 16,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 67,
|
||
End: 68,
|
||
Term: []byte(" "),
|
||
Position: 17,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 68,
|
||
End: 69,
|
||
Term: []byte(":"),
|
||
Position: 18,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 69,
|
||
End: 70,
|
||
Term: []byte(" "),
|
||
Position: 19,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 70,
|
||
End: 72,
|
||
Term: []byte("27"),
|
||
Position: 20,
|
||
Type: analysis.Numeric,
|
||
},
|
||
{
|
||
Start: 72,
|
||
End: 75,
|
||
Term: []byte("日"),
|
||
Position: 21,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 75,
|
||
End: 78,
|
||
Term: []byte(","),
|
||
Position: 22,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 78,
|
||
End: 99,
|
||
Term: []byte("全国人大常委会"),
|
||
Position: 23,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 99,
|
||
End: 108,
|
||
Term: []byte("第三次"),
|
||
Position: 24,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 108,
|
||
End: 114,
|
||
Term: []byte("审议"),
|
||
Position: 25,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 114,
|
||
End: 120,
|
||
Term: []byte("侵权"),
|
||
Position: 26,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 120,
|
||
End: 129,
|
||
Term: []byte("责任法"),
|
||
Position: 27,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 129,
|
||
End: 135,
|
||
Term: []byte("草案"),
|
||
Position: 28,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 135,
|
||
End: 138,
|
||
Term: []byte(","),
|
||
Position: 29,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 138,
|
||
End: 144,
|
||
Term: []byte("删除"),
|
||
Position: 30,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 144,
|
||
End: 147,
|
||
Term: []byte("了"),
|
||
Position: 31,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 147,
|
||
End: 153,
|
||
Term: []byte("有关"),
|
||
Position: 32,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 153,
|
||
End: 159,
|
||
Term: []byte("医疗"),
|
||
Position: 33,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 159,
|
||
End: 165,
|
||
Term: []byte("损害"),
|
||
Position: 34,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 165,
|
||
End: 171,
|
||
Term: []byte("责任"),
|
||
Position: 35,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 171,
|
||
End: 174,
|
||
Term: []byte("“"),
|
||
Position: 36,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 174,
|
||
End: 180,
|
||
Term: []byte("举证"),
|
||
Position: 37,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 180,
|
||
End: 186,
|
||
Term: []byte("倒置"),
|
||
Position: 38,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 186,
|
||
End: 189,
|
||
Term: []byte("”"),
|
||
Position: 39,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 189,
|
||
End: 192,
|
||
Term: []byte("的"),
|
||
Position: 40,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 192,
|
||
End: 198,
|
||
Term: []byte("规定"),
|
||
Position: 41,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 198,
|
||
End: 201,
|
||
Term: []byte("。"),
|
||
Position: 42,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 201,
|
||
End: 204,
|
||
Term: []byte("在"),
|
||
Position: 43,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 204,
|
||
End: 210,
|
||
Term: []byte("医患"),
|
||
Position: 44,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 210,
|
||
End: 216,
|
||
Term: []byte("纠纷"),
|
||
Position: 45,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 216,
|
||
End: 222,
|
||
Term: []byte("中本"),
|
||
Position: 46,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 222,
|
||
End: 225,
|
||
Term: []byte("已"),
|
||
Position: 47,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 225,
|
||
End: 231,
|
||
Term: []byte("处于"),
|
||
Position: 48,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 231,
|
||
End: 237,
|
||
Term: []byte("弱势"),
|
||
Position: 49,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 237,
|
||
End: 243,
|
||
Term: []byte("地位"),
|
||
Position: 50,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 243,
|
||
End: 246,
|
||
Term: []byte("的"),
|
||
Position: 51,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 246,
|
||
End: 255,
|
||
Term: []byte("消费者"),
|
||
Position: 52,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 255,
|
||
End: 261,
|
||
Term: []byte("由此"),
|
||
Position: 53,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 261,
|
||
End: 264,
|
||
Term: []byte("将"),
|
||
Position: 54,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 264,
|
||
End: 270,
|
||
Term: []byte("陷入"),
|
||
Position: 55,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 270,
|
||
End: 282,
|
||
Term: []byte("万劫不复"),
|
||
Position: 56,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 282,
|
||
End: 285,
|
||
Term: []byte("的"),
|
||
Position: 57,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 285,
|
||
End: 291,
|
||
Term: []byte("境地"),
|
||
Position: 58,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 291,
|
||
End: 294,
|
||
Term: []byte("。"),
|
||
Position: 59,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 294,
|
||
End: 295,
|
||
Term: []byte(" "),
|
||
Position: 60,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("大"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("大"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte(""),
|
||
analysis.TokenStream{},
|
||
},
|
||
{
|
||
[]byte("他说的确实在理"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("他"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("说"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("的"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("确实"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("在理"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("长春市长春节讲话"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("长春"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("市长"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("春节"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("讲话"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("结婚的和尚未结婚的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("结婚"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("的"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("和"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("尚未"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("结婚"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("的"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("结合成分子时"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("结合"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("成"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("分子"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("时"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("旅游和服务是最好的"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("旅游"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("和"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("服务"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("是"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("最好"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("的"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("这件事情的确是我的错"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("这件"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("事情"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("的确"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("是"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("我"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("的"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte("错"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("供大家参考指正"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("供"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("大家"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("参考"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("指正"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("哈尔滨政府公布塌桥原因"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("哈尔滨"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("政府"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("公布"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("塌桥"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("原因"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("我在机场入口处"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("我"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("在"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 12,
|
||
Term: []byte("机场"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 21,
|
||
Term: []byte("入口处"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("邢永臣摄影报道"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("邢永臣"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("摄影"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("报道"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("BP神经网络如何训练才能在分类时增加区分度?"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 2,
|
||
Term: []byte("BP"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 2,
|
||
End: 14,
|
||
Term: []byte("神经网络"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 14,
|
||
End: 20,
|
||
Term: []byte("如何"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 20,
|
||
End: 26,
|
||
Term: []byte("训练"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 26,
|
||
End: 32,
|
||
Term: []byte("才能"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 32,
|
||
End: 35,
|
||
Term: []byte("在"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 35,
|
||
End: 41,
|
||
Term: []byte("分类"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 41,
|
||
End: 44,
|
||
Term: []byte("时"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 44,
|
||
End: 50,
|
||
Term: []byte("增加"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 50,
|
||
End: 59,
|
||
Term: []byte("区分度"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 59,
|
||
End: 62,
|
||
Term: []byte("?"),
|
||
Position: 11,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("南京市长江大桥"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("南京市"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 21,
|
||
Term: []byte("长江大桥"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("应"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("一些"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 18,
|
||
Term: []byte("使用者"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("的"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("建议"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte(","),
|
||
Position: 6,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("也"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("为了"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 45,
|
||
Term: []byte("便于"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 51,
|
||
Term: []byte("利用"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 59,
|
||
Term: []byte("NiuTrans"),
|
||
Position: 11,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 59,
|
||
End: 65,
|
||
Term: []byte("用于"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 65,
|
||
End: 68,
|
||
Term: []byte("SMT"),
|
||
Position: 13,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 68,
|
||
End: 74,
|
||
Term: []byte("研究"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("长春市长春药店"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("长春市"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("长春"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("药店"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("邓颖超生前最喜欢的衣服"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("邓颖超"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("生前"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("最"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("喜欢"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("的"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("衣服"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("胡锦涛是热爱世界和平的政治局常委"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("胡锦涛"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("是"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("热爱"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("世界"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("和平"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("的"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 42,
|
||
Term: []byte("政治局"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 48,
|
||
Term: []byte("常委"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("程序员"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("祝"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("海林"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("和"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 30,
|
||
Term: []byte("朱会震"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("是"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 36,
|
||
Term: []byte("在"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 42,
|
||
Term: []byte("孙健"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("的"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 51,
|
||
Term: []byte("左面"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 54,
|
||
Term: []byte("和"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 54,
|
||
End: 60,
|
||
Term: []byte("右面"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 60,
|
||
End: 61,
|
||
Term: []byte(","),
|
||
Position: 13,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 61,
|
||
End: 62,
|
||
Term: []byte(" "),
|
||
Position: 14,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 62,
|
||
End: 68,
|
||
Term: []byte("范凯"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 68,
|
||
End: 71,
|
||
Term: []byte("在"),
|
||
Position: 16,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 71,
|
||
End: 74,
|
||
Term: []byte("最"),
|
||
Position: 17,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 74,
|
||
End: 80,
|
||
Term: []byte("右面"),
|
||
Position: 18,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 80,
|
||
End: 81,
|
||
Term: []byte("."),
|
||
Position: 19,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 81,
|
||
End: 87,
|
||
Term: []byte("再往"),
|
||
Position: 20,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 87,
|
||
End: 90,
|
||
Term: []byte("左"),
|
||
Position: 21,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 90,
|
||
End: 93,
|
||
Term: []byte("是"),
|
||
Position: 22,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 93,
|
||
End: 102,
|
||
Term: []byte("李松洪"),
|
||
Position: 23,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("一次性交多少钱"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("一次性"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("交"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("多少"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 21,
|
||
Term: []byte("钱"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("两块五一套,三块八一斤,四块七一本,五块六一条"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 6,
|
||
Term: []byte("两块"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 9,
|
||
Term: []byte("五"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("一套"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte(","),
|
||
Position: 4,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("三块"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("八"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("一斤"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 36,
|
||
Term: []byte(","),
|
||
Position: 8,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 42,
|
||
Term: []byte("四块"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("七"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 51,
|
||
Term: []byte("一本"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 54,
|
||
Term: []byte(","),
|
||
Position: 12,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 54,
|
||
End: 60,
|
||
Term: []byte("五块"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 60,
|
||
End: 63,
|
||
Term: []byte("六"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 63,
|
||
End: 69,
|
||
Term: []byte("一条"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("小和尚留了一个像大和尚一样的和尚头"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("小"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("和尚"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("留"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("了"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("一个"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("像"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("大"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("和尚"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("一样"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("的"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 51,
|
||
Term: []byte("和尚头"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("我"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("是"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 27,
|
||
Term: []byte("中华人民共和国"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 33,
|
||
Term: []byte("公民"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 34,
|
||
Term: []byte(";"),
|
||
Position: 5,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 34,
|
||
End: 37,
|
||
Term: []byte("我"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 37,
|
||
End: 43,
|
||
Term: []byte("爸爸"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 43,
|
||
End: 46,
|
||
Term: []byte("是"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 46,
|
||
End: 55,
|
||
Term: []byte("共和党"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 55,
|
||
End: 61,
|
||
Term: []byte("党员"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 61,
|
||
End: 62,
|
||
Term: []byte(";"),
|
||
Position: 11,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 62,
|
||
End: 63,
|
||
Term: []byte(" "),
|
||
Position: 12,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 63,
|
||
End: 69,
|
||
Term: []byte("地铁"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 69,
|
||
End: 78,
|
||
Term: []byte("和平门"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 78,
|
||
End: 81,
|
||
Term: []byte("站"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("张晓梅去人民医院做了个B超然后去买了件T恤"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("张晓梅"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("去"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 18,
|
||
Term: []byte("人民"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("医院"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 27,
|
||
Term: []byte("做"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte("了"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("个"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 37,
|
||
Term: []byte("B超"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 37,
|
||
End: 43,
|
||
Term: []byte("然后"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 43,
|
||
End: 46,
|
||
Term: []byte("去"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 46,
|
||
End: 49,
|
||
Term: []byte("买"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 49,
|
||
End: 52,
|
||
Term: []byte("了"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 52,
|
||
End: 55,
|
||
Term: []byte("件"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 55,
|
||
End: 59,
|
||
Term: []byte("T恤"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("AT&T是一件不错的公司,给你发offer了吗?"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 4,
|
||
Term: []byte("AT&T"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 4,
|
||
End: 7,
|
||
Term: []byte("是"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 7,
|
||
End: 13,
|
||
Term: []byte("一件"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 13,
|
||
End: 19,
|
||
Term: []byte("不错"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 19,
|
||
End: 22,
|
||
Term: []byte("的"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 22,
|
||
End: 28,
|
||
Term: []byte("公司"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 28,
|
||
End: 31,
|
||
Term: []byte(","),
|
||
Position: 7,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 31,
|
||
End: 34,
|
||
Term: []byte("给"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 34,
|
||
End: 37,
|
||
Term: []byte("你"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 37,
|
||
End: 40,
|
||
Term: []byte("发"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 40,
|
||
End: 45,
|
||
Term: []byte("offer"),
|
||
Position: 11,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 48,
|
||
Term: []byte("了"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 48,
|
||
End: 51,
|
||
Term: []byte("吗"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 54,
|
||
Term: []byte("?"),
|
||
Position: 14,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("C++"),
|
||
Position: 1,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 6,
|
||
Term: []byte("和"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 6,
|
||
End: 8,
|
||
Term: []byte("c#"),
|
||
Position: 3,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 8,
|
||
End: 11,
|
||
Term: []byte("是"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 11,
|
||
End: 17,
|
||
Term: []byte("什么"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 17,
|
||
End: 23,
|
||
Term: []byte("关系"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 23,
|
||
End: 26,
|
||
Term: []byte("?"),
|
||
Position: 7,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 26,
|
||
End: 28,
|
||
Term: []byte("11"),
|
||
Position: 8,
|
||
Type: analysis.Numeric,
|
||
},
|
||
{
|
||
Start: 28,
|
||
End: 29,
|
||
Term: []byte("+"),
|
||
Position: 9,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 29,
|
||
End: 32,
|
||
Term: []byte("122"),
|
||
Position: 10,
|
||
Type: analysis.Numeric,
|
||
},
|
||
{
|
||
Start: 32,
|
||
End: 33,
|
||
Term: []byte("="),
|
||
Position: 11,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 36,
|
||
Term: []byte("133"),
|
||
Position: 12,
|
||
Type: analysis.Numeric,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 39,
|
||
Term: []byte(","),
|
||
Position: 13,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("是"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("吗"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 48,
|
||
Term: []byte("?"),
|
||
Position: 16,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 48,
|
||
End: 50,
|
||
Term: []byte("PI"),
|
||
Position: 17,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 50,
|
||
End: 51,
|
||
Term: []byte("="),
|
||
Position: 18,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 58,
|
||
Term: []byte("3.14159"),
|
||
Position: 19,
|
||
Type: analysis.Numeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("你"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 9,
|
||
Term: []byte("认识"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("那个"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 18,
|
||
Term: []byte("和"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 18,
|
||
End: 24,
|
||
Term: []byte("主席"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 30,
|
||
Term: []byte("握手"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("的"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 39,
|
||
Term: []byte("的哥"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("吗"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("?"),
|
||
Position: 10,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 51,
|
||
Term: []byte("他开"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 57,
|
||
Term: []byte("一辆"),
|
||
Position: 12,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 57,
|
||
End: 63,
|
||
Term: []byte("黑色"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 63,
|
||
End: 69,
|
||
Term: []byte("的士"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 69,
|
||
End: 72,
|
||
Term: []byte("。"),
|
||
Position: 15,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("枪杆子中出政权"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("枪杆子"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 12,
|
||
Term: []byte("中"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 12,
|
||
End: 15,
|
||
Term: []byte("出"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("政权"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("张三风同学走上了不归路"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 9,
|
||
Term: []byte("张三风"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 9,
|
||
End: 15,
|
||
Term: []byte("同学"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 15,
|
||
End: 21,
|
||
Term: []byte("走上"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 24,
|
||
Term: []byte("了"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 24,
|
||
End: 33,
|
||
Term: []byte("不归路"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 4,
|
||
Term: []byte("阿Q"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 4,
|
||
End: 10,
|
||
Term: []byte("腰间"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 10,
|
||
End: 13,
|
||
Term: []byte("挂"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 13,
|
||
End: 16,
|
||
Term: []byte("着"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 16,
|
||
End: 21,
|
||
Term: []byte("BB机"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 21,
|
||
End: 27,
|
||
Term: []byte("手里"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 27,
|
||
End: 30,
|
||
Term: []byte("拿"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 33,
|
||
Term: []byte("着"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 33,
|
||
End: 42,
|
||
Term: []byte("大哥大"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte(","),
|
||
Position: 10,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 45,
|
||
End: 48,
|
||
Term: []byte("说"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 48,
|
||
End: 51,
|
||
Term: []byte(":"),
|
||
Position: 12,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
{
|
||
Start: 51,
|
||
End: 54,
|
||
Term: []byte("我"),
|
||
Position: 13,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 54,
|
||
End: 60,
|
||
Term: []byte("一般"),
|
||
Position: 14,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 60,
|
||
End: 66,
|
||
Term: []byte("吃饭"),
|
||
Position: 15,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 66,
|
||
End: 69,
|
||
Term: []byte("不"),
|
||
Position: 16,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 69,
|
||
End: 74,
|
||
Term: []byte("AA制"),
|
||
Position: 17,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 74,
|
||
End: 77,
|
||
Term: []byte("的"),
|
||
Position: 18,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 77,
|
||
End: 80,
|
||
Term: []byte("。"),
|
||
Position: 19,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
[]byte("在1号店能买到小S和大S八卦的书。"),
|
||
analysis.TokenStream{
|
||
{
|
||
Start: 0,
|
||
End: 3,
|
||
Term: []byte("在"),
|
||
Position: 1,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 3,
|
||
End: 10,
|
||
Term: []byte("1号店"),
|
||
Position: 2,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 10,
|
||
End: 13,
|
||
Term: []byte("能"),
|
||
Position: 3,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 13,
|
||
End: 16,
|
||
Term: []byte("买"),
|
||
Position: 4,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 16,
|
||
End: 19,
|
||
Term: []byte("到"),
|
||
Position: 5,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 19,
|
||
End: 23,
|
||
Term: []byte("小S"),
|
||
Position: 6,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 23,
|
||
End: 26,
|
||
Term: []byte("和"),
|
||
Position: 7,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 26,
|
||
End: 30,
|
||
Term: []byte("大S"),
|
||
Position: 8,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 30,
|
||
End: 36,
|
||
Term: []byte("八卦"),
|
||
Position: 9,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 36,
|
||
End: 39,
|
||
Term: []byte("的"),
|
||
Position: 10,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 39,
|
||
End: 42,
|
||
Term: []byte("书"),
|
||
Position: 11,
|
||
Type: analysis.Ideographic,
|
||
},
|
||
{
|
||
Start: 42,
|
||
End: 45,
|
||
Term: []byte("。"),
|
||
Position: 12,
|
||
Type: analysis.AlphaNumeric,
|
||
},
|
||
},
|
||
},
|
||
}
|
||
|
||
tokenizer, _ := NewJiebaTokenizer("../../dict.txt", true, false)
|
||
for _, test := range tests {
|
||
actual := tokenizer.Tokenize(test.input)
|
||
if !reflect.DeepEqual(actual, test.output) {
|
||
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
|
||
}
|
||
}
|
||
|
||
}
|