package jiebago import ( "reflect" "testing" "github.com/blevesearch/bleve/analysis" ) func TestJiebaTokenizerDefaultModeWithHMM(t *testing.T) { tests := []struct { input []byte output analysis.TokenStream }{ { []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("这是"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("一个"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 30, Term: []byte("伸手不见五指"), Position: 3, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("黑夜"), Position: 5, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("。"), Position: 6, Type: analysis.AlphaNumeric, }, { Start: 42, End: 45, Term: []byte("我"), Position: 7, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("叫"), Position: 8, Type: analysis.Ideographic, }, { Start: 48, End: 57, Term: []byte("孙悟空"), Position: 9, Type: analysis.Ideographic, }, { Start: 57, End: 60, Term: []byte(","), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 60, End: 63, Term: []byte("我"), Position: 11, Type: analysis.Ideographic, }, { Start: 63, End: 66, Term: []byte("爱"), Position: 12, Type: analysis.Ideographic, }, { Start: 66, End: 72, Term: []byte("北京"), Position: 13, Type: analysis.Ideographic, }, { Start: 72, End: 75, Term: []byte(","), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 75, End: 78, Term: []byte("我"), Position: 15, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("爱"), Position: 16, Type: analysis.Ideographic, }, { Start: 81, End: 87, Term: []byte("Python"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 87, End: 90, Term: []byte("和"), Position: 18, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("C++"), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("。"), Position: 20, Type: analysis.AlphaNumeric, }, }, }, { []byte("我不喜欢日本和服。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("不"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("喜欢"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("日本"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("和服"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("。"), Position: 6, Type: analysis.AlphaNumeric, }, }, }, { []byte("雷猴回归人间。"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("回归"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人间"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("。"), Position: 4, Type: analysis.AlphaNumeric, }, }, }, { []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("工信处"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("女干事"), Position: 2, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("每月"), Position: 3, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("经过"), Position: 4, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("下属"), Position: 5, Type: analysis.Ideographic, }, { Start: 36, End: 42, Term: []byte("科室"), Position: 6, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("都"), Position: 7, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("要"), Position: 8, Type: analysis.Ideographic, }, { Start: 48, End: 54, Term: []byte("亲口"), Position: 9, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("交代"), Position: 10, Type: analysis.Ideographic, }, { Start: 60, End: 62, Term: []byte("24"), Position: 11, Type: analysis.Numeric, }, { Start: 62, End: 65, Term: []byte("口"), Position: 12, Type: analysis.Ideographic, }, { Start: 65, End: 74, Term: []byte("交换机"), Position: 13, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("等"), Position: 14, Type: analysis.Ideographic, }, { Start: 77, End: 86, Term: []byte("技术性"), Position: 15, Type: analysis.Ideographic, }, { Start: 86, End: 92, Term: []byte("器件"), Position: 16, Type: analysis.Ideographic, }, { Start: 92, End: 95, Term: []byte("的"), Position: 17, Type: analysis.Ideographic, }, { Start: 95, End: 101, Term: []byte("安装"), Position: 18, Type: analysis.Ideographic, }, { Start: 101, End: 107, Term: []byte("工作"), Position: 19, Type: analysis.Ideographic, }, }, }, { []byte("我需要廉租房"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("廉租房"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("永和服装饰品有限公司"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("永和"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("服装"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("饰品"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 30, Term: []byte("有限公司"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("我爱北京天安门"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("爱"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("北京"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("天安门"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("abc"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("abc"), Position: 1, Type: analysis.AlphaNumeric, }, }, }, { []byte("隐马尔可夫"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("隐"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 15, Term: []byte("马尔可夫"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("雷猴是个好网站"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("个"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("好"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("网站"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("“"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 12, Term: []byte("Microsoft"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 12, End: 15, Term: []byte("”"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 15, End: 21, Term: []byte("一词"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("由"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("“"), Position: 6, Type: analysis.AlphaNumeric, }, { Start: 27, End: 40, Term: []byte("MICROcomputer"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 40, End: 43, Term: []byte("("), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 43, End: 49, Term: []byte("微型"), Position: 9, Type: analysis.Ideographic, }, { Start: 49, End: 58, Term: []byte("计算机"), Position: 10, Type: analysis.Ideographic, }, { Start: 58, End: 61, Term: []byte(")"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 61, End: 64, Term: []byte("”"), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 64, End: 67, Term: []byte("和"), Position: 13, Type: analysis.Ideographic, }, { Start: 67, End: 70, Term: []byte("“"), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 70, End: 78, Term: []byte("SOFTware"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 78, End: 81, Term: []byte("("), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 81, End: 87, Term: []byte("软件"), Position: 17, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte(")"), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 90, End: 93, Term: []byte("”"), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("两"), Position: 20, Type: analysis.Ideographic, }, { Start: 96, End: 102, Term: []byte("部分"), Position: 21, Type: analysis.Ideographic, }, { Start: 102, End: 108, Term: []byte("组成"), Position: 22, Type: analysis.Ideographic, }, }, }, { []byte("草泥马和欺实马是今年的流行词汇"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("草泥马"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("欺实"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("马"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("今年"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("流行"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("词汇"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("伊藤洋华堂总府店"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("伊藤"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("洋华堂"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("总府"), Position: 3, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("店"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("中国科学院计算技术研究所"), analysis.TokenStream{ { Start: 0, End: 36, Term: []byte("中国科学院计算技术研究所"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte("罗密欧与朱丽叶"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("罗密欧"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("与"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("朱丽叶"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("我购买了道具和服装"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("购买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("道具"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("服装"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("PS"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 3, Term: []byte(":"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 3, End: 4, Term: []byte(" "), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("我"), Position: 4, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("觉得"), Position: 5, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("开源"), Position: 6, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("有"), Position: 7, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("一个"), Position: 8, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("好处"), Position: 9, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte(","), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 37, End: 43, Term: []byte("就是"), Position: 11, Type: analysis.Ideographic, }, { Start: 43, End: 49, Term: []byte("能够"), Position: 12, Type: analysis.Ideographic, }, { Start: 49, End: 55, Term: []byte("敦促"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("自己"), Position: 14, Type: analysis.Ideographic, }, { Start: 61, End: 73, Term: []byte("不断改进"), Position: 15, Type: analysis.Ideographic, }, { Start: 73, End: 76, Term: []byte(","), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 76, End: 82, Term: []byte("避免"), Position: 17, Type: analysis.Ideographic, }, { Start: 82, End: 88, Term: []byte("敞帚"), Position: 18, Type: analysis.Ideographic, }, { Start: 88, End: 94, Term: []byte("自珍"), Position: 19, Type: analysis.Ideographic, }, }, }, { []byte("湖北省石首市"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("湖北省"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("石首市"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("湖北省十堰市"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("湖北省"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("十堰市"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("总经理完成了这件事情"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("总经理"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("完成"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("这件"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("事情"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("电脑修好了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("电脑"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("修好"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("做好了这件事情就一了百了了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("做好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("这件"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("事情"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 36, Term: []byte("一了百了"), Position: 6, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("了"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("人们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("审美"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("观点"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("不同"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("我们买了一个美的空调"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("我们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("一个"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("美的"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("空调"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("线程初始化时我们要注意"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("线程"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("初始化"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("我们"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("要"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("注意"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("一个分子是由好多原子组织成的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("一个"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("分子"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("是"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("由"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("好多"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("原子"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("组织"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("成"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("祝你马到功成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("祝"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("你"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 18, Term: []byte("马到功成"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("他掉进了无底洞里"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("掉"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("进"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("无底洞"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("里"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("中国的首都是北京"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("中国"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("北京"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("孙君意"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("孙君意"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte("外交部发言人马朝旭"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("外交部"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("发言人"), Position: 2, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("马朝旭"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("领导人会议和第四届东亚峰会"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("领导人"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("会议"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("第四届"), Position: 4, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("东亚"), Position: 5, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("峰会"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("在过去的这五年"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("过去"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("这"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("五年"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("还需要很长的路要走"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("还"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("很长"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("路"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("要"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("走"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("60周年首都阅兵"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("60"), Position: 1, Type: analysis.Numeric, }, { Start: 2, End: 8, Term: []byte("周年"), Position: 2, Type: analysis.Ideographic, }, { Start: 8, End: 14, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("阅兵"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("你好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("人们"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("审美"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("观点"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("不同"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后来世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("来"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后去世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("去"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("但是后来我才知道你是对的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("但是"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("后来"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("我"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("才"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("知道"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("你"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("对"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("存在即合理"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("存在"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("即"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("合理"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("的的的的的在的的的的就以和和和"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("的"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("在"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("就"), Position: 11, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("以"), Position: 12, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("和"), Position: 13, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("和"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("和"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("I love你,不以为耻,反以为rong"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte("I"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 2, Term: []byte(" "), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 2, End: 6, Term: []byte("love"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 6, End: 9, Term: []byte("你"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte(","), Position: 5, Type: analysis.AlphaNumeric, }, { Start: 12, End: 24, Term: []byte("不以为耻"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte(","), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 27, End: 30, Term: []byte("反"), Position: 8, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("以为"), Position: 9, Type: analysis.Ideographic, }, { Start: 36, End: 40, Term: []byte("rong"), Position: 10, Type: analysis.AlphaNumeric, }, }, }, { []byte("因"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("因"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("很好但主要是基于网页形式"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("很"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("好"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("但"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("主要"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("基于"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("网页"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("形式"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("为什么我不能拥有想要的生活"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("为什么"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("我"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("不能"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("拥有"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("想要"), Position: 5, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("生活"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("后来我才"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("后来"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("我"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("才"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("此次来中国是为了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("此次"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("来"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("中国"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("为了"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("使用"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("它"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("就"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("可以"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("解决"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一些"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("问题"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte(",使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte(","), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 7, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 10, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 16, End: 22, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 40, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("其实使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("其实"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("好人使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("好人"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("是因为和国家"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("是因为"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("国家"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("老年搜索还支持"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("老年"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("搜索"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("还"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("支持"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("干脆"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("就"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("把"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("那部"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("蒙人"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("闲法"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("给"), Position: 8, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("废"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("了"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("拉倒"), Position: 11, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("!"), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 51, End: 53, Term: []byte("RT"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 53, End: 54, Term: []byte(" "), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 54, End: 55, Term: []byte("@"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 55, End: 67, Term: []byte("laoshipukong"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 67, End: 68, Term: []byte(" "), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 68, End: 69, Term: []byte(":"), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 69, End: 70, Term: []byte(" "), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 70, End: 72, Term: []byte("27"), Position: 20, Type: analysis.Numeric, }, { Start: 72, End: 75, Term: []byte("日"), Position: 21, Type: analysis.Ideographic, }, { Start: 75, End: 78, Term: []byte(","), Position: 22, Type: analysis.AlphaNumeric, }, { Start: 78, End: 99, Term: []byte("全国人大常委会"), Position: 23, Type: analysis.Ideographic, }, { Start: 99, End: 108, Term: []byte("第三次"), Position: 24, Type: analysis.Ideographic, }, { Start: 108, End: 114, Term: []byte("审议"), Position: 25, Type: analysis.Ideographic, }, { Start: 114, End: 120, Term: []byte("侵权"), Position: 26, Type: analysis.Ideographic, }, { Start: 120, End: 129, Term: []byte("责任法"), Position: 27, Type: analysis.Ideographic, }, { Start: 129, End: 135, Term: []byte("草案"), Position: 28, Type: analysis.Ideographic, }, { Start: 135, End: 138, Term: []byte(","), Position: 29, Type: analysis.AlphaNumeric, }, { Start: 138, End: 144, Term: []byte("删除"), Position: 30, Type: analysis.Ideographic, }, { Start: 144, End: 147, Term: []byte("了"), Position: 31, Type: analysis.Ideographic, }, { Start: 147, End: 153, Term: []byte("有关"), Position: 32, Type: analysis.Ideographic, }, { Start: 153, End: 159, Term: []byte("医疗"), Position: 33, Type: analysis.Ideographic, }, { Start: 159, End: 165, Term: []byte("损害"), Position: 34, Type: analysis.Ideographic, }, { Start: 165, End: 171, Term: []byte("责任"), Position: 35, Type: analysis.Ideographic, }, { Start: 171, End: 174, Term: []byte("“"), Position: 36, Type: analysis.AlphaNumeric, }, { Start: 174, End: 180, Term: []byte("举证"), Position: 37, Type: analysis.Ideographic, }, { Start: 180, End: 186, Term: []byte("倒置"), Position: 38, Type: analysis.Ideographic, }, { Start: 186, End: 189, Term: []byte("”"), Position: 39, Type: analysis.AlphaNumeric, }, { Start: 189, End: 192, Term: []byte("的"), Position: 40, Type: analysis.Ideographic, }, { Start: 192, End: 198, Term: []byte("规定"), Position: 41, Type: analysis.Ideographic, }, { Start: 198, End: 201, Term: []byte("。"), Position: 42, Type: analysis.AlphaNumeric, }, { Start: 201, End: 204, Term: []byte("在"), Position: 43, Type: analysis.Ideographic, }, { Start: 204, End: 210, Term: []byte("医患"), Position: 44, Type: analysis.Ideographic, }, { Start: 210, End: 216, Term: []byte("纠纷"), Position: 45, Type: analysis.Ideographic, }, { Start: 216, End: 222, Term: []byte("中本"), Position: 46, Type: analysis.Ideographic, }, { Start: 222, End: 225, Term: []byte("已"), Position: 47, Type: analysis.Ideographic, }, { Start: 225, End: 231, Term: []byte("处于"), Position: 48, Type: analysis.Ideographic, }, { Start: 231, End: 237, Term: []byte("弱势"), Position: 49, Type: analysis.Ideographic, }, { Start: 237, End: 243, Term: []byte("地位"), Position: 50, Type: analysis.Ideographic, }, { Start: 243, End: 246, Term: []byte("的"), Position: 51, Type: analysis.Ideographic, }, { Start: 246, End: 255, Term: []byte("消费者"), Position: 52, Type: analysis.Ideographic, }, { Start: 255, End: 261, Term: []byte("由此"), Position: 53, Type: analysis.Ideographic, }, { Start: 261, End: 264, Term: []byte("将"), Position: 54, Type: analysis.Ideographic, }, { Start: 264, End: 270, Term: []byte("陷入"), Position: 55, Type: analysis.Ideographic, }, { Start: 270, End: 282, Term: []byte("万劫不复"), Position: 56, Type: analysis.Ideographic, }, { Start: 282, End: 285, Term: []byte("的"), Position: 57, Type: analysis.Ideographic, }, { Start: 285, End: 291, Term: []byte("境地"), Position: 58, Type: analysis.Ideographic, }, { Start: 291, End: 294, Term: []byte("。"), Position: 59, Type: analysis.AlphaNumeric, }, { Start: 294, End: 295, Term: []byte(" "), Position: 60, Type: analysis.AlphaNumeric, }, }, }, { []byte("大"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("大"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("他说的确实在理"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("说"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("确实"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("在理"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春节讲话"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("长春"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("市长"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("春节"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("讲话"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("结婚的和尚未结婚的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结婚"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("尚未"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("结婚"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("结合成分子时"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结合"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("成"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("分子"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("旅游和服务是最好的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("旅游"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("服务"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("最好"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("这件事情的确是我的错"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("这件"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("事情"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("的确"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("我"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("错"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("供大家参考指正"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("供"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("大家"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("参考"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("指正"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("哈尔滨政府公布塌桥原因"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("哈尔滨"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("政府"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("公布"), Position: 3, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("塌桥"), Position: 4, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("原因"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("我在机场入口处"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("在"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("机场"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("入口处"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("邢永臣摄影报道"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("邢永臣"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("摄影"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("报道"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("BP神经网络如何训练才能在分类时增加区分度?"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("BP"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 14, Term: []byte("神经网络"), Position: 2, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("如何"), Position: 3, Type: analysis.Ideographic, }, { Start: 20, End: 26, Term: []byte("训练"), Position: 4, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("才能"), Position: 5, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("在"), Position: 6, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("分类"), Position: 7, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("时"), Position: 8, Type: analysis.Ideographic, }, { Start: 44, End: 50, Term: []byte("增加"), Position: 9, Type: analysis.Ideographic, }, { Start: 50, End: 59, Term: []byte("区分度"), Position: 10, Type: analysis.Ideographic, }, { Start: 59, End: 62, Term: []byte("?"), Position: 11, Type: analysis.AlphaNumeric, }, }, }, { []byte("南京市长江大桥"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("南京市"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 21, Term: []byte("长江大桥"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("应"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("一些"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("使用者"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("建议"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte(","), Position: 6, Type: analysis.AlphaNumeric, }, { Start: 30, End: 33, Term: []byte("也"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("为了"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("便于"), Position: 9, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("利用"), Position: 10, Type: analysis.Ideographic, }, { Start: 51, End: 59, Term: []byte("NiuTrans"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 59, End: 65, Term: []byte("用于"), Position: 12, Type: analysis.Ideographic, }, { Start: 65, End: 68, Term: []byte("SMT"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 68, End: 74, Term: []byte("研究"), Position: 14, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春药店"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("长春市"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("长春"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("药店"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("邓颖超生前最喜欢的衣服"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("邓颖超"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("生前"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("最"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("喜欢"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("衣服"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("胡锦涛是热爱世界和平的政治局常委"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("胡锦涛"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("热爱"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世界"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("和平"), Position: 5, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("政治局"), Position: 7, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("常委"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("程序员"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("祝"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("海林"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 30, Term: []byte("朱会震"), Position: 5, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("是"), Position: 6, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("在"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 42, Term: []byte("孙健"), Position: 8, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("左面"), Position: 10, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("和"), Position: 11, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("右面"), Position: 12, Type: analysis.Ideographic, }, { Start: 60, End: 61, Term: []byte(","), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 61, End: 62, Term: []byte(" "), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 62, End: 68, Term: []byte("范凯"), Position: 15, Type: analysis.Ideographic, }, { Start: 68, End: 71, Term: []byte("在"), Position: 16, Type: analysis.Ideographic, }, { Start: 71, End: 74, Term: []byte("最"), Position: 17, Type: analysis.Ideographic, }, { Start: 74, End: 80, Term: []byte("右面"), Position: 18, Type: analysis.Ideographic, }, { Start: 80, End: 81, Term: []byte("."), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 81, End: 87, Term: []byte("再往"), Position: 20, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte("左"), Position: 21, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("是"), Position: 22, Type: analysis.Ideographic, }, { Start: 93, End: 102, Term: []byte("李松洪"), Position: 23, Type: analysis.Ideographic, }, }, }, { []byte("一次性交多少钱"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("一次性"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("交"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("多少"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("钱"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("两块五一套,三块八一斤,四块七一本,五块六一条"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("两块"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("五"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("一套"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte(","), Position: 4, Type: analysis.AlphaNumeric, }, { Start: 18, End: 24, Term: []byte("三块"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("八"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一斤"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte(","), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 36, End: 42, Term: []byte("四块"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("七"), Position: 10, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("一本"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte(","), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 54, End: 60, Term: []byte("五块"), Position: 13, Type: analysis.Ideographic, }, { Start: 60, End: 63, Term: []byte("六"), Position: 14, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("一条"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("小和尚留了一个像大和尚一样的和尚头"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("小"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("和尚"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("留"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("一个"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("像"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("大"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("和尚"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一样"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 51, Term: []byte("和尚头"), Position: 11, Type: analysis.Ideographic, }, }, }, { []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 27, Term: []byte("中华人民共和国"), Position: 3, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("公民"), Position: 4, Type: analysis.Ideographic, }, { Start: 33, End: 34, Term: []byte(";"), Position: 5, Type: analysis.AlphaNumeric, }, { Start: 34, End: 37, Term: []byte("我"), Position: 6, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("爸爸"), Position: 7, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("是"), Position: 8, Type: analysis.Ideographic, }, { Start: 46, End: 55, Term: []byte("共和党"), Position: 9, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("党员"), Position: 10, Type: analysis.Ideographic, }, { Start: 61, End: 62, Term: []byte(";"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 62, End: 63, Term: []byte(" "), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 63, End: 69, Term: []byte("地铁"), Position: 13, Type: analysis.Ideographic, }, { Start: 69, End: 78, Term: []byte("和平门"), Position: 14, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("站"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("张晓梅去人民医院做了个B超然后去买了件T恤"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("张晓梅"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("去"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人民"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("医院"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("做"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("了"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("个"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 37, Term: []byte("B超"), Position: 8, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("然后"), Position: 9, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("去"), Position: 10, Type: analysis.Ideographic, }, { Start: 46, End: 49, Term: []byte("买"), Position: 11, Type: analysis.Ideographic, }, { Start: 49, End: 52, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 52, End: 55, Term: []byte("件"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 59, Term: []byte("T恤"), Position: 14, Type: analysis.Ideographic, }, }, }, { []byte("AT&T是一件不错的公司,给你发offer了吗?"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("AT&T"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("一件"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("不错"), Position: 4, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("公司"), Position: 6, Type: analysis.Ideographic, }, { Start: 28, End: 31, Term: []byte(","), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 31, End: 34, Term: []byte("给"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte("你"), Position: 9, Type: analysis.Ideographic, }, { Start: 37, End: 40, Term: []byte("发"), Position: 10, Type: analysis.Ideographic, }, { Start: 40, End: 45, Term: []byte("offer"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("吗"), Position: 13, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("?"), Position: 14, Type: analysis.AlphaNumeric, }, }, }, { []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("C++"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 6, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 8, Term: []byte("c#"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 8, End: 11, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("什么"), Position: 5, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("关系"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("?"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 26, End: 28, Term: []byte("11"), Position: 8, Type: analysis.Numeric, }, { Start: 28, End: 29, Term: []byte("+"), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 29, End: 32, Term: []byte("122"), Position: 10, Type: analysis.Numeric, }, { Start: 32, End: 33, Term: []byte("="), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 33, End: 36, Term: []byte("133"), Position: 12, Type: analysis.Numeric, }, { Start: 36, End: 39, Term: []byte(","), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 39, End: 42, Term: []byte("是"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("吗"), Position: 15, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("?"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 48, End: 50, Term: []byte("PI"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 50, End: 51, Term: []byte("="), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 51, End: 58, Term: []byte("3.14159"), Position: 19, Type: analysis.Numeric, }, }, }, { []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("你"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("认识"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("那个"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("主席"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("握手"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("的哥"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("吗"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("?"), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 45, End: 51, Term: []byte("他开"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 57, Term: []byte("一辆"), Position: 12, Type: analysis.Ideographic, }, { Start: 57, End: 63, Term: []byte("黑色"), Position: 13, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("的士"), Position: 14, Type: analysis.Ideographic, }, { Start: 69, End: 72, Term: []byte("。"), Position: 15, Type: analysis.AlphaNumeric, }, }, }, { []byte("枪杆子中出政权"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("枪杆子"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("中"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("出"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("政权"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("张三风同学走上了不归路"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("张三风"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("同学"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("走上"), Position: 3, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 33, Term: []byte("不归路"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("阿Q"), Position: 1, Type: analysis.Ideographic, }, { Start: 4, End: 10, Term: []byte("腰间"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("挂"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("着"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 21, Term: []byte("BB机"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("手里"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("拿"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("着"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("大哥大"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte(","), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("说"), Position: 11, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte(":"), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 51, End: 54, Term: []byte("我"), Position: 13, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("一般"), Position: 14, Type: analysis.Ideographic, }, { Start: 60, End: 66, Term: []byte("吃饭"), Position: 15, Type: analysis.Ideographic, }, { Start: 66, End: 69, Term: []byte("不"), Position: 16, Type: analysis.Ideographic, }, { Start: 69, End: 74, Term: []byte("AA制"), Position: 17, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("的"), Position: 18, Type: analysis.Ideographic, }, { Start: 77, End: 80, Term: []byte("。"), Position: 19, Type: analysis.AlphaNumeric, }, }, }, { []byte("在1号店能买到小S和大S八卦的书。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 10, Term: []byte("1号店"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("能"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("买"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 19, Term: []byte("到"), Position: 5, Type: analysis.Ideographic, }, { Start: 19, End: 23, Term: []byte("小S"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("和"), Position: 7, Type: analysis.Ideographic, }, { Start: 26, End: 30, Term: []byte("大S"), Position: 8, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("八卦"), Position: 9, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("书"), Position: 11, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("。"), Position: 12, Type: analysis.AlphaNumeric, }, }, }, } tokenizer, _ := NewJiebaTokenizer("dict.txt", true, false) for _, test := range tests { actual := tokenizer.Tokenize(test.input) if !reflect.DeepEqual(actual, test.output) { t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input)) } } } func TestJiebaTokenizerSearchModeWithHMM(t *testing.T) { tests := []struct { input []byte output analysis.TokenStream }{ { []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("这是"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("一个"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("伸手"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("不见"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("五指"), Position: 5, Type: analysis.Ideographic, }, { Start: 12, End: 30, Term: []byte("伸手不见五指"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("黑夜"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("。"), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 42, End: 45, Term: []byte("我"), Position: 10, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("叫"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 57, Term: []byte("悟空"), Position: 12, Type: analysis.Ideographic, }, { Start: 48, End: 57, Term: []byte("孙悟空"), Position: 13, Type: analysis.Ideographic, }, { Start: 57, End: 60, Term: []byte(","), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 60, End: 63, Term: []byte("我"), Position: 15, Type: analysis.Ideographic, }, { Start: 63, End: 66, Term: []byte("爱"), Position: 16, Type: analysis.Ideographic, }, { Start: 66, End: 72, Term: []byte("北京"), Position: 17, Type: analysis.Ideographic, }, { Start: 72, End: 75, Term: []byte(","), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 75, End: 78, Term: []byte("我"), Position: 19, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("爱"), Position: 20, Type: analysis.Ideographic, }, { Start: 81, End: 87, Term: []byte("Python"), Position: 21, Type: analysis.AlphaNumeric, }, { Start: 87, End: 90, Term: []byte("和"), Position: 22, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("C++"), Position: 23, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("。"), Position: 24, Type: analysis.AlphaNumeric, }, }, }, { []byte("我不喜欢日本和服。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("不"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("喜欢"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("日本"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("和服"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("。"), Position: 6, Type: analysis.AlphaNumeric, }, }, }, { []byte("雷猴回归人间。"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("回归"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人间"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("。"), Position: 4, Type: analysis.AlphaNumeric, }, }, }, { []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("工信处"), Position: 1, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("干事"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("女干事"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("每月"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("经过"), Position: 5, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("下属"), Position: 6, Type: analysis.Ideographic, }, { Start: 36, End: 42, Term: []byte("科室"), Position: 7, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("都"), Position: 8, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("要"), Position: 9, Type: analysis.Ideographic, }, { Start: 48, End: 54, Term: []byte("亲口"), Position: 10, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("交代"), Position: 11, Type: analysis.Ideographic, }, { Start: 60, End: 62, Term: []byte("24"), Position: 12, Type: analysis.Numeric, }, { Start: 62, End: 65, Term: []byte("口"), Position: 13, Type: analysis.Ideographic, }, { Start: 65, End: 71, Term: []byte("交换"), Position: 14, Type: analysis.Ideographic, }, { Start: 68, End: 74, Term: []byte("换机"), Position: 15, Type: analysis.Ideographic, }, { Start: 65, End: 74, Term: []byte("交换机"), Position: 16, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("等"), Position: 17, Type: analysis.Ideographic, }, { Start: 77, End: 83, Term: []byte("技术"), Position: 18, Type: analysis.Ideographic, }, { Start: 77, End: 86, Term: []byte("技术性"), Position: 19, Type: analysis.Ideographic, }, { Start: 86, End: 92, Term: []byte("器件"), Position: 20, Type: analysis.Ideographic, }, { Start: 92, End: 95, Term: []byte("的"), Position: 21, Type: analysis.Ideographic, }, { Start: 95, End: 101, Term: []byte("安装"), Position: 22, Type: analysis.Ideographic, }, { Start: 101, End: 107, Term: []byte("工作"), Position: 23, Type: analysis.Ideographic, }, }, }, { []byte("我需要廉租房"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("廉租"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("租房"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("廉租房"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("永和服装饰品有限公司"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("永和"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("服装"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("饰品"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("有限"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("公司"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 30, Term: []byte("有限公司"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("我爱北京天安门"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("爱"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("北京"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("天安"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("天安门"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("abc"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("abc"), Position: 1, Type: analysis.AlphaNumeric, }, }, }, { []byte("隐马尔可夫"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("隐"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("可夫"), Position: 2, Type: analysis.Ideographic, }, { Start: 3, End: 12, Term: []byte("马尔可"), Position: 3, Type: analysis.Ideographic, }, { Start: 3, End: 15, Term: []byte("马尔可夫"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("雷猴是个好网站"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("个"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("好"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("网站"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("“"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 12, Term: []byte("Microsoft"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 12, End: 15, Term: []byte("”"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 15, End: 21, Term: []byte("一词"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("由"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("“"), Position: 6, Type: analysis.AlphaNumeric, }, { Start: 27, End: 40, Term: []byte("MICROcomputer"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 40, End: 43, Term: []byte("("), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 43, End: 49, Term: []byte("微型"), Position: 9, Type: analysis.Ideographic, }, { Start: 49, End: 55, Term: []byte("计算"), Position: 10, Type: analysis.Ideographic, }, { Start: 52, End: 58, Term: []byte("算机"), Position: 11, Type: analysis.Ideographic, }, { Start: 49, End: 58, Term: []byte("计算机"), Position: 12, Type: analysis.Ideographic, }, { Start: 58, End: 61, Term: []byte(")"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 61, End: 64, Term: []byte("”"), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 64, End: 67, Term: []byte("和"), Position: 15, Type: analysis.Ideographic, }, { Start: 67, End: 70, Term: []byte("“"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 70, End: 78, Term: []byte("SOFTware"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 78, End: 81, Term: []byte("("), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 81, End: 87, Term: []byte("软件"), Position: 19, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte(")"), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 90, End: 93, Term: []byte("”"), Position: 21, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("两"), Position: 22, Type: analysis.Ideographic, }, { Start: 96, End: 102, Term: []byte("部分"), Position: 23, Type: analysis.Ideographic, }, { Start: 102, End: 108, Term: []byte("组成"), Position: 24, Type: analysis.Ideographic, }, }, }, { []byte("草泥马和欺实马是今年的流行词汇"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("草泥马"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("欺实"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("马"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("今年"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("流行"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("词汇"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("伊藤洋华堂总府店"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("伊藤"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("洋华堂"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("总府"), Position: 3, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("店"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("中国科学院计算技术研究所"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("中国"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("科学"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("学院"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("计算"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("技术"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("研究"), Position: 6, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("科学院"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 36, Term: []byte("研究所"), Position: 8, Type: analysis.Ideographic, }, { Start: 0, End: 36, Term: []byte("中国科学院计算技术研究所"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("罗密欧与朱丽叶"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("罗密欧"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("与"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("朱丽叶"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("我购买了道具和服装"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("购买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("道具"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("服装"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("PS"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 3, Term: []byte(":"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 3, End: 4, Term: []byte(" "), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("我"), Position: 4, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("觉得"), Position: 5, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("开源"), Position: 6, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("有"), Position: 7, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("一个"), Position: 8, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("好处"), Position: 9, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte(","), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 37, End: 43, Term: []byte("就是"), Position: 11, Type: analysis.Ideographic, }, { Start: 43, End: 49, Term: []byte("能够"), Position: 12, Type: analysis.Ideographic, }, { Start: 49, End: 55, Term: []byte("敦促"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("自己"), Position: 14, Type: analysis.Ideographic, }, { Start: 61, End: 67, Term: []byte("不断"), Position: 15, Type: analysis.Ideographic, }, { Start: 67, End: 73, Term: []byte("改进"), Position: 16, Type: analysis.Ideographic, }, { Start: 61, End: 73, Term: []byte("不断改进"), Position: 17, Type: analysis.Ideographic, }, { Start: 73, End: 76, Term: []byte(","), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 76, End: 82, Term: []byte("避免"), Position: 19, Type: analysis.Ideographic, }, { Start: 82, End: 88, Term: []byte("敞帚"), Position: 20, Type: analysis.Ideographic, }, { Start: 88, End: 94, Term: []byte("自珍"), Position: 21, Type: analysis.Ideographic, }, }, }, { []byte("湖北省石首市"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("湖北"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("湖北省"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("石首"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("石首市"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("湖北省十堰市"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("湖北"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("湖北省"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("十堰"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("十堰市"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("总经理完成了这件事情"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("经理"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("总经理"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("完成"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("这件"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("事情"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("电脑修好了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("电脑"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("修好"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("做好了这件事情就一了百了了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("做好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("这件"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("事情"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 36, Term: []byte("一了百了"), Position: 6, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("了"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("人们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("审美"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("观点"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("不同"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("我们买了一个美的空调"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("我们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("一个"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("美的"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("空调"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("线程初始化时我们要注意"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("线程"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("初始"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("初始化"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("我们"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("要"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("注意"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("一个分子是由好多原子组织成的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("一个"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("分子"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("是"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("由"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("好多"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("原子"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("组织"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("成"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("祝你马到功成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("祝"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("你"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 18, Term: []byte("马到功成"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("他掉进了无底洞里"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("掉"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("进"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("无底"), Position: 5, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("无底洞"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("里"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("中国的首都是北京"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("中国"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("北京"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("孙君意"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("孙君意"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte("外交部发言人马朝旭"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("外交"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("外交部"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("发言"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("发言人"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("马朝旭"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("领导人会议和第四届东亚峰会"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("领导"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("领导人"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("会议"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("第四"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("四届"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("第四届"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("东亚"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("峰会"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("在过去的这五年"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("过去"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("这"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("五年"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("还需要很长的路要走"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("还"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("很长"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("路"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("要"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("走"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("60周年首都阅兵"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("60"), Position: 1, Type: analysis.Numeric, }, { Start: 2, End: 8, Term: []byte("周年"), Position: 2, Type: analysis.Ideographic, }, { Start: 8, End: 14, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("阅兵"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("你好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("人们"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("审美"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("观点"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("不同"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后来世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("来"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世博"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("博园"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后去世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("去"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世博"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("博园"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("但是后来我才知道你是对的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("但是"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("后来"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("我"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("才"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("知道"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("你"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("对"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("存在即合理"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("存在"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("即"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("合理"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("的的的的的在的的的的就以和和和"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("的"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("在"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("就"), Position: 11, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("以"), Position: 12, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("和"), Position: 13, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("和"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("和"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("I love你,不以为耻,反以为rong"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte("I"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 2, Term: []byte(" "), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 2, End: 6, Term: []byte("love"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 6, End: 9, Term: []byte("你"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte(","), Position: 5, Type: analysis.AlphaNumeric, }, { Start: 12, End: 18, Term: []byte("不以"), Position: 6, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("以为"), Position: 7, Type: analysis.Ideographic, }, { Start: 12, End: 24, Term: []byte("不以为耻"), Position: 8, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte(","), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 27, End: 30, Term: []byte("反"), Position: 10, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("以为"), Position: 11, Type: analysis.Ideographic, }, { Start: 36, End: 40, Term: []byte("rong"), Position: 12, Type: analysis.AlphaNumeric, }, }, }, { []byte("因"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("因"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("很好但主要是基于网页形式"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("很"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("好"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("但"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("主要"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("基于"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("网页"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("形式"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("为什么我不能拥有想要的生活"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("什么"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("为什么"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("我"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("不能"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("拥有"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("想要"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("生活"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("后来我才"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("后来"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("我"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("才"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("此次来中国是为了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("此次"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("来"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("中国"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("为了"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("使用"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("它"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("就"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("可以"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("解决"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一些"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("问题"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte(",使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte(","), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 7, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 10, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 16, End: 22, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 40, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("其实使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("其实"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("好人使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("好人"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("是因为和国家"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("因为"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("是因为"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("国家"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("老年搜索还支持"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("老年"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("搜索"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("还"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("支持"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("干脆"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("就"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("把"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("那部"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("蒙人"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("闲法"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("给"), Position: 8, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("废"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("了"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("拉倒"), Position: 11, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("!"), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 51, End: 53, Term: []byte("RT"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 53, End: 54, Term: []byte(" "), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 54, End: 55, Term: []byte("@"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 55, End: 67, Term: []byte("laoshipukong"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 67, End: 68, Term: []byte(" "), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 68, End: 69, Term: []byte(":"), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 69, End: 70, Term: []byte(" "), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 70, End: 72, Term: []byte("27"), Position: 20, Type: analysis.Numeric, }, { Start: 72, End: 75, Term: []byte("日"), Position: 21, Type: analysis.Ideographic, }, { Start: 75, End: 78, Term: []byte(","), Position: 22, Type: analysis.AlphaNumeric, }, { Start: 78, End: 84, Term: []byte("全国"), Position: 23, Type: analysis.Ideographic, }, { Start: 81, End: 87, Term: []byte("国人"), Position: 24, Type: analysis.Ideographic, }, { Start: 84, End: 90, Term: []byte("人大"), Position: 25, Type: analysis.Ideographic, }, { Start: 90, End: 96, Term: []byte("常委"), Position: 26, Type: analysis.Ideographic, }, { Start: 93, End: 99, Term: []byte("委会"), Position: 27, Type: analysis.Ideographic, }, { Start: 90, End: 99, Term: []byte("常委会"), Position: 28, Type: analysis.Ideographic, }, { Start: 78, End: 99, Term: []byte("全国人大常委会"), Position: 29, Type: analysis.Ideographic, }, { Start: 99, End: 105, Term: []byte("第三"), Position: 30, Type: analysis.Ideographic, }, { Start: 102, End: 108, Term: []byte("三次"), Position: 31, Type: analysis.Ideographic, }, { Start: 99, End: 108, Term: []byte("第三次"), Position: 32, Type: analysis.Ideographic, }, { Start: 108, End: 114, Term: []byte("审议"), Position: 33, Type: analysis.Ideographic, }, { Start: 114, End: 120, Term: []byte("侵权"), Position: 34, Type: analysis.Ideographic, }, { Start: 120, End: 126, Term: []byte("责任"), Position: 35, Type: analysis.Ideographic, }, { Start: 120, End: 129, Term: []byte("责任法"), Position: 36, Type: analysis.Ideographic, }, { Start: 129, End: 135, Term: []byte("草案"), Position: 37, Type: analysis.Ideographic, }, { Start: 135, End: 138, Term: []byte(","), Position: 38, Type: analysis.AlphaNumeric, }, { Start: 138, End: 144, Term: []byte("删除"), Position: 39, Type: analysis.Ideographic, }, { Start: 144, End: 147, Term: []byte("了"), Position: 40, Type: analysis.Ideographic, }, { Start: 147, End: 153, Term: []byte("有关"), Position: 41, Type: analysis.Ideographic, }, { Start: 153, End: 159, Term: []byte("医疗"), Position: 42, Type: analysis.Ideographic, }, { Start: 159, End: 165, Term: []byte("损害"), Position: 43, Type: analysis.Ideographic, }, { Start: 165, End: 171, Term: []byte("责任"), Position: 44, Type: analysis.Ideographic, }, { Start: 171, End: 174, Term: []byte("“"), Position: 45, Type: analysis.AlphaNumeric, }, { Start: 174, End: 180, Term: []byte("举证"), Position: 46, Type: analysis.Ideographic, }, { Start: 180, End: 186, Term: []byte("倒置"), Position: 47, Type: analysis.Ideographic, }, { Start: 186, End: 189, Term: []byte("”"), Position: 48, Type: analysis.AlphaNumeric, }, { Start: 189, End: 192, Term: []byte("的"), Position: 49, Type: analysis.Ideographic, }, { Start: 192, End: 198, Term: []byte("规定"), Position: 50, Type: analysis.Ideographic, }, { Start: 198, End: 201, Term: []byte("。"), Position: 51, Type: analysis.AlphaNumeric, }, { Start: 201, End: 204, Term: []byte("在"), Position: 52, Type: analysis.Ideographic, }, { Start: 204, End: 210, Term: []byte("医患"), Position: 53, Type: analysis.Ideographic, }, { Start: 210, End: 216, Term: []byte("纠纷"), Position: 54, Type: analysis.Ideographic, }, { Start: 216, End: 222, Term: []byte("中本"), Position: 55, Type: analysis.Ideographic, }, { Start: 222, End: 225, Term: []byte("已"), Position: 56, Type: analysis.Ideographic, }, { Start: 225, End: 231, Term: []byte("处于"), Position: 57, Type: analysis.Ideographic, }, { Start: 231, End: 237, Term: []byte("弱势"), Position: 58, Type: analysis.Ideographic, }, { Start: 237, End: 243, Term: []byte("地位"), Position: 59, Type: analysis.Ideographic, }, { Start: 243, End: 246, Term: []byte("的"), Position: 60, Type: analysis.Ideographic, }, { Start: 246, End: 252, Term: []byte("消费"), Position: 61, Type: analysis.Ideographic, }, { Start: 246, End: 255, Term: []byte("消费者"), Position: 62, Type: analysis.Ideographic, }, { Start: 255, End: 261, Term: []byte("由此"), Position: 63, Type: analysis.Ideographic, }, { Start: 261, End: 264, Term: []byte("将"), Position: 64, Type: analysis.Ideographic, }, { Start: 264, End: 270, Term: []byte("陷入"), Position: 65, Type: analysis.Ideographic, }, { Start: 276, End: 282, Term: []byte("不复"), Position: 66, Type: analysis.Ideographic, }, { Start: 270, End: 282, Term: []byte("万劫不复"), Position: 67, Type: analysis.Ideographic, }, { Start: 282, End: 285, Term: []byte("的"), Position: 68, Type: analysis.Ideographic, }, { Start: 285, End: 291, Term: []byte("境地"), Position: 69, Type: analysis.Ideographic, }, { Start: 291, End: 294, Term: []byte("。"), Position: 70, Type: analysis.AlphaNumeric, }, { Start: 294, End: 295, Term: []byte(" "), Position: 71, Type: analysis.AlphaNumeric, }, }, }, { []byte("大"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("大"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("他说的确实在理"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("说"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("确实"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("在理"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春节讲话"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("长春"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("市长"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("春节"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("讲话"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("结婚的和尚未结婚的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结婚"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("尚未"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("结婚"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("结合成分子时"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结合"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("成"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("分子"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("旅游和服务是最好的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("旅游"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("服务"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("最好"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("这件事情的确是我的错"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("这件"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("事情"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("的确"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("我"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("错"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("供大家参考指正"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("供"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("大家"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("参考"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("指正"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("哈尔滨政府公布塌桥原因"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("哈尔"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("哈尔滨"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("政府"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("公布"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("塌桥"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("原因"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("我在机场入口处"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("在"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("机场"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("入口"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("入口处"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("邢永臣摄影报道"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("邢永臣"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("摄影"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("报道"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("BP神经网络如何训练才能在分类时增加区分度?"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("BP"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 8, Term: []byte("神经"), Position: 2, Type: analysis.Ideographic, }, { Start: 8, End: 14, Term: []byte("网络"), Position: 3, Type: analysis.Ideographic, }, { Start: 2, End: 11, Term: []byte("神经网"), Position: 4, Type: analysis.Ideographic, }, { Start: 2, End: 14, Term: []byte("神经网络"), Position: 5, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("如何"), Position: 6, Type: analysis.Ideographic, }, { Start: 20, End: 26, Term: []byte("训练"), Position: 7, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("才能"), Position: 8, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("在"), Position: 9, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("分类"), Position: 10, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("时"), Position: 11, Type: analysis.Ideographic, }, { Start: 44, End: 50, Term: []byte("增加"), Position: 12, Type: analysis.Ideographic, }, { Start: 50, End: 56, Term: []byte("区分"), Position: 13, Type: analysis.Ideographic, }, { Start: 53, End: 59, Term: []byte("分度"), Position: 14, Type: analysis.Ideographic, }, { Start: 50, End: 59, Term: []byte("区分度"), Position: 15, Type: analysis.Ideographic, }, { Start: 59, End: 62, Term: []byte("?"), Position: 16, Type: analysis.AlphaNumeric, }, }, }, { []byte("南京市长江大桥"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("南京"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("京市"), Position: 2, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("南京市"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("长江"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("大桥"), Position: 5, Type: analysis.Ideographic, }, { Start: 9, End: 21, Term: []byte("长江大桥"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("应"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("一些"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("使用"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("用者"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("使用者"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("建议"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte(","), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 30, End: 33, Term: []byte("也"), Position: 9, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("为了"), Position: 10, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("便于"), Position: 11, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("利用"), Position: 12, Type: analysis.Ideographic, }, { Start: 51, End: 59, Term: []byte("NiuTrans"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 59, End: 65, Term: []byte("用于"), Position: 14, Type: analysis.Ideographic, }, { Start: 65, End: 68, Term: []byte("SMT"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 68, End: 74, Term: []byte("研究"), Position: 16, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春药店"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("长春"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("长春市"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("长春"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("药店"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("邓颖超生前最喜欢的衣服"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("邓颖超"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("生前"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("最"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("喜欢"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("衣服"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("胡锦涛是热爱世界和平的政治局常委"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("锦涛"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("胡锦涛"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("是"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("热爱"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世界"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("和平"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("政治"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("政治局"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("常委"), Position: 10, Type: analysis.Ideographic, }, }, }, { []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("程序"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("程序员"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("祝"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("海林"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 30, Term: []byte("朱会震"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("在"), Position: 8, Type: analysis.Ideographic, }, { Start: 36, End: 42, Term: []byte("孙健"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("左面"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("和"), Position: 12, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("右面"), Position: 13, Type: analysis.Ideographic, }, { Start: 60, End: 61, Term: []byte(","), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 61, End: 62, Term: []byte(" "), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 62, End: 68, Term: []byte("范凯"), Position: 16, Type: analysis.Ideographic, }, { Start: 68, End: 71, Term: []byte("在"), Position: 17, Type: analysis.Ideographic, }, { Start: 71, End: 74, Term: []byte("最"), Position: 18, Type: analysis.Ideographic, }, { Start: 74, End: 80, Term: []byte("右面"), Position: 19, Type: analysis.Ideographic, }, { Start: 80, End: 81, Term: []byte("."), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 81, End: 87, Term: []byte("再往"), Position: 21, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte("左"), Position: 22, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("是"), Position: 23, Type: analysis.Ideographic, }, { Start: 93, End: 102, Term: []byte("李松洪"), Position: 24, Type: analysis.Ideographic, }, }, }, { []byte("一次性交多少钱"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("一次"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("一次性"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("交"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("多少"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("钱"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("两块五一套,三块八一斤,四块七一本,五块六一条"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("两块"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("五"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("一套"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte(","), Position: 4, Type: analysis.AlphaNumeric, }, { Start: 18, End: 24, Term: []byte("三块"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("八"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一斤"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte(","), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 36, End: 42, Term: []byte("四块"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("七"), Position: 10, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("一本"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte(","), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 54, End: 60, Term: []byte("五块"), Position: 13, Type: analysis.Ideographic, }, { Start: 60, End: 63, Term: []byte("六"), Position: 14, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("一条"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("小和尚留了一个像大和尚一样的和尚头"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("小"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("和尚"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("留"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("一个"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("像"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("大"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("和尚"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一样"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("和尚"), Position: 11, Type: analysis.Ideographic, }, { Start: 42, End: 51, Term: []byte("和尚头"), Position: 12, Type: analysis.Ideographic, }, }, }, { []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("中华"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("华人"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人民"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("共和"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("共和国"), Position: 7, Type: analysis.Ideographic, }, { Start: 6, End: 27, Term: []byte("中华人民共和国"), Position: 8, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("公民"), Position: 9, Type: analysis.Ideographic, }, { Start: 33, End: 34, Term: []byte(";"), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 34, End: 37, Term: []byte("我"), Position: 11, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("爸爸"), Position: 12, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("是"), Position: 13, Type: analysis.Ideographic, }, { Start: 46, End: 52, Term: []byte("共和"), Position: 14, Type: analysis.Ideographic, }, { Start: 46, End: 55, Term: []byte("共和党"), Position: 15, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("党员"), Position: 16, Type: analysis.Ideographic, }, { Start: 61, End: 62, Term: []byte(";"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 62, End: 63, Term: []byte(" "), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 63, End: 69, Term: []byte("地铁"), Position: 19, Type: analysis.Ideographic, }, { Start: 69, End: 75, Term: []byte("和平"), Position: 20, Type: analysis.Ideographic, }, { Start: 69, End: 78, Term: []byte("和平门"), Position: 21, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("站"), Position: 22, Type: analysis.Ideographic, }, }, }, { []byte("张晓梅去人民医院做了个B超然后去买了件T恤"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("张晓梅"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("去"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人民"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("医院"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("做"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("了"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("个"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 37, Term: []byte("B超"), Position: 8, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("然后"), Position: 9, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("去"), Position: 10, Type: analysis.Ideographic, }, { Start: 46, End: 49, Term: []byte("买"), Position: 11, Type: analysis.Ideographic, }, { Start: 49, End: 52, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 52, End: 55, Term: []byte("件"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 59, Term: []byte("T恤"), Position: 14, Type: analysis.Ideographic, }, }, }, { []byte("AT&T是一件不错的公司,给你发offer了吗?"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("AT&T"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("一件"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("不错"), Position: 4, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("公司"), Position: 6, Type: analysis.Ideographic, }, { Start: 28, End: 31, Term: []byte(","), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 31, End: 34, Term: []byte("给"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte("你"), Position: 9, Type: analysis.Ideographic, }, { Start: 37, End: 40, Term: []byte("发"), Position: 10, Type: analysis.Ideographic, }, { Start: 40, End: 45, Term: []byte("offer"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("吗"), Position: 13, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("?"), Position: 14, Type: analysis.AlphaNumeric, }, }, }, { []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("C++"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 6, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 8, Term: []byte("c#"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 8, End: 11, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("什么"), Position: 5, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("关系"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("?"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 26, End: 28, Term: []byte("11"), Position: 8, Type: analysis.Numeric, }, { Start: 28, End: 29, Term: []byte("+"), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 29, End: 32, Term: []byte("122"), Position: 10, Type: analysis.Numeric, }, { Start: 32, End: 33, Term: []byte("="), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 33, End: 36, Term: []byte("133"), Position: 12, Type: analysis.Numeric, }, { Start: 36, End: 39, Term: []byte(","), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 39, End: 42, Term: []byte("是"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("吗"), Position: 15, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("?"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 48, End: 50, Term: []byte("PI"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 50, End: 51, Term: []byte("="), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 51, End: 58, Term: []byte("3.14159"), Position: 19, Type: analysis.Numeric, }, }, }, { []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("你"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("认识"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("那个"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("主席"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("握手"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("的哥"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("吗"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("?"), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 45, End: 51, Term: []byte("他开"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 57, Term: []byte("一辆"), Position: 12, Type: analysis.Ideographic, }, { Start: 57, End: 63, Term: []byte("黑色"), Position: 13, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("的士"), Position: 14, Type: analysis.Ideographic, }, { Start: 69, End: 72, Term: []byte("。"), Position: 15, Type: analysis.AlphaNumeric, }, }, }, { []byte("枪杆子中出政权"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("枪杆"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("杆子"), Position: 2, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("枪杆子"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("中"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("出"), Position: 5, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("政权"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("张三风同学走上了不归路"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("张三风"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("同学"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("走上"), Position: 3, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("归路"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 33, Term: []byte("不归路"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("阿Q"), Position: 1, Type: analysis.Ideographic, }, { Start: 4, End: 10, Term: []byte("腰间"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("挂"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("着"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 21, Term: []byte("BB机"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("手里"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("拿"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("着"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("大哥"), Position: 9, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("大哥大"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte(","), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("说"), Position: 12, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte(":"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 51, End: 54, Term: []byte("我"), Position: 14, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("一般"), Position: 15, Type: analysis.Ideographic, }, { Start: 60, End: 66, Term: []byte("吃饭"), Position: 16, Type: analysis.Ideographic, }, { Start: 66, End: 69, Term: []byte("不"), Position: 17, Type: analysis.Ideographic, }, { Start: 69, End: 74, Term: []byte("AA制"), Position: 18, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("的"), Position: 19, Type: analysis.Ideographic, }, { Start: 77, End: 80, Term: []byte("。"), Position: 20, Type: analysis.AlphaNumeric, }, }, }, { []byte("在1号店能买到小S和大S八卦的书。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 10, Term: []byte("1号店"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("能"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("买"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 19, Term: []byte("到"), Position: 5, Type: analysis.Ideographic, }, { Start: 19, End: 23, Term: []byte("小S"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("和"), Position: 7, Type: analysis.Ideographic, }, { Start: 26, End: 30, Term: []byte("大S"), Position: 8, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("八卦"), Position: 9, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("书"), Position: 11, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("。"), Position: 12, Type: analysis.AlphaNumeric, }, }, }, } tokenizer, _ := NewJiebaTokenizer("dict.txt", true, true) for _, test := range tests { actual := tokenizer.Tokenize(test.input) if !reflect.DeepEqual(actual, test.output) { t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input)) } } } func TestJiebaTokenizerDefaultModeWithoutHMM(t *testing.T) { tests := []struct { input []byte output analysis.TokenStream }{ { []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("这"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("一个"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 30, Term: []byte("伸手不见五指"), Position: 4, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("黑夜"), Position: 6, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("。"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 42, End: 45, Term: []byte("我"), Position: 8, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("叫"), Position: 9, Type: analysis.Ideographic, }, { Start: 48, End: 57, Term: []byte("孙悟空"), Position: 10, Type: analysis.Ideographic, }, { Start: 57, End: 60, Term: []byte(","), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 60, End: 63, Term: []byte("我"), Position: 12, Type: analysis.Ideographic, }, { Start: 63, End: 66, Term: []byte("爱"), Position: 13, Type: analysis.Ideographic, }, { Start: 66, End: 72, Term: []byte("北京"), Position: 14, Type: analysis.Ideographic, }, { Start: 72, End: 75, Term: []byte(","), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 75, End: 78, Term: []byte("我"), Position: 16, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("爱"), Position: 17, Type: analysis.Ideographic, }, { Start: 81, End: 87, Term: []byte("Python"), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 87, End: 90, Term: []byte("和"), Position: 19, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("C++"), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("。"), Position: 21, Type: analysis.AlphaNumeric, }, }, }, { []byte("我不喜欢日本和服。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("不"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("喜欢"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("日本"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("和服"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("。"), Position: 6, Type: analysis.AlphaNumeric, }, }, }, { []byte("雷猴回归人间。"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("回归"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人间"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("。"), Position: 4, Type: analysis.AlphaNumeric, }, }, }, { []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("工信处"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("女干事"), Position: 2, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("每月"), Position: 3, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("经过"), Position: 4, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("下属"), Position: 5, Type: analysis.Ideographic, }, { Start: 36, End: 42, Term: []byte("科室"), Position: 6, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("都"), Position: 7, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("要"), Position: 8, Type: analysis.Ideographic, }, { Start: 48, End: 54, Term: []byte("亲口"), Position: 9, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("交代"), Position: 10, Type: analysis.Ideographic, }, { Start: 60, End: 62, Term: []byte("24"), Position: 11, Type: analysis.Numeric, }, { Start: 62, End: 65, Term: []byte("口"), Position: 12, Type: analysis.Ideographic, }, { Start: 65, End: 74, Term: []byte("交换机"), Position: 13, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("等"), Position: 14, Type: analysis.Ideographic, }, { Start: 77, End: 86, Term: []byte("技术性"), Position: 15, Type: analysis.Ideographic, }, { Start: 86, End: 92, Term: []byte("器件"), Position: 16, Type: analysis.Ideographic, }, { Start: 92, End: 95, Term: []byte("的"), Position: 17, Type: analysis.Ideographic, }, { Start: 95, End: 101, Term: []byte("安装"), Position: 18, Type: analysis.Ideographic, }, { Start: 101, End: 107, Term: []byte("工作"), Position: 19, Type: analysis.Ideographic, }, }, }, { []byte("我需要廉租房"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("廉租房"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("永和服装饰品有限公司"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("永和"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("服装"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("饰品"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 30, Term: []byte("有限公司"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("我爱北京天安门"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("爱"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("北京"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("天安门"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("abc"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("abc"), Position: 1, Type: analysis.AlphaNumeric, }, }, }, { []byte("隐马尔可夫"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("隐"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 15, Term: []byte("马尔可夫"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("雷猴是个好网站"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("个"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("好"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("网站"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("“"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 12, Term: []byte("Microsoft"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 12, End: 15, Term: []byte("”"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 15, End: 18, Term: []byte("一"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("词"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("由"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("“"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 27, End: 40, Term: []byte("MICROcomputer"), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 40, End: 43, Term: []byte("("), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 43, End: 49, Term: []byte("微型"), Position: 10, Type: analysis.Ideographic, }, { Start: 49, End: 58, Term: []byte("计算机"), Position: 11, Type: analysis.Ideographic, }, { Start: 58, End: 61, Term: []byte(")"), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 61, End: 64, Term: []byte("”"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 64, End: 67, Term: []byte("和"), Position: 14, Type: analysis.Ideographic, }, { Start: 67, End: 70, Term: []byte("“"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 70, End: 78, Term: []byte("SOFTware"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 78, End: 81, Term: []byte("("), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 81, End: 87, Term: []byte("软件"), Position: 18, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte(")"), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 90, End: 93, Term: []byte("”"), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("两"), Position: 21, Type: analysis.Ideographic, }, { Start: 96, End: 102, Term: []byte("部分"), Position: 22, Type: analysis.Ideographic, }, { Start: 102, End: 108, Term: []byte("组成"), Position: 23, Type: analysis.Ideographic, }, }, }, { []byte("草泥马和欺实马是今年的流行词汇"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("草泥马"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("欺"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("实"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("马"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("今年"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("流行"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("词汇"), Position: 10, Type: analysis.Ideographic, }, }, }, { []byte("伊藤洋华堂总府店"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("伊"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("藤"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("洋华堂"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("总府"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("店"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("中国科学院计算技术研究所"), analysis.TokenStream{ { Start: 0, End: 36, Term: []byte("中国科学院计算技术研究所"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte("罗密欧与朱丽叶"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("罗密欧"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("与"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("朱丽叶"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("我购买了道具和服装"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("购买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("道具"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("服装"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("PS"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 3, Term: []byte(":"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 3, End: 4, Term: []byte(" "), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("我"), Position: 4, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("觉得"), Position: 5, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("开源"), Position: 6, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("有"), Position: 7, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("一个"), Position: 8, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("好处"), Position: 9, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte(","), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 37, End: 43, Term: []byte("就是"), Position: 11, Type: analysis.Ideographic, }, { Start: 43, End: 49, Term: []byte("能够"), Position: 12, Type: analysis.Ideographic, }, { Start: 49, End: 55, Term: []byte("敦促"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("自己"), Position: 14, Type: analysis.Ideographic, }, { Start: 61, End: 73, Term: []byte("不断改进"), Position: 15, Type: analysis.Ideographic, }, { Start: 73, End: 76, Term: []byte(","), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 76, End: 82, Term: []byte("避免"), Position: 17, Type: analysis.Ideographic, }, { Start: 82, End: 85, Term: []byte("敞"), Position: 18, Type: analysis.Ideographic, }, { Start: 85, End: 88, Term: []byte("帚"), Position: 19, Type: analysis.Ideographic, }, { Start: 88, End: 94, Term: []byte("自珍"), Position: 20, Type: analysis.Ideographic, }, }, }, { []byte("湖北省石首市"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("湖北省"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("石首市"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("湖北省十堰市"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("湖北省"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("十堰市"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("总经理完成了这件事情"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("总经理"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("完成"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("这件"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("事情"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("电脑修好了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("电脑"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("修好"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("做好了这件事情就一了百了了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("做好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("这件"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("事情"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 36, Term: []byte("一了百了"), Position: 6, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("了"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("人们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("审美"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("观点"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("不同"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("我们买了一个美的空调"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("我们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("一个"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("美的"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("空调"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("线程初始化时我们要注意"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("线程"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("初始化"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("我们"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("要"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("注意"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("一个分子是由好多原子组织成的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("一个"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("分子"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("是"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("由"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("好多"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("原子"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("组织"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("成"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("祝你马到功成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("祝"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("你"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 18, Term: []byte("马到功成"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("他掉进了无底洞里"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("掉"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("进"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("无底洞"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("里"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("中国的首都是北京"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("中国"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("北京"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("孙君意"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("孙"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("君"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("意"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("外交部发言人马朝旭"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("外交部"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("发言人"), Position: 2, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("马朝旭"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("领导人会议和第四届东亚峰会"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("领导人"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("会议"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("第四届"), Position: 4, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("东亚"), Position: 5, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("峰会"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("在过去的这五年"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("过去"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("这"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("五年"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("还需要很长的路要走"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("还"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("很"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("长"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("路"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("要"), Position: 7, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("走"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("60周年首都阅兵"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("60"), Position: 1, Type: analysis.Numeric, }, { Start: 2, End: 8, Term: []byte("周年"), Position: 2, Type: analysis.Ideographic, }, { Start: 8, End: 14, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("阅兵"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("你好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("人们"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("审美"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("观点"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("不同"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后来世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("来"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后去世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("去"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("但是后来我才知道你是对的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("但是"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("后来"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("我"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("才"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("知道"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("你"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("对"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("存在即合理"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("存在"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("即"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("合理"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("的的的的的在的的的的就以和和和"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("的"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("在"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("就"), Position: 11, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("以"), Position: 12, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("和"), Position: 13, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("和"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("和"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("I love你,不以为耻,反以为rong"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte("I"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 2, Term: []byte(" "), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 2, End: 6, Term: []byte("love"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 6, End: 9, Term: []byte("你"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte(","), Position: 5, Type: analysis.AlphaNumeric, }, { Start: 12, End: 24, Term: []byte("不以为耻"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte(","), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 27, End: 30, Term: []byte("反"), Position: 8, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("以为"), Position: 9, Type: analysis.Ideographic, }, { Start: 36, End: 40, Term: []byte("rong"), Position: 10, Type: analysis.AlphaNumeric, }, }, }, { []byte("因"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("因"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("很好但主要是基于网页形式"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("很"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("好"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("但"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("主要"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("基于"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("网页"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("形式"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("为什么我不能拥有想要的生活"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("为什么"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("我"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("不能"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("拥有"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("想要"), Position: 5, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("生活"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("后来我才"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("后来"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("我"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("才"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("此次来中国是为了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("此次"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("来"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("中国"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("为了"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("使用"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("它"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("就"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("可以"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("解决"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一些"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("问题"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte(",使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte(","), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 7, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 10, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 16, End: 22, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 40, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("其实使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("其实"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("好人使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("好人"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("是因为和国家"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("是因为"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("国家"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("老年搜索还支持"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("老年"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("搜索"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("还"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("支持"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("干脆"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("就"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("把"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("那"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("部"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("蒙"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("人"), Position: 7, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("闲"), Position: 9, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("法"), Position: 10, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("给"), Position: 11, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("废"), Position: 12, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("了"), Position: 13, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("拉倒"), Position: 14, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("!"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 51, End: 53, Term: []byte("RT"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 53, End: 54, Term: []byte(" "), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 54, End: 55, Term: []byte("@"), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 55, End: 67, Term: []byte("laoshipukong"), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 67, End: 68, Term: []byte(" "), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 68, End: 69, Term: []byte(":"), Position: 21, Type: analysis.AlphaNumeric, }, { Start: 69, End: 70, Term: []byte(" "), Position: 22, Type: analysis.AlphaNumeric, }, { Start: 70, End: 72, Term: []byte("27"), Position: 23, Type: analysis.Numeric, }, { Start: 72, End: 75, Term: []byte("日"), Position: 24, Type: analysis.Ideographic, }, { Start: 75, End: 78, Term: []byte(","), Position: 25, Type: analysis.AlphaNumeric, }, { Start: 78, End: 99, Term: []byte("全国人大常委会"), Position: 26, Type: analysis.Ideographic, }, { Start: 99, End: 108, Term: []byte("第三次"), Position: 27, Type: analysis.Ideographic, }, { Start: 108, End: 114, Term: []byte("审议"), Position: 28, Type: analysis.Ideographic, }, { Start: 114, End: 120, Term: []byte("侵权"), Position: 29, Type: analysis.Ideographic, }, { Start: 120, End: 129, Term: []byte("责任法"), Position: 30, Type: analysis.Ideographic, }, { Start: 129, End: 135, Term: []byte("草案"), Position: 31, Type: analysis.Ideographic, }, { Start: 135, End: 138, Term: []byte(","), Position: 32, Type: analysis.AlphaNumeric, }, { Start: 138, End: 144, Term: []byte("删除"), Position: 33, Type: analysis.Ideographic, }, { Start: 144, End: 147, Term: []byte("了"), Position: 34, Type: analysis.Ideographic, }, { Start: 147, End: 153, Term: []byte("有关"), Position: 35, Type: analysis.Ideographic, }, { Start: 153, End: 159, Term: []byte("医疗"), Position: 36, Type: analysis.Ideographic, }, { Start: 159, End: 165, Term: []byte("损害"), Position: 37, Type: analysis.Ideographic, }, { Start: 165, End: 171, Term: []byte("责任"), Position: 38, Type: analysis.Ideographic, }, { Start: 171, End: 174, Term: []byte("“"), Position: 39, Type: analysis.AlphaNumeric, }, { Start: 174, End: 180, Term: []byte("举证"), Position: 40, Type: analysis.Ideographic, }, { Start: 180, End: 186, Term: []byte("倒置"), Position: 41, Type: analysis.Ideographic, }, { Start: 186, End: 189, Term: []byte("”"), Position: 42, Type: analysis.AlphaNumeric, }, { Start: 189, End: 192, Term: []byte("的"), Position: 43, Type: analysis.Ideographic, }, { Start: 192, End: 198, Term: []byte("规定"), Position: 44, Type: analysis.Ideographic, }, { Start: 198, End: 201, Term: []byte("。"), Position: 45, Type: analysis.AlphaNumeric, }, { Start: 201, End: 204, Term: []byte("在"), Position: 46, Type: analysis.Ideographic, }, { Start: 204, End: 210, Term: []byte("医患"), Position: 47, Type: analysis.Ideographic, }, { Start: 210, End: 216, Term: []byte("纠纷"), Position: 48, Type: analysis.Ideographic, }, { Start: 216, End: 219, Term: []byte("中"), Position: 49, Type: analysis.Ideographic, }, { Start: 219, End: 222, Term: []byte("本"), Position: 50, Type: analysis.Ideographic, }, { Start: 222, End: 225, Term: []byte("已"), Position: 51, Type: analysis.Ideographic, }, { Start: 225, End: 231, Term: []byte("处于"), Position: 52, Type: analysis.Ideographic, }, { Start: 231, End: 237, Term: []byte("弱势"), Position: 53, Type: analysis.Ideographic, }, { Start: 237, End: 243, Term: []byte("地位"), Position: 54, Type: analysis.Ideographic, }, { Start: 243, End: 246, Term: []byte("的"), Position: 55, Type: analysis.Ideographic, }, { Start: 246, End: 255, Term: []byte("消费者"), Position: 56, Type: analysis.Ideographic, }, { Start: 255, End: 261, Term: []byte("由此"), Position: 57, Type: analysis.Ideographic, }, { Start: 261, End: 264, Term: []byte("将"), Position: 58, Type: analysis.Ideographic, }, { Start: 264, End: 270, Term: []byte("陷入"), Position: 59, Type: analysis.Ideographic, }, { Start: 270, End: 282, Term: []byte("万劫不复"), Position: 60, Type: analysis.Ideographic, }, { Start: 282, End: 285, Term: []byte("的"), Position: 61, Type: analysis.Ideographic, }, { Start: 285, End: 291, Term: []byte("境地"), Position: 62, Type: analysis.Ideographic, }, { Start: 291, End: 294, Term: []byte("。"), Position: 63, Type: analysis.AlphaNumeric, }, { Start: 294, End: 295, Term: []byte(" "), Position: 64, Type: analysis.AlphaNumeric, }, }, }, { []byte("大"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("大"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("他说的确实在理"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("说"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("确实"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("在"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("理"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春节讲话"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("长春"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("市长"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("春节"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("讲话"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("结婚的和尚未结婚的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结婚"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("尚未"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("结婚"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("结合成分子时"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结合"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("成"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("分子"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("旅游和服务是最好的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("旅游"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("服务"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("最好"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("这件事情的确是我的错"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("这件"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("事情"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("的确"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("我"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("错"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("供大家参考指正"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("供"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("大家"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("参考"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("指正"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("哈尔滨政府公布塌桥原因"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("哈尔滨"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("政府"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("公布"), Position: 3, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("塌"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("桥"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("原因"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("我在机场入口处"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("在"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("机场"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("入口处"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("邢永臣摄影报道"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("邢"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("永"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("臣"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("摄影"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("报道"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("BP神经网络如何训练才能在分类时增加区分度?"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("BP"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 14, Term: []byte("神经网络"), Position: 2, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("如何"), Position: 3, Type: analysis.Ideographic, }, { Start: 20, End: 26, Term: []byte("训练"), Position: 4, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("才能"), Position: 5, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("在"), Position: 6, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("分类"), Position: 7, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("时"), Position: 8, Type: analysis.Ideographic, }, { Start: 44, End: 50, Term: []byte("增加"), Position: 9, Type: analysis.Ideographic, }, { Start: 50, End: 59, Term: []byte("区分度"), Position: 10, Type: analysis.Ideographic, }, { Start: 59, End: 62, Term: []byte("?"), Position: 11, Type: analysis.AlphaNumeric, }, }, }, { []byte("南京市长江大桥"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("南京市"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 21, Term: []byte("长江大桥"), Position: 2, Type: analysis.Ideographic, }, }, }, { []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("应"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("一些"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("使用者"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("建议"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte(","), Position: 6, Type: analysis.AlphaNumeric, }, { Start: 30, End: 33, Term: []byte("也"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("为了"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("便于"), Position: 9, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("利用"), Position: 10, Type: analysis.Ideographic, }, { Start: 51, End: 59, Term: []byte("NiuTrans"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 59, End: 65, Term: []byte("用于"), Position: 12, Type: analysis.Ideographic, }, { Start: 65, End: 68, Term: []byte("SMT"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 68, End: 74, Term: []byte("研究"), Position: 14, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春药店"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("长春市"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("长春"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("药店"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("邓颖超生前最喜欢的衣服"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("邓颖超"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("生前"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("最"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("喜欢"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("衣服"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("胡锦涛是热爱世界和平的政治局常委"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("胡锦涛"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("热爱"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世界"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("和平"), Position: 5, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("政治局"), Position: 7, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("常委"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("程序员"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("祝"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("海林"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("朱"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("会"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("震"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("是"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("在"), Position: 9, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("孙"), Position: 10, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("健"), Position: 11, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("的"), Position: 12, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("左面"), Position: 13, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("和"), Position: 14, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("右面"), Position: 15, Type: analysis.Ideographic, }, { Start: 60, End: 61, Term: []byte(","), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 61, End: 62, Term: []byte(" "), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 62, End: 65, Term: []byte("范"), Position: 18, Type: analysis.Ideographic, }, { Start: 65, End: 68, Term: []byte("凯"), Position: 19, Type: analysis.Ideographic, }, { Start: 68, End: 71, Term: []byte("在"), Position: 20, Type: analysis.Ideographic, }, { Start: 71, End: 74, Term: []byte("最"), Position: 21, Type: analysis.Ideographic, }, { Start: 74, End: 80, Term: []byte("右面"), Position: 22, Type: analysis.Ideographic, }, { Start: 80, End: 81, Term: []byte("."), Position: 23, Type: analysis.AlphaNumeric, }, { Start: 81, End: 84, Term: []byte("再"), Position: 24, Type: analysis.Ideographic, }, { Start: 84, End: 87, Term: []byte("往"), Position: 25, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte("左"), Position: 26, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("是"), Position: 27, Type: analysis.Ideographic, }, { Start: 93, End: 96, Term: []byte("李"), Position: 28, Type: analysis.Ideographic, }, { Start: 96, End: 99, Term: []byte("松"), Position: 29, Type: analysis.Ideographic, }, { Start: 99, End: 102, Term: []byte("洪"), Position: 30, Type: analysis.Ideographic, }, }, }, { []byte("一次性交多少钱"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("一次性"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("交"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("多少"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("钱"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("两块五一套,三块八一斤,四块七一本,五块六一条"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("两块"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("五"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("一套"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte(","), Position: 4, Type: analysis.AlphaNumeric, }, { Start: 18, End: 24, Term: []byte("三块"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("八"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一斤"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte(","), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 36, End: 42, Term: []byte("四块"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("七"), Position: 10, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("一本"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte(","), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 54, End: 60, Term: []byte("五块"), Position: 13, Type: analysis.Ideographic, }, { Start: 60, End: 63, Term: []byte("六"), Position: 14, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("一条"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("小和尚留了一个像大和尚一样的和尚头"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("小"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("和尚"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("留"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("一个"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("像"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("大"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("和尚"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一样"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 51, Term: []byte("和尚头"), Position: 11, Type: analysis.Ideographic, }, }, }, { []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 27, Term: []byte("中华人民共和国"), Position: 3, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("公民"), Position: 4, Type: analysis.Ideographic, }, { Start: 33, End: 34, Term: []byte(";"), Position: 5, Type: analysis.AlphaNumeric, }, { Start: 34, End: 37, Term: []byte("我"), Position: 6, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("爸爸"), Position: 7, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("是"), Position: 8, Type: analysis.Ideographic, }, { Start: 46, End: 55, Term: []byte("共和党"), Position: 9, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("党员"), Position: 10, Type: analysis.Ideographic, }, { Start: 61, End: 62, Term: []byte(";"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 62, End: 63, Term: []byte(" "), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 63, End: 69, Term: []byte("地铁"), Position: 13, Type: analysis.Ideographic, }, { Start: 69, End: 78, Term: []byte("和平门"), Position: 14, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("站"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("张晓梅去人民医院做了个B超然后去买了件T恤"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("张晓梅"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("去"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人民"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("医院"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("做"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("了"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("个"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 37, Term: []byte("B超"), Position: 8, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("然后"), Position: 9, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("去"), Position: 10, Type: analysis.Ideographic, }, { Start: 46, End: 49, Term: []byte("买"), Position: 11, Type: analysis.Ideographic, }, { Start: 49, End: 52, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 52, End: 55, Term: []byte("件"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 59, Term: []byte("T恤"), Position: 14, Type: analysis.Ideographic, }, }, }, { []byte("AT&T是一件不错的公司,给你发offer了吗?"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("AT&T"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("一件"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("不错"), Position: 4, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("公司"), Position: 6, Type: analysis.Ideographic, }, { Start: 28, End: 31, Term: []byte(","), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 31, End: 34, Term: []byte("给"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte("你"), Position: 9, Type: analysis.Ideographic, }, { Start: 37, End: 40, Term: []byte("发"), Position: 10, Type: analysis.Ideographic, }, { Start: 40, End: 45, Term: []byte("offer"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("吗"), Position: 13, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("?"), Position: 14, Type: analysis.AlphaNumeric, }, }, }, { []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("C++"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 6, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 8, Term: []byte("c#"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 8, End: 11, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("什么"), Position: 5, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("关系"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("?"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 26, End: 28, Term: []byte("11"), Position: 8, Type: analysis.Numeric, }, { Start: 28, End: 29, Term: []byte("+"), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 29, End: 32, Term: []byte("122"), Position: 10, Type: analysis.Numeric, }, { Start: 32, End: 33, Term: []byte("="), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 33, End: 36, Term: []byte("133"), Position: 12, Type: analysis.Numeric, }, { Start: 36, End: 39, Term: []byte(","), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 39, End: 42, Term: []byte("是"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("吗"), Position: 15, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("?"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 48, End: 50, Term: []byte("PI"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 50, End: 51, Term: []byte("="), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 51, End: 52, Term: []byte("3"), Position: 19, Type: analysis.Numeric, }, { Start: 52, End: 53, Term: []byte("."), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 53, End: 58, Term: []byte("14159"), Position: 21, Type: analysis.Numeric, }, }, }, { []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("你"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("认识"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("那个"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("主席"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("握手"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("的哥"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("吗"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("?"), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("他"), Position: 11, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("开"), Position: 12, Type: analysis.Ideographic, }, { Start: 51, End: 57, Term: []byte("一辆"), Position: 13, Type: analysis.Ideographic, }, { Start: 57, End: 63, Term: []byte("黑色"), Position: 14, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("的士"), Position: 15, Type: analysis.Ideographic, }, { Start: 69, End: 72, Term: []byte("。"), Position: 16, Type: analysis.AlphaNumeric, }, }, }, { []byte("枪杆子中出政权"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("枪杆子"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("中"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("出"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("政权"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("张三风同学走上了不归路"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("张"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("三"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("风"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("同学"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("走上"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("了"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 33, Term: []byte("不归路"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("阿Q"), Position: 1, Type: analysis.Ideographic, }, { Start: 4, End: 10, Term: []byte("腰间"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("挂"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("着"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 21, Term: []byte("BB机"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("手里"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("拿"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("着"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("大哥大"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte(","), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("说"), Position: 11, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte(":"), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 51, End: 54, Term: []byte("我"), Position: 13, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("一般"), Position: 14, Type: analysis.Ideographic, }, { Start: 60, End: 66, Term: []byte("吃饭"), Position: 15, Type: analysis.Ideographic, }, { Start: 66, End: 69, Term: []byte("不"), Position: 16, Type: analysis.Ideographic, }, { Start: 69, End: 74, Term: []byte("AA制"), Position: 17, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("的"), Position: 18, Type: analysis.Ideographic, }, { Start: 77, End: 80, Term: []byte("。"), Position: 19, Type: analysis.AlphaNumeric, }, }, }, { []byte("在1号店能买到小S和大S八卦的书。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 10, Term: []byte("1号店"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("能"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("买"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 19, Term: []byte("到"), Position: 5, Type: analysis.Ideographic, }, { Start: 19, End: 23, Term: []byte("小S"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("和"), Position: 7, Type: analysis.Ideographic, }, { Start: 26, End: 30, Term: []byte("大S"), Position: 8, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("八卦"), Position: 9, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("书"), Position: 11, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("。"), Position: 12, Type: analysis.AlphaNumeric, }, }, }, } tokenizer, _ := NewJiebaTokenizer("dict.txt", false, false) for _, test := range tests { actual := tokenizer.Tokenize(test.input) if !reflect.DeepEqual(actual, test.output) { t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input)) } } } func TestJiebaTokenizerSearchModeWithoutHMM(t *testing.T) { tests := []struct { input []byte output analysis.TokenStream }{{ []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("这"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("一个"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("伸手"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("不见"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("五指"), Position: 6, Type: analysis.Ideographic, }, { Start: 12, End: 30, Term: []byte("伸手不见五指"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("黑夜"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("。"), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 42, End: 45, Term: []byte("我"), Position: 11, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("叫"), Position: 12, Type: analysis.Ideographic, }, { Start: 51, End: 57, Term: []byte("悟空"), Position: 13, Type: analysis.Ideographic, }, { Start: 48, End: 57, Term: []byte("孙悟空"), Position: 14, Type: analysis.Ideographic, }, { Start: 57, End: 60, Term: []byte(","), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 60, End: 63, Term: []byte("我"), Position: 16, Type: analysis.Ideographic, }, { Start: 63, End: 66, Term: []byte("爱"), Position: 17, Type: analysis.Ideographic, }, { Start: 66, End: 72, Term: []byte("北京"), Position: 18, Type: analysis.Ideographic, }, { Start: 72, End: 75, Term: []byte(","), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 75, End: 78, Term: []byte("我"), Position: 20, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("爱"), Position: 21, Type: analysis.Ideographic, }, { Start: 81, End: 87, Term: []byte("Python"), Position: 22, Type: analysis.AlphaNumeric, }, { Start: 87, End: 90, Term: []byte("和"), Position: 23, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("C++"), Position: 24, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("。"), Position: 25, Type: analysis.AlphaNumeric, }, }, }, { []byte("我不喜欢日本和服。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("不"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("喜欢"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("日本"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("和服"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("。"), Position: 6, Type: analysis.AlphaNumeric, }, }, }, { []byte("雷猴回归人间。"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("回归"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人间"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("。"), Position: 4, Type: analysis.AlphaNumeric, }, }, }, { []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("工信处"), Position: 1, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("干事"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("女干事"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("每月"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("经过"), Position: 5, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("下属"), Position: 6, Type: analysis.Ideographic, }, { Start: 36, End: 42, Term: []byte("科室"), Position: 7, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("都"), Position: 8, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("要"), Position: 9, Type: analysis.Ideographic, }, { Start: 48, End: 54, Term: []byte("亲口"), Position: 10, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("交代"), Position: 11, Type: analysis.Ideographic, }, { Start: 60, End: 62, Term: []byte("24"), Position: 12, Type: analysis.Numeric, }, { Start: 62, End: 65, Term: []byte("口"), Position: 13, Type: analysis.Ideographic, }, { Start: 65, End: 71, Term: []byte("交换"), Position: 14, Type: analysis.Ideographic, }, { Start: 68, End: 74, Term: []byte("换机"), Position: 15, Type: analysis.Ideographic, }, { Start: 65, End: 74, Term: []byte("交换机"), Position: 16, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("等"), Position: 17, Type: analysis.Ideographic, }, { Start: 77, End: 83, Term: []byte("技术"), Position: 18, Type: analysis.Ideographic, }, { Start: 77, End: 86, Term: []byte("技术性"), Position: 19, Type: analysis.Ideographic, }, { Start: 86, End: 92, Term: []byte("器件"), Position: 20, Type: analysis.Ideographic, }, { Start: 92, End: 95, Term: []byte("的"), Position: 21, Type: analysis.Ideographic, }, { Start: 95, End: 101, Term: []byte("安装"), Position: 22, Type: analysis.Ideographic, }, { Start: 101, End: 107, Term: []byte("工作"), Position: 23, Type: analysis.Ideographic, }, }, }, { []byte("我需要廉租房"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("廉租"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("租房"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("廉租房"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("永和服装饰品有限公司"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("永和"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("服装"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("饰品"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("有限"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("公司"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 30, Term: []byte("有限公司"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("我爱北京天安门"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("爱"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("北京"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("天安"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("天安门"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("abc"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("abc"), Position: 1, Type: analysis.AlphaNumeric, }, }, }, { []byte("隐马尔可夫"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("隐"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("可夫"), Position: 2, Type: analysis.Ideographic, }, { Start: 3, End: 12, Term: []byte("马尔可"), Position: 3, Type: analysis.Ideographic, }, { Start: 3, End: 15, Term: []byte("马尔可夫"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("雷猴是个好网站"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("雷猴"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("个"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("好"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("网站"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("“"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 12, Term: []byte("Microsoft"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 12, End: 15, Term: []byte("”"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 15, End: 18, Term: []byte("一"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("词"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("由"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("“"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 27, End: 40, Term: []byte("MICROcomputer"), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 40, End: 43, Term: []byte("("), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 43, End: 49, Term: []byte("微型"), Position: 10, Type: analysis.Ideographic, }, { Start: 49, End: 55, Term: []byte("计算"), Position: 11, Type: analysis.Ideographic, }, { Start: 52, End: 58, Term: []byte("算机"), Position: 12, Type: analysis.Ideographic, }, { Start: 49, End: 58, Term: []byte("计算机"), Position: 13, Type: analysis.Ideographic, }, { Start: 58, End: 61, Term: []byte(")"), Position: 14, Type: analysis.AlphaNumeric, }, { Start: 61, End: 64, Term: []byte("”"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 64, End: 67, Term: []byte("和"), Position: 16, Type: analysis.Ideographic, }, { Start: 67, End: 70, Term: []byte("“"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 70, End: 78, Term: []byte("SOFTware"), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 78, End: 81, Term: []byte("("), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 81, End: 87, Term: []byte("软件"), Position: 20, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte(")"), Position: 21, Type: analysis.AlphaNumeric, }, { Start: 90, End: 93, Term: []byte("”"), Position: 22, Type: analysis.AlphaNumeric, }, { Start: 93, End: 96, Term: []byte("两"), Position: 23, Type: analysis.Ideographic, }, { Start: 96, End: 102, Term: []byte("部分"), Position: 24, Type: analysis.Ideographic, }, { Start: 102, End: 108, Term: []byte("组成"), Position: 25, Type: analysis.Ideographic, }, }, }, { []byte("草泥马和欺实马是今年的流行词汇"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("草泥马"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("欺"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("实"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("马"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("今年"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("流行"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("词汇"), Position: 10, Type: analysis.Ideographic, }, }, }, { []byte("伊藤洋华堂总府店"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("伊"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("藤"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("洋华堂"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("总府"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("店"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("中国科学院计算技术研究所"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("中国"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("科学"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("学院"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("计算"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("技术"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("研究"), Position: 6, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("科学院"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 36, Term: []byte("研究所"), Position: 8, Type: analysis.Ideographic, }, { Start: 0, End: 36, Term: []byte("中国科学院计算技术研究所"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("罗密欧与朱丽叶"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("罗密欧"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("与"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("朱丽叶"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("我购买了道具和服装"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("购买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("道具"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("服装"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("PS"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 3, Term: []byte(":"), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 3, End: 4, Term: []byte(" "), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("我"), Position: 4, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("觉得"), Position: 5, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("开源"), Position: 6, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("有"), Position: 7, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("一个"), Position: 8, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("好处"), Position: 9, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte(","), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 37, End: 43, Term: []byte("就是"), Position: 11, Type: analysis.Ideographic, }, { Start: 43, End: 49, Term: []byte("能够"), Position: 12, Type: analysis.Ideographic, }, { Start: 49, End: 55, Term: []byte("敦促"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("自己"), Position: 14, Type: analysis.Ideographic, }, { Start: 61, End: 67, Term: []byte("不断"), Position: 15, Type: analysis.Ideographic, }, { Start: 67, End: 73, Term: []byte("改进"), Position: 16, Type: analysis.Ideographic, }, { Start: 61, End: 73, Term: []byte("不断改进"), Position: 17, Type: analysis.Ideographic, }, { Start: 73, End: 76, Term: []byte(","), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 76, End: 82, Term: []byte("避免"), Position: 19, Type: analysis.Ideographic, }, { Start: 82, End: 85, Term: []byte("敞"), Position: 20, Type: analysis.Ideographic, }, { Start: 85, End: 88, Term: []byte("帚"), Position: 21, Type: analysis.Ideographic, }, { Start: 88, End: 94, Term: []byte("自珍"), Position: 22, Type: analysis.Ideographic, }, }, }, { []byte("湖北省石首市"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("湖北"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("湖北省"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("石首"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("石首市"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("湖北省十堰市"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("湖北"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("湖北省"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("十堰"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("十堰市"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("总经理完成了这件事情"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("经理"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("总经理"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("完成"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("这件"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("事情"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("电脑修好了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("电脑"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("修好"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("做好了这件事情就一了百了了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("做好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("这件"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("事情"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 36, Term: []byte("一了百了"), Position: 6, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("了"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("人们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("审美"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("观点"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("不同"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("我们买了一个美的空调"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("我们"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("买"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("一个"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("美的"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("空调"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("线程初始化时我们要注意"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("线程"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("初始"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 15, Term: []byte("初始化"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("我们"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("要"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("注意"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("一个分子是由好多原子组织成的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("一个"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("分子"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("是"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("由"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("好多"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("原子"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("组织"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("成"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("祝你马到功成"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("祝"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("你"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 18, Term: []byte("马到功成"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("他掉进了无底洞里"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("掉"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("进"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("无底"), Position: 5, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("无底洞"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("里"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("中国的首都是北京"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("中国"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("北京"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("孙君意"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("孙"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("君"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("意"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("外交部发言人马朝旭"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("外交"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("外交部"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("发言"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("发言人"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("马朝旭"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("领导人会议和第四届东亚峰会"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("领导"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("领导人"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("会议"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("第四"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("四届"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("第四届"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("东亚"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("峰会"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("在过去的这五年"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("过去"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("这"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("五年"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("还需要很长的路要走"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("还"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("需要"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("很"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("长"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("路"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("要"), Position: 7, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("走"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("60周年首都阅兵"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("60"), Position: 1, Type: analysis.Numeric, }, { Start: 2, End: 8, Term: []byte("周年"), Position: 2, Type: analysis.Ideographic, }, { Start: 8, End: 14, Term: []byte("首都"), Position: 3, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("阅兵"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("你好"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("人们"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("审美"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("观点"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("不同"), Position: 7, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后来世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("来"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世博"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("博园"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("买水果然后去世博园"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("买"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("水果"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("然后"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("去"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世博"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("博园"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("世博园"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("但是后来我才知道你是对的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("但是"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("后来"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("我"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("才"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("知道"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("你"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("对"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("存在即合理"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("存在"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("即"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("合理"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("的的的的的在的的的的就以和和和"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("的"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("的"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("在"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("就"), Position: 11, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("以"), Position: 12, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("和"), Position: 13, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("和"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("和"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("I love你,不以为耻,反以为rong"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte("I"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 2, Term: []byte(" "), Position: 2, Type: analysis.AlphaNumeric, }, { Start: 2, End: 6, Term: []byte("love"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 6, End: 9, Term: []byte("你"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte(","), Position: 5, Type: analysis.AlphaNumeric, }, { Start: 12, End: 18, Term: []byte("不以"), Position: 6, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("以为"), Position: 7, Type: analysis.Ideographic, }, { Start: 12, End: 24, Term: []byte("不以为耻"), Position: 8, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte(","), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 27, End: 30, Term: []byte("反"), Position: 10, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("以为"), Position: 11, Type: analysis.Ideographic, }, { Start: 36, End: 40, Term: []byte("rong"), Position: 12, Type: analysis.AlphaNumeric, }, }, }, { []byte("因"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("因"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("很好但主要是基于网页形式"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("很"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("好"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("但"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("主要"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("基于"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("网页"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("形式"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("hello你好人们审美的观点是不同的"), analysis.TokenStream{ { Start: 0, End: 5, Term: []byte("hello"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 5, End: 11, Term: []byte("你好"), Position: 2, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("人们"), Position: 3, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("审美"), Position: 4, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("观点"), Position: 6, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("是"), Position: 7, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("不同"), Position: 8, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("的"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("为什么我不能拥有想要的生活"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("什么"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("为什么"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("我"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("不能"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("拥有"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("想要"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("生活"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("后来我才"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("后来"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("我"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("才"), Position: 3, Type: analysis.Ideographic, }, }, }, { []byte("此次来中国是为了"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("此次"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("来"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("中国"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("为了"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("使用"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("了"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("它"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("就"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("可以"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("解决"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一些"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("问题"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte(",使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 1, Term: []byte(","), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 1, End: 7, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 10, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 16, End: 22, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 28, End: 34, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 40, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("其实使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("其实"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("好人使用了它就可以解决一些问题"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("好人"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("使用"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("它"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("就"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("可以"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("解决"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一些"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("问题"), Position: 9, Type: analysis.Ideographic, }, }, }, { []byte("是因为和国家"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("因为"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("是因为"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("国家"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("老年搜索还支持"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("老年"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("搜索"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("还"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("支持"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("干脆"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("就"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("把"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("那"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("部"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("蒙"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("人"), Position: 7, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 8, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("闲"), Position: 9, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("法"), Position: 10, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("给"), Position: 11, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("废"), Position: 12, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("了"), Position: 13, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("拉倒"), Position: 14, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("!"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 51, End: 53, Term: []byte("RT"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 53, End: 54, Term: []byte(" "), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 54, End: 55, Term: []byte("@"), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 55, End: 67, Term: []byte("laoshipukong"), Position: 19, Type: analysis.AlphaNumeric, }, { Start: 67, End: 68, Term: []byte(" "), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 68, End: 69, Term: []byte(":"), Position: 21, Type: analysis.AlphaNumeric, }, { Start: 69, End: 70, Term: []byte(" "), Position: 22, Type: analysis.AlphaNumeric, }, { Start: 70, End: 72, Term: []byte("27"), Position: 23, Type: analysis.Numeric, }, { Start: 72, End: 75, Term: []byte("日"), Position: 24, Type: analysis.Ideographic, }, { Start: 75, End: 78, Term: []byte(","), Position: 25, Type: analysis.AlphaNumeric, }, { Start: 78, End: 84, Term: []byte("全国"), Position: 26, Type: analysis.Ideographic, }, { Start: 81, End: 87, Term: []byte("国人"), Position: 27, Type: analysis.Ideographic, }, { Start: 84, End: 90, Term: []byte("人大"), Position: 28, Type: analysis.Ideographic, }, { Start: 90, End: 96, Term: []byte("常委"), Position: 29, Type: analysis.Ideographic, }, { Start: 93, End: 99, Term: []byte("委会"), Position: 30, Type: analysis.Ideographic, }, { Start: 90, End: 99, Term: []byte("常委会"), Position: 31, Type: analysis.Ideographic, }, { Start: 78, End: 99, Term: []byte("全国人大常委会"), Position: 32, Type: analysis.Ideographic, }, { Start: 99, End: 105, Term: []byte("第三"), Position: 33, Type: analysis.Ideographic, }, { Start: 102, End: 108, Term: []byte("三次"), Position: 34, Type: analysis.Ideographic, }, { Start: 99, End: 108, Term: []byte("第三次"), Position: 35, Type: analysis.Ideographic, }, { Start: 108, End: 114, Term: []byte("审议"), Position: 36, Type: analysis.Ideographic, }, { Start: 114, End: 120, Term: []byte("侵权"), Position: 37, Type: analysis.Ideographic, }, { Start: 120, End: 126, Term: []byte("责任"), Position: 38, Type: analysis.Ideographic, }, { Start: 120, End: 129, Term: []byte("责任法"), Position: 39, Type: analysis.Ideographic, }, { Start: 129, End: 135, Term: []byte("草案"), Position: 40, Type: analysis.Ideographic, }, { Start: 135, End: 138, Term: []byte(","), Position: 41, Type: analysis.AlphaNumeric, }, { Start: 138, End: 144, Term: []byte("删除"), Position: 42, Type: analysis.Ideographic, }, { Start: 144, End: 147, Term: []byte("了"), Position: 43, Type: analysis.Ideographic, }, { Start: 147, End: 153, Term: []byte("有关"), Position: 44, Type: analysis.Ideographic, }, { Start: 153, End: 159, Term: []byte("医疗"), Position: 45, Type: analysis.Ideographic, }, { Start: 159, End: 165, Term: []byte("损害"), Position: 46, Type: analysis.Ideographic, }, { Start: 165, End: 171, Term: []byte("责任"), Position: 47, Type: analysis.Ideographic, }, { Start: 171, End: 174, Term: []byte("“"), Position: 48, Type: analysis.AlphaNumeric, }, { Start: 174, End: 180, Term: []byte("举证"), Position: 49, Type: analysis.Ideographic, }, { Start: 180, End: 186, Term: []byte("倒置"), Position: 50, Type: analysis.Ideographic, }, { Start: 186, End: 189, Term: []byte("”"), Position: 51, Type: analysis.AlphaNumeric, }, { Start: 189, End: 192, Term: []byte("的"), Position: 52, Type: analysis.Ideographic, }, { Start: 192, End: 198, Term: []byte("规定"), Position: 53, Type: analysis.Ideographic, }, { Start: 198, End: 201, Term: []byte("。"), Position: 54, Type: analysis.AlphaNumeric, }, { Start: 201, End: 204, Term: []byte("在"), Position: 55, Type: analysis.Ideographic, }, { Start: 204, End: 210, Term: []byte("医患"), Position: 56, Type: analysis.Ideographic, }, { Start: 210, End: 216, Term: []byte("纠纷"), Position: 57, Type: analysis.Ideographic, }, { Start: 216, End: 219, Term: []byte("中"), Position: 58, Type: analysis.Ideographic, }, { Start: 219, End: 222, Term: []byte("本"), Position: 59, Type: analysis.Ideographic, }, { Start: 222, End: 225, Term: []byte("已"), Position: 60, Type: analysis.Ideographic, }, { Start: 225, End: 231, Term: []byte("处于"), Position: 61, Type: analysis.Ideographic, }, { Start: 231, End: 237, Term: []byte("弱势"), Position: 62, Type: analysis.Ideographic, }, { Start: 237, End: 243, Term: []byte("地位"), Position: 63, Type: analysis.Ideographic, }, { Start: 243, End: 246, Term: []byte("的"), Position: 64, Type: analysis.Ideographic, }, { Start: 246, End: 252, Term: []byte("消费"), Position: 65, Type: analysis.Ideographic, }, { Start: 246, End: 255, Term: []byte("消费者"), Position: 66, Type: analysis.Ideographic, }, { Start: 255, End: 261, Term: []byte("由此"), Position: 67, Type: analysis.Ideographic, }, { Start: 261, End: 264, Term: []byte("将"), Position: 68, Type: analysis.Ideographic, }, { Start: 264, End: 270, Term: []byte("陷入"), Position: 69, Type: analysis.Ideographic, }, { Start: 276, End: 282, Term: []byte("不复"), Position: 70, Type: analysis.Ideographic, }, { Start: 270, End: 282, Term: []byte("万劫不复"), Position: 71, Type: analysis.Ideographic, }, { Start: 282, End: 285, Term: []byte("的"), Position: 72, Type: analysis.Ideographic, }, { Start: 285, End: 291, Term: []byte("境地"), Position: 73, Type: analysis.Ideographic, }, { Start: 291, End: 294, Term: []byte("。"), Position: 74, Type: analysis.AlphaNumeric, }, { Start: 294, End: 295, Term: []byte(" "), Position: 75, Type: analysis.AlphaNumeric, }, }, }, { []byte("大"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("大"), Position: 1, Type: analysis.Ideographic, }, }, }, { []byte(""), analysis.TokenStream{}, }, { []byte("他说的确实在理"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("他"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("说"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("确实"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("在"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("理"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春节讲话"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("长春"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("市长"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("春节"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("讲话"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("结婚的和尚未结婚的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结婚"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("的"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("和"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("尚未"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("结婚"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("结合成分子时"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("结合"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("成"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("分子"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("时"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("旅游和服务是最好的"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("旅游"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("服务"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("最好"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("这件事情的确是我的错"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("这件"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("事情"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("的确"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("我"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("错"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("供大家参考指正"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("供"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("大家"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("参考"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("指正"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("哈尔滨政府公布塌桥原因"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("哈尔"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("哈尔滨"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("政府"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("公布"), Position: 4, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("塌"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("桥"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("原因"), Position: 7, Type: analysis.Ideographic, }, }, }, { []byte("我在机场入口处"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("在"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("机场"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("入口"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 21, Term: []byte("入口处"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("邢永臣摄影报道"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("邢"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("永"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("臣"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("摄影"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("报道"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("BP神经网络如何训练才能在分类时增加区分度?"), analysis.TokenStream{ { Start: 0, End: 2, Term: []byte("BP"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 2, End: 8, Term: []byte("神经"), Position: 2, Type: analysis.Ideographic, }, { Start: 8, End: 14, Term: []byte("网络"), Position: 3, Type: analysis.Ideographic, }, { Start: 2, End: 11, Term: []byte("神经网"), Position: 4, Type: analysis.Ideographic, }, { Start: 2, End: 14, Term: []byte("神经网络"), Position: 5, Type: analysis.Ideographic, }, { Start: 14, End: 20, Term: []byte("如何"), Position: 6, Type: analysis.Ideographic, }, { Start: 20, End: 26, Term: []byte("训练"), Position: 7, Type: analysis.Ideographic, }, { Start: 26, End: 32, Term: []byte("才能"), Position: 8, Type: analysis.Ideographic, }, { Start: 32, End: 35, Term: []byte("在"), Position: 9, Type: analysis.Ideographic, }, { Start: 35, End: 41, Term: []byte("分类"), Position: 10, Type: analysis.Ideographic, }, { Start: 41, End: 44, Term: []byte("时"), Position: 11, Type: analysis.Ideographic, }, { Start: 44, End: 50, Term: []byte("增加"), Position: 12, Type: analysis.Ideographic, }, { Start: 50, End: 56, Term: []byte("区分"), Position: 13, Type: analysis.Ideographic, }, { Start: 53, End: 59, Term: []byte("分度"), Position: 14, Type: analysis.Ideographic, }, { Start: 50, End: 59, Term: []byte("区分度"), Position: 15, Type: analysis.Ideographic, }, { Start: 59, End: 62, Term: []byte("?"), Position: 16, Type: analysis.AlphaNumeric, }, }, }, { []byte("南京市长江大桥"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("南京"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("京市"), Position: 2, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("南京市"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("长江"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("大桥"), Position: 5, Type: analysis.Ideographic, }, { Start: 9, End: 21, Term: []byte("长江大桥"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("应"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("一些"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("使用"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("用者"), Position: 4, Type: analysis.Ideographic, }, { Start: 9, End: 18, Term: []byte("使用者"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("的"), Position: 6, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("建议"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte(","), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 30, End: 33, Term: []byte("也"), Position: 9, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("为了"), Position: 10, Type: analysis.Ideographic, }, { Start: 39, End: 45, Term: []byte("便于"), Position: 11, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("利用"), Position: 12, Type: analysis.Ideographic, }, { Start: 51, End: 59, Term: []byte("NiuTrans"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 59, End: 65, Term: []byte("用于"), Position: 14, Type: analysis.Ideographic, }, { Start: 65, End: 68, Term: []byte("SMT"), Position: 15, Type: analysis.AlphaNumeric, }, { Start: 68, End: 74, Term: []byte("研究"), Position: 16, Type: analysis.Ideographic, }, }, }, { []byte("长春市长春药店"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("长春"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("长春市"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("长春"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("药店"), Position: 4, Type: analysis.Ideographic, }, }, }, { []byte("邓颖超生前最喜欢的衣服"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("邓颖超"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("生前"), Position: 2, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("最"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("喜欢"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("衣服"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("胡锦涛是热爱世界和平的政治局常委"), analysis.TokenStream{ { Start: 3, End: 9, Term: []byte("锦涛"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("胡锦涛"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("是"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("热爱"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("世界"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("和平"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("政治"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("政治局"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("常委"), Position: 10, Type: analysis.Ideographic, }, }, }, { []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("程序"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("程序员"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("祝"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("海林"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("和"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("朱"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("会"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("震"), Position: 8, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("是"), Position: 9, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte("在"), Position: 10, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("孙"), Position: 11, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("健"), Position: 12, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("的"), Position: 13, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("左面"), Position: 14, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("和"), Position: 15, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("右面"), Position: 16, Type: analysis.Ideographic, }, { Start: 60, End: 61, Term: []byte(","), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 61, End: 62, Term: []byte(" "), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 62, End: 65, Term: []byte("范"), Position: 19, Type: analysis.Ideographic, }, { Start: 65, End: 68, Term: []byte("凯"), Position: 20, Type: analysis.Ideographic, }, { Start: 68, End: 71, Term: []byte("在"), Position: 21, Type: analysis.Ideographic, }, { Start: 71, End: 74, Term: []byte("最"), Position: 22, Type: analysis.Ideographic, }, { Start: 74, End: 80, Term: []byte("右面"), Position: 23, Type: analysis.Ideographic, }, { Start: 80, End: 81, Term: []byte("."), Position: 24, Type: analysis.AlphaNumeric, }, { Start: 81, End: 84, Term: []byte("再"), Position: 25, Type: analysis.Ideographic, }, { Start: 84, End: 87, Term: []byte("往"), Position: 26, Type: analysis.Ideographic, }, { Start: 87, End: 90, Term: []byte("左"), Position: 27, Type: analysis.Ideographic, }, { Start: 90, End: 93, Term: []byte("是"), Position: 28, Type: analysis.Ideographic, }, { Start: 93, End: 96, Term: []byte("李"), Position: 29, Type: analysis.Ideographic, }, { Start: 96, End: 99, Term: []byte("松"), Position: 30, Type: analysis.Ideographic, }, { Start: 99, End: 102, Term: []byte("洪"), Position: 31, Type: analysis.Ideographic, }, }, }, { []byte("一次性交多少钱"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("一次"), Position: 1, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("一次性"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("交"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("多少"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 21, Term: []byte("钱"), Position: 5, Type: analysis.Ideographic, }, }, }, { []byte("两块五一套,三块八一斤,四块七一本,五块六一条"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("两块"), Position: 1, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("五"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("一套"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte(","), Position: 4, Type: analysis.AlphaNumeric, }, { Start: 18, End: 24, Term: []byte("三块"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("八"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("一斤"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 36, Term: []byte(","), Position: 8, Type: analysis.AlphaNumeric, }, { Start: 36, End: 42, Term: []byte("四块"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("七"), Position: 10, Type: analysis.Ideographic, }, { Start: 45, End: 51, Term: []byte("一本"), Position: 11, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte(","), Position: 12, Type: analysis.AlphaNumeric, }, { Start: 54, End: 60, Term: []byte("五块"), Position: 13, Type: analysis.Ideographic, }, { Start: 60, End: 63, Term: []byte("六"), Position: 14, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("一条"), Position: 15, Type: analysis.Ideographic, }, }, }, { []byte("小和尚留了一个像大和尚一样的和尚头"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("小"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("和尚"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("留"), Position: 3, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("了"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("一个"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("像"), Position: 6, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("大"), Position: 7, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("和尚"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("一样"), Position: 9, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 48, Term: []byte("和尚"), Position: 11, Type: analysis.Ideographic, }, { Start: 42, End: 51, Term: []byte("和尚头"), Position: 12, Type: analysis.Ideographic, }, }, }, { []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("我"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 12, Term: []byte("中华"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("华人"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人民"), Position: 5, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("共和"), Position: 6, Type: analysis.Ideographic, }, { Start: 18, End: 27, Term: []byte("共和国"), Position: 7, Type: analysis.Ideographic, }, { Start: 6, End: 27, Term: []byte("中华人民共和国"), Position: 8, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("公民"), Position: 9, Type: analysis.Ideographic, }, { Start: 33, End: 34, Term: []byte(";"), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 34, End: 37, Term: []byte("我"), Position: 11, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("爸爸"), Position: 12, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("是"), Position: 13, Type: analysis.Ideographic, }, { Start: 46, End: 52, Term: []byte("共和"), Position: 14, Type: analysis.Ideographic, }, { Start: 46, End: 55, Term: []byte("共和党"), Position: 15, Type: analysis.Ideographic, }, { Start: 55, End: 61, Term: []byte("党员"), Position: 16, Type: analysis.Ideographic, }, { Start: 61, End: 62, Term: []byte(";"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 62, End: 63, Term: []byte(" "), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 63, End: 69, Term: []byte("地铁"), Position: 19, Type: analysis.Ideographic, }, { Start: 69, End: 75, Term: []byte("和平"), Position: 20, Type: analysis.Ideographic, }, { Start: 69, End: 78, Term: []byte("和平门"), Position: 21, Type: analysis.Ideographic, }, { Start: 78, End: 81, Term: []byte("站"), Position: 22, Type: analysis.Ideographic, }, }, }, { []byte("张晓梅去人民医院做了个B超然后去买了件T恤"), analysis.TokenStream{ { Start: 0, End: 9, Term: []byte("张晓梅"), Position: 1, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("去"), Position: 2, Type: analysis.Ideographic, }, { Start: 12, End: 18, Term: []byte("人民"), Position: 3, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("医院"), Position: 4, Type: analysis.Ideographic, }, { Start: 24, End: 27, Term: []byte("做"), Position: 5, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("了"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("个"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 37, Term: []byte("B超"), Position: 8, Type: analysis.Ideographic, }, { Start: 37, End: 43, Term: []byte("然后"), Position: 9, Type: analysis.Ideographic, }, { Start: 43, End: 46, Term: []byte("去"), Position: 10, Type: analysis.Ideographic, }, { Start: 46, End: 49, Term: []byte("买"), Position: 11, Type: analysis.Ideographic, }, { Start: 49, End: 52, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 52, End: 55, Term: []byte("件"), Position: 13, Type: analysis.Ideographic, }, { Start: 55, End: 59, Term: []byte("T恤"), Position: 14, Type: analysis.Ideographic, }, }, }, { []byte("AT&T是一件不错的公司,给你发offer了吗?"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("AT&T"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 4, End: 7, Term: []byte("是"), Position: 2, Type: analysis.Ideographic, }, { Start: 7, End: 13, Term: []byte("一件"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 19, Term: []byte("不错"), Position: 4, Type: analysis.Ideographic, }, { Start: 19, End: 22, Term: []byte("的"), Position: 5, Type: analysis.Ideographic, }, { Start: 22, End: 28, Term: []byte("公司"), Position: 6, Type: analysis.Ideographic, }, { Start: 28, End: 31, Term: []byte(","), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 31, End: 34, Term: []byte("给"), Position: 8, Type: analysis.Ideographic, }, { Start: 34, End: 37, Term: []byte("你"), Position: 9, Type: analysis.Ideographic, }, { Start: 37, End: 40, Term: []byte("发"), Position: 10, Type: analysis.Ideographic, }, { Start: 40, End: 45, Term: []byte("offer"), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("了"), Position: 12, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("吗"), Position: 13, Type: analysis.Ideographic, }, { Start: 51, End: 54, Term: []byte("?"), Position: 14, Type: analysis.AlphaNumeric, }, }, }, { []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("C++"), Position: 1, Type: analysis.AlphaNumeric, }, { Start: 3, End: 6, Term: []byte("和"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 8, Term: []byte("c#"), Position: 3, Type: analysis.AlphaNumeric, }, { Start: 8, End: 11, Term: []byte("是"), Position: 4, Type: analysis.Ideographic, }, { Start: 11, End: 17, Term: []byte("什么"), Position: 5, Type: analysis.Ideographic, }, { Start: 17, End: 23, Term: []byte("关系"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("?"), Position: 7, Type: analysis.AlphaNumeric, }, { Start: 26, End: 28, Term: []byte("11"), Position: 8, Type: analysis.Numeric, }, { Start: 28, End: 29, Term: []byte("+"), Position: 9, Type: analysis.AlphaNumeric, }, { Start: 29, End: 32, Term: []byte("122"), Position: 10, Type: analysis.Numeric, }, { Start: 32, End: 33, Term: []byte("="), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 33, End: 36, Term: []byte("133"), Position: 12, Type: analysis.Numeric, }, { Start: 36, End: 39, Term: []byte(","), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 39, End: 42, Term: []byte("是"), Position: 14, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("吗"), Position: 15, Type: analysis.Ideographic, }, { Start: 45, End: 48, Term: []byte("?"), Position: 16, Type: analysis.AlphaNumeric, }, { Start: 48, End: 50, Term: []byte("PI"), Position: 17, Type: analysis.AlphaNumeric, }, { Start: 50, End: 51, Term: []byte("="), Position: 18, Type: analysis.AlphaNumeric, }, { Start: 51, End: 52, Term: []byte("3"), Position: 19, Type: analysis.Numeric, }, { Start: 52, End: 53, Term: []byte("."), Position: 20, Type: analysis.AlphaNumeric, }, { Start: 53, End: 58, Term: []byte("14159"), Position: 21, Type: analysis.Numeric, }, }, }, { []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("你"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("认识"), Position: 2, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("那个"), Position: 3, Type: analysis.Ideographic, }, { Start: 15, End: 18, Term: []byte("和"), Position: 4, Type: analysis.Ideographic, }, { Start: 18, End: 24, Term: []byte("主席"), Position: 5, Type: analysis.Ideographic, }, { Start: 24, End: 30, Term: []byte("握手"), Position: 6, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("的"), Position: 7, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("的哥"), Position: 8, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("吗"), Position: 9, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("?"), Position: 10, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("他"), Position: 11, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte("开"), Position: 12, Type: analysis.Ideographic, }, { Start: 51, End: 57, Term: []byte("一辆"), Position: 13, Type: analysis.Ideographic, }, { Start: 57, End: 63, Term: []byte("黑色"), Position: 14, Type: analysis.Ideographic, }, { Start: 63, End: 69, Term: []byte("的士"), Position: 15, Type: analysis.Ideographic, }, { Start: 69, End: 72, Term: []byte("。"), Position: 16, Type: analysis.AlphaNumeric, }, }, }, { []byte("枪杆子中出政权"), analysis.TokenStream{ { Start: 0, End: 6, Term: []byte("枪杆"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 9, Term: []byte("杆子"), Position: 2, Type: analysis.Ideographic, }, { Start: 0, End: 9, Term: []byte("枪杆子"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 12, Term: []byte("中"), Position: 4, Type: analysis.Ideographic, }, { Start: 12, End: 15, Term: []byte("出"), Position: 5, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("政权"), Position: 6, Type: analysis.Ideographic, }, }, }, { []byte("张三风同学走上了不归路"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("张"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 6, Term: []byte("三"), Position: 2, Type: analysis.Ideographic, }, { Start: 6, End: 9, Term: []byte("风"), Position: 3, Type: analysis.Ideographic, }, { Start: 9, End: 15, Term: []byte("同学"), Position: 4, Type: analysis.Ideographic, }, { Start: 15, End: 21, Term: []byte("走上"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 24, Term: []byte("了"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 33, Term: []byte("归路"), Position: 7, Type: analysis.Ideographic, }, { Start: 24, End: 33, Term: []byte("不归路"), Position: 8, Type: analysis.Ideographic, }, }, }, { []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"), analysis.TokenStream{ { Start: 0, End: 4, Term: []byte("阿Q"), Position: 1, Type: analysis.Ideographic, }, { Start: 4, End: 10, Term: []byte("腰间"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("挂"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("着"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 21, Term: []byte("BB机"), Position: 5, Type: analysis.Ideographic, }, { Start: 21, End: 27, Term: []byte("手里"), Position: 6, Type: analysis.Ideographic, }, { Start: 27, End: 30, Term: []byte("拿"), Position: 7, Type: analysis.Ideographic, }, { Start: 30, End: 33, Term: []byte("着"), Position: 8, Type: analysis.Ideographic, }, { Start: 33, End: 39, Term: []byte("大哥"), Position: 9, Type: analysis.Ideographic, }, { Start: 33, End: 42, Term: []byte("大哥大"), Position: 10, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte(","), Position: 11, Type: analysis.AlphaNumeric, }, { Start: 45, End: 48, Term: []byte("说"), Position: 12, Type: analysis.Ideographic, }, { Start: 48, End: 51, Term: []byte(":"), Position: 13, Type: analysis.AlphaNumeric, }, { Start: 51, End: 54, Term: []byte("我"), Position: 14, Type: analysis.Ideographic, }, { Start: 54, End: 60, Term: []byte("一般"), Position: 15, Type: analysis.Ideographic, }, { Start: 60, End: 66, Term: []byte("吃饭"), Position: 16, Type: analysis.Ideographic, }, { Start: 66, End: 69, Term: []byte("不"), Position: 17, Type: analysis.Ideographic, }, { Start: 69, End: 74, Term: []byte("AA制"), Position: 18, Type: analysis.Ideographic, }, { Start: 74, End: 77, Term: []byte("的"), Position: 19, Type: analysis.Ideographic, }, { Start: 77, End: 80, Term: []byte("。"), Position: 20, Type: analysis.AlphaNumeric, }, }, }, { []byte("在1号店能买到小S和大S八卦的书。"), analysis.TokenStream{ { Start: 0, End: 3, Term: []byte("在"), Position: 1, Type: analysis.Ideographic, }, { Start: 3, End: 10, Term: []byte("1号店"), Position: 2, Type: analysis.Ideographic, }, { Start: 10, End: 13, Term: []byte("能"), Position: 3, Type: analysis.Ideographic, }, { Start: 13, End: 16, Term: []byte("买"), Position: 4, Type: analysis.Ideographic, }, { Start: 16, End: 19, Term: []byte("到"), Position: 5, Type: analysis.Ideographic, }, { Start: 19, End: 23, Term: []byte("小S"), Position: 6, Type: analysis.Ideographic, }, { Start: 23, End: 26, Term: []byte("和"), Position: 7, Type: analysis.Ideographic, }, { Start: 26, End: 30, Term: []byte("大S"), Position: 8, Type: analysis.Ideographic, }, { Start: 30, End: 36, Term: []byte("八卦"), Position: 9, Type: analysis.Ideographic, }, { Start: 36, End: 39, Term: []byte("的"), Position: 10, Type: analysis.Ideographic, }, { Start: 39, End: 42, Term: []byte("书"), Position: 11, Type: analysis.Ideographic, }, { Start: 42, End: 45, Term: []byte("。"), Position: 12, Type: analysis.AlphaNumeric, }, }, }, } tokenizer, _ := NewJiebaTokenizer("dict.txt", false, true) for _, test := range tests { actual := tokenizer.Tokenize(test.input) if !reflect.DeepEqual(actual, test.output) { t.Fatalf("Expected %v, got %v for %s", test.output, actual, string(test.input)) } } }