diff --git a/README.md b/README.md
index 48489d5..ade962f 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,18 @@
-结巴分词Go版 jiebago
-===================
+# 结巴分词 Go 语言版:jiebago
+
[![Build Status](https://travis-ci.org/wangbin/jiebago.png?branch=master)](https://travis-ci.org/wangbin/jiebago)
[结巴分词](https://github.com/fxsjy/jieba)是[@fxsjy](https://github.com/fxsjy)用Python编写的中文分词组件,jiebago是结巴分词的Go语言实现,目前已经实现的功能包括:三种模式分词、自定义词典、关键词提取和词性标注。
-安装
-=====
+## 安装
- go get github.com/wangbin/jiebago
+
+ go get github.com/wangbin/jiebago/...
-分词
-=====
+## 分词
+
package main
@@ -53,8 +53,8 @@
【搜索引擎模式】:小明 / 硕士 / 毕业 / 于 / 中国 / 科学 / 学院 / 科学院 / 中国科学院 / 计算 / 计算所 / , / 后 / 在 / 日本 / 京都 / 大学 / 日本京都大学 / 深造 /
-添加自定义词典
-=============
+## 添加自定义词典
+
var sentence = "李小福是创新办主任也是云计算方面的专家"
fmt.Print("Before: ")
@@ -69,12 +69,7 @@
After: 李小福 / 是 / 创新办 / 主任 / 也 / 是 / 云计算 / 方面 / 的 / 专家 /
-关键词提取
-========
-
-需要先安装analyse模块:
-
- go get github.com/wangbin/jiebago/analyse
+## 关键词提取
示例代码:
@@ -133,12 +128,7 @@
全资 0.306324
商业 0.306138
-词性标注
-=======
-
-需要先安装posseg模块:
-
- go get github.com/wangbin/jiebago/posseg
+## 词性标注
示例代码:
@@ -166,8 +156,8 @@
北京 ns
天安门 ns
-并行分词
-=======
+
+## 并行分词
因为Go有强大的goroutine特性,并行分词实现起来非常简单,所以并没有内置到jiebago中,而是由使用者自己实现,下面是一个简单的例子:
@@ -207,40 +197,213 @@
writer.Flush()
-Tokenize
-=========
+## Tokenize:返回词语在原文的起始位置
- var sentence = "永和服装饰品有限公司"
- // 默认模式
- for _, token := range jiebago.Tokenize(sentence, "default", true) {
- fmt.Printf("word %s\t\t start: %d \t\t end:%d\n", token.Word, token.Start, token.End)
+
+注意新版的 Jiebago Tokenizer 实现了 Bleve 的 Tokenizer 接口,跟之前的实现有很大的变化:
+
+1. 接受的参数必须是 []byte。
+2. 输出的 Token 的起始和终止位置是 byte 的位置,不是之前的 rune 的位置,所以和 Python 版的 Jieba.tokenize 输出不一致。
+
+```
+package main
+
+import (
+ "fmt"
+ "github.com/wangbin/jiebago/tokenizers"
+)
+
+const DictPath = "/path/to/dict.txt"
+
+var sentence = []byte("永和服装饰品有限公司")
+
+func main() {
+ // default mode
+ tokenizer, _ := tokenizers.NewJiebaTokenizer(DictPath, true, false)
+ for _, token := range tokenizer.Tokenize(sentence) {
+ fmt.Printf(
+ "Term: %s\t Start: %d \t End: %d\t Position: %d\t Type: %d\n",
+ token.Term, token.Start, token.End, token.Position, token.Type)
}
- // 搜索模式
- for _, token := range jiebago.Tokenize(sentence, "search", true) {
- fmt.Printf("word %s\t\t start: %d \t\t end:%d\n", token.Word, token.Start, token.End)
+
+ //search mode
+ tokenizer, _ = tokenizers.NewJiebaTokenizer(DictPath, true, true)
+ for _, token := range tokenizer.Tokenize(sentence) {
+ fmt.Printf(
+ "Term: %s\t Start: %d \t End: %d\t Position: %d\t Type: %d\n",
+ token.Term, token.Start, token.End, token.Position, token.Type)
}
+}
+
+```
+默认模式输出:
+
+```
+Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
+Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
+Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
+Term: 有限公司 Start: 18 End: 30 Position: 4 Type: 1
+```
+搜索模式输出:
+
+```
+Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
+Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
+Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
+Term: 有限 Start: 18 End: 24 Position: 4 Type: 1
+Term: 公司 Start: 24 End: 30 Position: 5 Type: 1
+Term: 有限公司 Start: 18 End: 30 Position: 6 Type: 1
+```
+### 配合 bleve 进行中文全文检索
+
+[bleve](http://www.blevesearch.com/) 是一个 Go 语言实现的全文索引系统,jiebago 可以配合 bleve 使用实现中文的全文检索。一个简单的用法示例:
+
+```
+package main
+
+import (
+ "fmt"
+ "github.com/blevesearch/bleve"
+ _ "github.com/wangbin/jiebago/tokenizers"
+ "log"
+)
+
+func main() {
+ // open a new index
+ indexMapping := bleve.NewIndexMapping()
+
+ err := indexMapping.AddCustomTokenizer("jieba",
+ map[string]interface{}{
+ "file": "/path/to/dict.txt",
+ "type": "jieba",
+ })
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ err = indexMapping.AddCustomAnalyzer("jieba",
+ map[string]interface{}{
+ "type": "custom",
+ "tokenizer": "jieba",
+ "token_filters": []string{
+ "possessive_en",
+ "to_lower",
+ "stop_en",
+ },
+ })
+
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ indexMapping.DefaultAnalyzer = "jieba"
+
+ index, err := bleve.New("example.bleve", indexMapping)
+
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ docs := []struct {
+ Title string
+ Name string
+ }{
+ {
+ Title: "Doc 1",
+ Name: "This is the first document we’ve added",
+ },
+ {
+ Title: "Doc 2",
+ Name: "The second one 你 中文测试中文 is even more interesting! 吃水果",
+ },
+ {
+ Title: "Doc 3",
+ Name: "买水果然后来世博园。",
+ },
+ {
+ Title: "Doc 4",
+ Name: "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
+ },
+ {
+ Title: "Doc 5",
+ Name: "咱俩交换一下吧。",
+ },
+ }
+ // index docs
+ for _, doc := range docs {
+ index.Index(doc.Title, doc)
+ }
+
+ // search for some text
+ for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} {
+ query := bleve.NewMatchQuery(keyword)
+ search := bleve.NewSearchRequest(query)
+ search.Highlight = bleve.NewHighlight()
+ searchResults, err := index.Search(search)
+ if err != nil {
+ log.Fatal(err)
+ }
+ fmt.Printf("Result of %s: %s\n", keyword, searchResults)
+ }
+}
+```
输出结果:
- word 永和 start: 0 end:2
- word 服装 start: 2 end:4
- word 饰品 start: 4 end:6
- word 有限公司 start: 6 end:10
+```
+Result of 水果世博园: 2 matches, showing 1 through 2, took 377.988µs
+ 1. Doc 3 (1.099550)
+ Name
+ 买水果然后来世博园。
+ 2. Doc 2 (0.031941)
+ Name
+ The second one 你 中文测试中文 is even more interesting! 吃水果
- word 永和 start: 0 end:2
- word 服装 start: 0 end:2
- word 饰品 start: 0 end:2
- word 有限 start: 0 end:2
- word 公司 start: 2 end:4
- word 有限公司 start: 0 end:4
+Result of 你: 1 matches, showing 1 through 1, took 103.367µs
+ 1. Doc 2 (0.391161)
+ Name
+ The second one 你 中文测试中文 is even more interesting! 吃水果
-分词速度
-=======
+Result of first: 1 matches, showing 1 through 1, took 373.317µs
+ 1. Doc 1 (0.512150)
+ Name
+ This is the first document we’ve added
+
+Result of 中文: 1 matches, showing 1 through 1, took 106.433µs
+ 1. Doc 2 (0.553186)
+ Name
+ The second one 你 中文测试中文 is even more interesting! 吃水果
+
+Result of 交换机: 2 matches, showing 1 through 2, took 188.235µs
+ 1. Doc 4 (0.608495)
+ Name
+ 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作
+ 2. Doc 5 (0.086700)
+ Name
+ 咱俩交换一下吧。
+
+Result of 交换: 2 matches, showing 1 through 2, took 148.822µs
+ 1. Doc 5 (0.534158)
+ Name
+ 咱俩交换一下吧。
+ 2. Doc 4 (0.296297)
+ Name
+ 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作
+```
+
+## 分词速度
- 2MB / Second in Full Mode
- 700KB / Second in Default Mode
- Test Env: AMD Phenom(tm) II X6 1055T CPU @ 2.8GHz; 《金庸全集》
-许可证
-======
+## 许可证
+
MIT: http://wangbin.mit-license.org
diff --git a/tokenize.go b/tokenize.go
deleted file mode 100644
index 3765207..0000000
--- a/tokenize.go
+++ /dev/null
@@ -1,37 +0,0 @@
-package jiebago
-
-type token struct {
- Word string
- Start int
- End int
-}
-
-// Return words with position.
-func Tokenize(sentence string, mode string, HMM bool) []token {
- tokens := make([]token, 0)
- start := 0
- var width int
- for word := range Cut(sentence, false, HMM) {
- if mode == "default" {
- width = len([]rune(word))
- tokens = append(tokens, token{word, start, start + width})
- start += width
-
- } else {
- runes := []rune(word)
- width = len(runes)
- for _, step := range []int{2, 3} {
- if width > step {
- for i := 0; i < width-step+1; i++ {
- gram := string(runes[i : i+step])
- if _, ok := Trie.Freq[gram]; ok {
- tokens = append(tokens, token{gram, start + i, start + i + step})
- }
- }
- }
- }
- tokens = append(tokens, token{word, start, start + width})
- }
- }
- return tokens
-}
diff --git a/tokenize_test.go b/tokenize_test.go
deleted file mode 100644
index a088bbd..0000000
--- a/tokenize_test.go
+++ /dev/null
@@ -1,390 +0,0 @@
-package jiebago
-
-import "testing"
-
-var (
- result = [][]token{
- []token{token{"\u8fd9\u662f", 0, 2}, token{"\u4e00\u4e2a", 2, 4}, token{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", 4, 10}, token{"\u7684", 10, 11}, token{"\u9ed1\u591c", 11, 13}, token{"\u3002", 13, 14}, token{"\u6211", 14, 15}, token{"\u53eb", 15, 16}, token{"\u5b59\u609f\u7a7a", 16, 19}, token{"\uff0c", 19, 20}, token{"\u6211", 20, 21}, token{"\u7231", 21, 22}, token{"\u5317\u4eac", 22, 24}, token{"\uff0c", 24, 25}, token{"\u6211", 25, 26}, token{"\u7231", 26, 27}, token{"Python", 27, 33}, token{"\u548c", 33, 34}, token{"C++", 34, 37}, token{"\u3002", 37, 38}},
- []token{token{"\u6211", 0, 1}, token{"\u4e0d", 1, 2}, token{"\u559c\u6b22", 2, 4}, token{"\u65e5\u672c", 4, 6}, token{"\u548c\u670d", 6, 8}, token{"\u3002", 8, 9}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u56de\u5f52", 2, 4}, token{"\u4eba\u95f4", 4, 6}, token{"\u3002", 6, 7}},
- []token{token{"\u5de5\u4fe1\u5904", 0, 3}, token{"\u5973\u5e72\u4e8b", 3, 6}, token{"\u6bcf\u6708", 6, 8}, token{"\u7ecf\u8fc7", 8, 10}, token{"\u4e0b\u5c5e", 10, 12}, token{"\u79d1\u5ba4", 12, 14}, token{"\u90fd", 14, 15}, token{"\u8981", 15, 16}, token{"\u4eb2\u53e3", 16, 18}, token{"\u4ea4\u4ee3", 18, 20}, token{"24", 20, 22}, token{"\u53e3", 22, 23}, token{"\u4ea4\u6362\u673a", 23, 26}, token{"\u7b49", 26, 27}, token{"\u6280\u672f\u6027", 27, 30}, token{"\u5668\u4ef6", 30, 32}, token{"\u7684", 32, 33}, token{"\u5b89\u88c5", 33, 35}, token{"\u5de5\u4f5c", 35, 37}},
- []token{token{"\u6211", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5ec9\u79df\u623f", 3, 6}},
- []token{token{"\u6c38\u548c", 0, 2}, token{"\u670d\u88c5", 2, 4}, token{"\u9970\u54c1", 4, 6}, token{"\u6709\u9650\u516c\u53f8", 6, 10}},
- []token{token{"\u6211", 0, 1}, token{"\u7231", 1, 2}, token{"\u5317\u4eac", 2, 4}, token{"\u5929\u5b89\u95e8", 4, 7}},
- []token{token{"abc", 0, 3}},
- []token{token{"\u9690", 0, 1}, token{"\u9a6c\u5c14\u53ef\u592b", 1, 5}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u662f", 2, 3}, token{"\u4e2a", 3, 4}, token{"\u597d", 4, 5}, token{"\u7f51\u7ad9", 5, 7}},
- []token{token{"\u201c", 0, 1}, token{"Microsoft", 1, 10}, token{"\u201d", 10, 11}, token{"\u4e00\u8bcd", 11, 13}, token{"\u7531", 13, 14}, token{"\u201c", 14, 15}, token{"MICROcomputer", 15, 28}, token{"\uff08", 28, 29}, token{"\u5fae\u578b", 29, 31}, token{"\u8ba1\u7b97\u673a", 31, 34}, token{"\uff09", 34, 35}, token{"\u201d", 35, 36}, token{"\u548c", 36, 37}, token{"\u201c", 37, 38}, token{"SOFTware", 38, 46}, token{"\uff08", 46, 47}, token{"\u8f6f\u4ef6", 47, 49}, token{"\uff09", 49, 50}, token{"\u201d", 50, 51}, token{"\u4e24", 51, 52}, token{"\u90e8\u5206", 52, 54}, token{"\u7ec4\u6210", 54, 56}},
- []token{token{"\u8349\u6ce5\u9a6c", 0, 3}, token{"\u548c", 3, 4}, token{"\u6b3a\u5b9e", 4, 6}, token{"\u9a6c", 6, 7}, token{"\u662f", 7, 8}, token{"\u4eca\u5e74", 8, 10}, token{"\u7684", 10, 11}, token{"\u6d41\u884c", 11, 13}, token{"\u8bcd\u6c47", 13, 15}},
- []token{token{"\u4f0a\u85e4", 0, 2}, token{"\u6d0b\u534e\u5802", 2, 5}, token{"\u603b\u5e9c", 5, 7}, token{"\u5e97", 7, 8}},
- []token{token{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", 0, 12}},
- []token{token{"\u7f57\u5bc6\u6b27", 0, 3}, token{"\u4e0e", 3, 4}, token{"\u6731\u4e3d\u53f6", 4, 7}},
- []token{token{"\u6211", 0, 1}, token{"\u8d2d\u4e70", 1, 3}, token{"\u4e86", 3, 4}, token{"\u9053\u5177", 4, 6}, token{"\u548c", 6, 7}, token{"\u670d\u88c5", 7, 9}},
- []token{token{"PS", 0, 2}, token{":", 2, 3}, token{" ", 3, 4}, token{"\u6211", 4, 5}, token{"\u89c9\u5f97", 5, 7}, token{"\u5f00\u6e90", 7, 9}, token{"\u6709", 9, 10}, token{"\u4e00\u4e2a", 10, 12}, token{"\u597d\u5904", 12, 14}, token{"\uff0c", 14, 15}, token{"\u5c31\u662f", 15, 17}, token{"\u80fd\u591f", 17, 19}, token{"\u6566\u4fc3", 19, 21}, token{"\u81ea\u5df1", 21, 23}, token{"\u4e0d\u65ad\u6539\u8fdb", 23, 27}, token{"\uff0c", 27, 28}, token{"\u907f\u514d", 28, 30}, token{"\u655e\u5e1a", 30, 32}, token{"\u81ea\u73cd", 32, 34}},
- []token{token{"\u6e56\u5317\u7701", 0, 3}, token{"\u77f3\u9996\u5e02", 3, 6}},
- []token{token{"\u6e56\u5317\u7701", 0, 3}, token{"\u5341\u5830\u5e02", 3, 6}},
- []token{token{"\u603b\u7ecf\u7406", 0, 3}, token{"\u5b8c\u6210", 3, 5}, token{"\u4e86", 5, 6}, token{"\u8fd9\u4ef6", 6, 8}, token{"\u4e8b\u60c5", 8, 10}},
- []token{token{"\u7535\u8111", 0, 2}, token{"\u4fee\u597d", 2, 4}, token{"\u4e86", 4, 5}},
- []token{token{"\u505a\u597d", 0, 2}, token{"\u4e86", 2, 3}, token{"\u8fd9\u4ef6", 3, 5}, token{"\u4e8b\u60c5", 5, 7}, token{"\u5c31", 7, 8}, token{"\u4e00\u4e86\u767e\u4e86", 8, 12}, token{"\u4e86", 12, 13}},
- []token{token{"\u4eba\u4eec", 0, 2}, token{"\u5ba1\u7f8e", 2, 4}, token{"\u7684", 4, 5}, token{"\u89c2\u70b9", 5, 7}, token{"\u662f", 7, 8}, token{"\u4e0d\u540c", 8, 10}, token{"\u7684", 10, 11}},
- []token{token{"\u6211\u4eec", 0, 2}, token{"\u4e70", 2, 3}, token{"\u4e86", 3, 4}, token{"\u4e00\u4e2a", 4, 6}, token{"\u7f8e\u7684", 6, 8}, token{"\u7a7a\u8c03", 8, 10}},
- []token{token{"\u7ebf\u7a0b", 0, 2}, token{"\u521d\u59cb\u5316", 2, 5}, token{"\u65f6", 5, 6}, token{"\u6211\u4eec", 6, 8}, token{"\u8981", 8, 9}, token{"\u6ce8\u610f", 9, 11}},
- []token{token{"\u4e00\u4e2a", 0, 2}, token{"\u5206\u5b50", 2, 4}, token{"\u662f", 4, 5}, token{"\u7531", 5, 6}, token{"\u597d\u591a", 6, 8}, token{"\u539f\u5b50", 8, 10}, token{"\u7ec4\u7ec7", 10, 12}, token{"\u6210", 12, 13}, token{"\u7684", 13, 14}},
- []token{token{"\u795d", 0, 1}, token{"\u4f60", 1, 2}, token{"\u9a6c\u5230\u529f\u6210", 2, 6}},
- []token{token{"\u4ed6", 0, 1}, token{"\u6389", 1, 2}, token{"\u8fdb", 2, 3}, token{"\u4e86", 3, 4}, token{"\u65e0\u5e95\u6d1e", 4, 7}, token{"\u91cc", 7, 8}},
- []token{token{"\u4e2d\u56fd", 0, 2}, token{"\u7684", 2, 3}, token{"\u9996\u90fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u5317\u4eac", 6, 8}},
- []token{token{"\u5b59\u541b\u610f", 0, 3}},
- []token{token{"\u5916\u4ea4\u90e8", 0, 3}, token{"\u53d1\u8a00\u4eba", 3, 6}, token{"\u9a6c\u671d\u65ed", 6, 9}},
- []token{token{"\u9886\u5bfc\u4eba", 0, 3}, token{"\u4f1a\u8bae", 3, 5}, token{"\u548c", 5, 6}, token{"\u7b2c\u56db\u5c4a", 6, 9}, token{"\u4e1c\u4e9a", 9, 11}, token{"\u5cf0\u4f1a", 11, 13}},
- []token{token{"\u5728", 0, 1}, token{"\u8fc7\u53bb", 1, 3}, token{"\u7684", 3, 4}, token{"\u8fd9", 4, 5}, token{"\u4e94\u5e74", 5, 7}},
- []token{token{"\u8fd8", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5f88\u957f", 3, 5}, token{"\u7684", 5, 6}, token{"\u8def", 6, 7}, token{"\u8981", 7, 8}, token{"\u8d70", 8, 9}},
- []token{token{"60", 0, 2}, token{"\u5468\u5e74", 2, 4}, token{"\u9996\u90fd", 4, 6}, token{"\u9605\u5175", 6, 8}},
- []token{token{"\u4f60\u597d", 0, 2}, token{"\u4eba\u4eec", 2, 4}, token{"\u5ba1\u7f8e", 4, 6}, token{"\u7684", 6, 7}, token{"\u89c2\u70b9", 7, 9}, token{"\u662f", 9, 10}, token{"\u4e0d\u540c", 10, 12}, token{"\u7684", 12, 13}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u6765", 5, 6}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u53bb", 5, 6}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4f46\u662f", 0, 2}, token{"\u540e\u6765", 2, 4}, token{"\u6211", 4, 5}, token{"\u624d", 5, 6}, token{"\u77e5\u9053", 6, 8}, token{"\u4f60", 8, 9}, token{"\u662f", 9, 10}, token{"\u5bf9", 10, 11}, token{"\u7684", 11, 12}},
- []token{token{"\u5b58\u5728", 0, 2}, token{"\u5373", 2, 3}, token{"\u5408\u7406", 3, 5}},
- []token{token{"\u7684", 0, 1}, token{"\u7684", 1, 2}, token{"\u7684", 2, 3}, token{"\u7684", 3, 4}, token{"\u7684", 4, 5}, token{"\u5728", 5, 6}, token{"\u7684", 6, 7}, token{"\u7684", 7, 8}, token{"\u7684", 8, 9}, token{"\u7684", 9, 10}, token{"\u5c31", 10, 11}, token{"\u4ee5", 11, 12}, token{"\u548c", 12, 13}, token{"\u548c", 13, 14}, token{"\u548c", 14, 15}},
- []token{token{"I", 0, 1}, token{" ", 1, 2}, token{"love", 2, 6}, token{"\u4f60", 6, 7}, token{"\uff0c", 7, 8}, token{"\u4e0d\u4ee5\u4e3a\u803b", 8, 12}, token{"\uff0c", 12, 13}, token{"\u53cd", 13, 14}, token{"\u4ee5\u4e3a", 14, 16}, token{"rong", 16, 20}},
- []token{token{"\u56e0", 0, 1}},
- []token{},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u5f88", 0, 1}, token{"\u597d", 1, 2}, token{"\u4f46", 2, 3}, token{"\u4e3b\u8981", 3, 5}, token{"\u662f", 5, 6}, token{"\u57fa\u4e8e", 6, 8}, token{"\u7f51\u9875", 8, 10}, token{"\u5f62\u5f0f", 10, 12}},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u4e3a\u4ec0\u4e48", 0, 3}, token{"\u6211", 3, 4}, token{"\u4e0d\u80fd", 4, 6}, token{"\u62e5\u6709", 6, 8}, token{"\u60f3\u8981", 8, 10}, token{"\u7684", 10, 11}, token{"\u751f\u6d3b", 11, 13}},
- []token{token{"\u540e\u6765", 0, 2}, token{"\u6211", 2, 3}, token{"\u624d", 3, 4}},
- []token{token{"\u6b64\u6b21", 0, 2}, token{"\u6765", 2, 3}, token{"\u4e2d\u56fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u4e3a\u4e86", 6, 8}},
- []token{token{"\u4f7f\u7528", 0, 2}, token{"\u4e86", 2, 3}, token{"\u5b83", 3, 4}, token{"\u5c31", 4, 5}, token{"\u53ef\u4ee5", 5, 7}, token{"\u89e3\u51b3", 7, 9}, token{"\u4e00\u4e9b", 9, 11}, token{"\u95ee\u9898", 11, 13}},
- []token{token{",", 0, 1}, token{"\u4f7f\u7528", 1, 3}, token{"\u4e86", 3, 4}, token{"\u5b83", 4, 5}, token{"\u5c31", 5, 6}, token{"\u53ef\u4ee5", 6, 8}, token{"\u89e3\u51b3", 8, 10}, token{"\u4e00\u4e9b", 10, 12}, token{"\u95ee\u9898", 12, 14}},
- []token{token{"\u5176\u5b9e", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u597d\u4eba", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u662f\u56e0\u4e3a", 0, 3}, token{"\u548c", 3, 4}, token{"\u56fd\u5bb6", 4, 6}},
- []token{token{"\u8001\u5e74", 0, 2}, token{"\u641c\u7d22", 2, 4}, token{"\u8fd8", 4, 5}, token{"\u652f\u6301", 5, 7}},
- []token{token{"\u5e72\u8106", 0, 2}, token{"\u5c31", 2, 3}, token{"\u628a", 3, 4}, token{"\u90a3\u90e8", 4, 6}, token{"\u8499\u4eba", 6, 8}, token{"\u7684", 8, 9}, token{"\u95f2\u6cd5", 9, 11}, token{"\u7ed9", 11, 12}, token{"\u5e9f", 12, 13}, token{"\u4e86", 13, 14}, token{"\u62c9\u5012", 14, 16}, token{"\uff01", 16, 17}, token{"RT", 17, 19}, token{" ", 19, 20}, token{"@", 20, 21}, token{"laoshipukong", 21, 33}, token{" ", 33, 34}, token{":", 34, 35}, token{" ", 35, 36}, token{"27", 36, 38}, token{"\u65e5", 38, 39}, token{"\uff0c", 39, 40}, token{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", 40, 47}, token{"\u7b2c\u4e09\u6b21", 47, 50}, token{"\u5ba1\u8bae", 50, 52}, token{"\u4fb5\u6743", 52, 54}, token{"\u8d23\u4efb\u6cd5", 54, 57}, token{"\u8349\u6848", 57, 59}, token{"\uff0c", 59, 60}, token{"\u5220\u9664", 60, 62}, token{"\u4e86", 62, 63}, token{"\u6709\u5173", 63, 65}, token{"\u533b\u7597", 65, 67}, token{"\u635f\u5bb3", 67, 69}, token{"\u8d23\u4efb", 69, 71}, token{"\u201c", 71, 72}, token{"\u4e3e\u8bc1", 72, 74}, token{"\u5012\u7f6e", 74, 76}, token{"\u201d", 76, 77}, token{"\u7684", 77, 78}, token{"\u89c4\u5b9a", 78, 80}, token{"\u3002", 80, 81}, token{"\u5728", 81, 82}, token{"\u533b\u60a3", 82, 84}, token{"\u7ea0\u7eb7", 84, 86}, token{"\u4e2d\u672c", 86, 88}, token{"\u5df2", 88, 89}, token{"\u5904\u4e8e", 89, 91}, token{"\u5f31\u52bf", 91, 93}, token{"\u5730\u4f4d", 93, 95}, token{"\u7684", 95, 96}, token{"\u6d88\u8d39\u8005", 96, 99}, token{"\u7531\u6b64", 99, 101}, token{"\u5c06", 101, 102}, token{"\u9677\u5165", 102, 104}, token{"\u4e07\u52ab\u4e0d\u590d", 104, 108}, token{"\u7684", 108, 109}, token{"\u5883\u5730", 109, 111}, token{"\u3002", 111, 112}, token{" ", 112, 113}},
- []token{token{"\u5927", 0, 1}},
- []token{},
- []token{token{"\u4ed6", 0, 1}, token{"\u8bf4", 1, 2}, token{"\u7684", 2, 3}, token{"\u786e\u5b9e", 3, 5}, token{"\u5728\u7406", 5, 7}},
- []token{token{"\u957f\u6625", 0, 2}, token{"\u5e02\u957f", 2, 4}, token{"\u6625\u8282", 4, 6}, token{"\u8bb2\u8bdd", 6, 8}},
- []token{token{"\u7ed3\u5a5a", 0, 2}, token{"\u7684", 2, 3}, token{"\u548c", 3, 4}, token{"\u5c1a\u672a", 4, 6}, token{"\u7ed3\u5a5a", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u7ed3\u5408", 0, 2}, token{"\u6210", 2, 3}, token{"\u5206\u5b50", 3, 5}, token{"\u65f6", 5, 6}},
- []token{token{"\u65c5\u6e38", 0, 2}, token{"\u548c", 2, 3}, token{"\u670d\u52a1", 3, 5}, token{"\u662f", 5, 6}, token{"\u6700\u597d", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u8fd9\u4ef6", 0, 2}, token{"\u4e8b\u60c5", 2, 4}, token{"\u7684\u786e", 4, 6}, token{"\u662f", 6, 7}, token{"\u6211", 7, 8}, token{"\u7684", 8, 9}, token{"\u9519", 9, 10}},
- []token{token{"\u4f9b", 0, 1}, token{"\u5927\u5bb6", 1, 3}, token{"\u53c2\u8003", 3, 5}, token{"\u6307\u6b63", 5, 7}},
- []token{token{"\u54c8\u5c14\u6ee8", 0, 3}, token{"\u653f\u5e9c", 3, 5}, token{"\u516c\u5e03", 5, 7}, token{"\u584c\u6865", 7, 9}, token{"\u539f\u56e0", 9, 11}},
- []token{token{"\u6211", 0, 1}, token{"\u5728", 1, 2}, token{"\u673a\u573a", 2, 4}, token{"\u5165\u53e3\u5904", 4, 7}},
- []token{token{"\u90a2\u6c38\u81e3", 0, 3}, token{"\u6444\u5f71", 3, 5}, token{"\u62a5\u9053", 5, 7}},
- []token{token{"BP", 0, 2}, token{"\u795e\u7ecf\u7f51\u7edc", 2, 6}, token{"\u5982\u4f55", 6, 8}, token{"\u8bad\u7ec3", 8, 10}, token{"\u624d\u80fd", 10, 12}, token{"\u5728", 12, 13}, token{"\u5206\u7c7b", 13, 15}, token{"\u65f6", 15, 16}, token{"\u589e\u52a0", 16, 18}, token{"\u533a\u5206\u5ea6", 18, 21}, token{"\uff1f", 21, 22}},
- []token{token{"\u5357\u4eac\u5e02", 0, 3}, token{"\u957f\u6c5f\u5927\u6865", 3, 7}},
- []token{token{"\u5e94", 0, 1}, token{"\u4e00\u4e9b", 1, 3}, token{"\u4f7f\u7528\u8005", 3, 6}, token{"\u7684", 6, 7}, token{"\u5efa\u8bae", 7, 9}, token{"\uff0c", 9, 10}, token{"\u4e5f", 10, 11}, token{"\u4e3a\u4e86", 11, 13}, token{"\u4fbf\u4e8e", 13, 15}, token{"\u5229\u7528", 15, 17}, token{"NiuTrans", 17, 25}, token{"\u7528\u4e8e", 25, 27}, token{"SMT", 27, 30}, token{"\u7814\u7a76", 30, 32}},
- []token{token{"\u957f\u6625\u5e02", 0, 3}, token{"\u957f\u6625", 3, 5}, token{"\u836f\u5e97", 5, 7}},
- []token{token{"\u9093\u9896\u8d85", 0, 3}, token{"\u751f\u524d", 3, 5}, token{"\u6700", 5, 6}, token{"\u559c\u6b22", 6, 8}, token{"\u7684", 8, 9}, token{"\u8863\u670d", 9, 11}},
- []token{token{"\u80e1\u9526\u6d9b", 0, 3}, token{"\u662f", 3, 4}, token{"\u70ed\u7231", 4, 6}, token{"\u4e16\u754c", 6, 8}, token{"\u548c\u5e73", 8, 10}, token{"\u7684", 10, 11}, token{"\u653f\u6cbb\u5c40", 11, 14}, token{"\u5e38\u59d4", 14, 16}},
- []token{token{"\u7a0b\u5e8f\u5458", 0, 3}, token{"\u795d", 3, 4}, token{"\u6d77\u6797", 4, 6}, token{"\u548c", 6, 7}, token{"\u6731\u4f1a\u9707", 7, 10}, token{"\u662f", 10, 11}, token{"\u5728", 11, 12}, token{"\u5b59\u5065", 12, 14}, token{"\u7684", 14, 15}, token{"\u5de6\u9762", 15, 17}, token{"\u548c", 17, 18}, token{"\u53f3\u9762", 18, 20}, token{",", 20, 21}, token{" ", 21, 22}, token{"\u8303\u51ef", 22, 24}, token{"\u5728", 24, 25}, token{"\u6700", 25, 26}, token{"\u53f3\u9762", 26, 28}, token{".", 28, 29}, token{"\u518d\u5f80", 29, 31}, token{"\u5de6", 31, 32}, token{"\u662f", 32, 33}, token{"\u674e\u677e\u6d2a", 33, 36}},
- []token{token{"\u4e00\u6b21\u6027", 0, 3}, token{"\u4ea4", 3, 4}, token{"\u591a\u5c11", 4, 6}, token{"\u94b1", 6, 7}},
- []token{token{"\u4e24\u5757", 0, 2}, token{"\u4e94", 2, 3}, token{"\u4e00\u5957", 3, 5}, token{"\uff0c", 5, 6}, token{"\u4e09\u5757", 6, 8}, token{"\u516b", 8, 9}, token{"\u4e00\u65a4", 9, 11}, token{"\uff0c", 11, 12}, token{"\u56db\u5757", 12, 14}, token{"\u4e03", 14, 15}, token{"\u4e00\u672c", 15, 17}, token{"\uff0c", 17, 18}, token{"\u4e94\u5757", 18, 20}, token{"\u516d", 20, 21}, token{"\u4e00\u6761", 21, 23}},
- []token{token{"\u5c0f", 0, 1}, token{"\u548c\u5c1a", 1, 3}, token{"\u7559", 3, 4}, token{"\u4e86", 4, 5}, token{"\u4e00\u4e2a", 5, 7}, token{"\u50cf", 7, 8}, token{"\u5927", 8, 9}, token{"\u548c\u5c1a", 9, 11}, token{"\u4e00\u6837", 11, 13}, token{"\u7684", 13, 14}, token{"\u548c\u5c1a\u5934", 14, 17}},
- []token{token{"\u6211", 0, 1}, token{"\u662f", 1, 2}, token{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", 2, 9}, token{"\u516c\u6c11", 9, 11}, token{";", 11, 12}, token{"\u6211", 12, 13}, token{"\u7238\u7238", 13, 15}, token{"\u662f", 15, 16}, token{"\u5171\u548c\u515a", 16, 19}, token{"\u515a\u5458", 19, 21}, token{";", 21, 22}, token{" ", 22, 23}, token{"\u5730\u94c1", 23, 25}, token{"\u548c\u5e73\u95e8", 25, 28}, token{"\u7ad9", 28, 29}},
- []token{token{"\u5f20\u6653\u6885", 0, 3}, token{"\u53bb", 3, 4}, token{"\u4eba\u6c11", 4, 6}, token{"\u533b\u9662", 6, 8}, token{"\u505a", 8, 9}, token{"\u4e86", 9, 10}, token{"\u4e2a", 10, 11}, token{"B\u8d85", 11, 13}, token{"\u7136\u540e", 13, 15}, token{"\u53bb", 15, 16}, token{"\u4e70", 16, 17}, token{"\u4e86", 17, 18}, token{"\u4ef6", 18, 19}, token{"T\u6064", 19, 21}},
- []token{token{"AT&T", 0, 4}, token{"\u662f", 4, 5}, token{"\u4e00\u4ef6", 5, 7}, token{"\u4e0d\u9519", 7, 9}, token{"\u7684", 9, 10}, token{"\u516c\u53f8", 10, 12}, token{"\uff0c", 12, 13}, token{"\u7ed9", 13, 14}, token{"\u4f60", 14, 15}, token{"\u53d1", 15, 16}, token{"offer", 16, 21}, token{"\u4e86", 21, 22}, token{"\u5417", 22, 23}, token{"\uff1f", 23, 24}},
- []token{token{"C++", 0, 3}, token{"\u548c", 3, 4}, token{"c#", 4, 6}, token{"\u662f", 6, 7}, token{"\u4ec0\u4e48", 7, 9}, token{"\u5173\u7cfb", 9, 11}, token{"\uff1f", 11, 12}, token{"11", 12, 14}, token{"+", 14, 15}, token{"122", 15, 18}, token{"=", 18, 19}, token{"133", 19, 22}, token{"\uff0c", 22, 23}, token{"\u662f", 23, 24}, token{"\u5417", 24, 25}, token{"\uff1f", 25, 26}, token{"PI", 26, 28}, token{"=", 28, 29}, token{"3.14159", 29, 36}},
- []token{token{"\u4f60", 0, 1}, token{"\u8ba4\u8bc6", 1, 3}, token{"\u90a3\u4e2a", 3, 5}, token{"\u548c", 5, 6}, token{"\u4e3b\u5e2d", 6, 8}, token{"\u63e1\u624b", 8, 10}, token{"\u7684", 10, 11}, token{"\u7684\u54e5", 11, 13}, token{"\u5417", 13, 14}, token{"\uff1f", 14, 15}, token{"\u4ed6\u5f00", 15, 17}, token{"\u4e00\u8f86", 17, 19}, token{"\u9ed1\u8272", 19, 21}, token{"\u7684\u58eb", 21, 23}, token{"\u3002", 23, 24}},
- []token{token{"\u67aa\u6746\u5b50", 0, 3}, token{"\u4e2d", 3, 4}, token{"\u51fa", 4, 5}, token{"\u653f\u6743", 5, 7}},
- []token{token{"\u5f20\u4e09\u98ce", 0, 3}, token{"\u540c\u5b66", 3, 5}, token{"\u8d70\u4e0a", 5, 7}, token{"\u4e86", 7, 8}, token{"\u4e0d\u5f52\u8def", 8, 11}},
- []token{token{"\u963fQ", 0, 2}, token{"\u8170\u95f4", 2, 4}, token{"\u6302", 4, 5}, token{"\u7740", 5, 6}, token{"BB\u673a", 6, 9}, token{"\u624b\u91cc", 9, 11}, token{"\u62ff", 11, 12}, token{"\u7740", 12, 13}, token{"\u5927\u54e5\u5927", 13, 16}, token{"\uff0c", 16, 17}, token{"\u8bf4", 17, 18}, token{"\uff1a", 18, 19}, token{"\u6211", 19, 20}, token{"\u4e00\u822c", 20, 22}, token{"\u5403\u996d", 22, 24}, token{"\u4e0d", 24, 25}, token{"AA\u5236", 25, 28}, token{"\u7684", 28, 29}, token{"\u3002", 29, 30}},
- []token{token{"\u5728", 0, 1}, token{"1\u53f7\u5e97", 1, 4}, token{"\u80fd", 4, 5}, token{"\u4e70", 5, 6}, token{"\u5230", 6, 7}, token{"\u5c0fS", 7, 9}, token{"\u548c", 9, 10}, token{"\u5927S", 10, 12}, token{"\u516b\u5366", 12, 14}, token{"\u7684", 14, 15}, token{"\u4e66", 15, 16}, token{"\u3002", 16, 17}},
- []token{token{"\u8fd9\u662f", 0, 2}, token{"\u4e00\u4e2a", 2, 4}, token{"\u4f38\u624b", 4, 6}, token{"\u4e0d\u89c1", 6, 8}, token{"\u4e94\u6307", 8, 10}, token{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", 4, 10}, token{"\u7684", 10, 11}, token{"\u9ed1\u591c", 11, 13}, token{"\u3002", 13, 14}, token{"\u6211", 14, 15}, token{"\u53eb", 15, 16}, token{"\u609f\u7a7a", 17, 19}, token{"\u5b59\u609f\u7a7a", 16, 19}, token{"\uff0c", 19, 20}, token{"\u6211", 20, 21}, token{"\u7231", 21, 22}, token{"\u5317\u4eac", 22, 24}, token{"\uff0c", 24, 25}, token{"\u6211", 25, 26}, token{"\u7231", 26, 27}, token{"Python", 27, 33}, token{"\u548c", 33, 34}, token{"C++", 34, 37}, token{"\u3002", 37, 38}},
- []token{token{"\u6211", 0, 1}, token{"\u4e0d", 1, 2}, token{"\u559c\u6b22", 2, 4}, token{"\u65e5\u672c", 4, 6}, token{"\u548c\u670d", 6, 8}, token{"\u3002", 8, 9}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u56de\u5f52", 2, 4}, token{"\u4eba\u95f4", 4, 6}, token{"\u3002", 6, 7}},
- []token{token{"\u5de5\u4fe1\u5904", 0, 3}, token{"\u5e72\u4e8b", 4, 6}, token{"\u5973\u5e72\u4e8b", 3, 6}, token{"\u6bcf\u6708", 6, 8}, token{"\u7ecf\u8fc7", 8, 10}, token{"\u4e0b\u5c5e", 10, 12}, token{"\u79d1\u5ba4", 12, 14}, token{"\u90fd", 14, 15}, token{"\u8981", 15, 16}, token{"\u4eb2\u53e3", 16, 18}, token{"\u4ea4\u4ee3", 18, 20}, token{"24", 20, 22}, token{"\u53e3", 22, 23}, token{"\u4ea4\u6362", 23, 25}, token{"\u6362\u673a", 24, 26}, token{"\u4ea4\u6362\u673a", 23, 26}, token{"\u7b49", 26, 27}, token{"\u6280\u672f", 27, 29}, token{"\u6280\u672f\u6027", 27, 30}, token{"\u5668\u4ef6", 30, 32}, token{"\u7684", 32, 33}, token{"\u5b89\u88c5", 33, 35}, token{"\u5de5\u4f5c", 35, 37}},
- []token{token{"\u6211", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5ec9\u79df", 3, 5}, token{"\u79df\u623f", 4, 6}, token{"\u5ec9\u79df\u623f", 3, 6}},
- []token{token{"\u6c38\u548c", 0, 2}, token{"\u670d\u88c5", 2, 4}, token{"\u9970\u54c1", 4, 6}, token{"\u6709\u9650", 6, 8}, token{"\u516c\u53f8", 8, 10}, token{"\u6709\u9650\u516c\u53f8", 6, 10}},
- []token{token{"\u6211", 0, 1}, token{"\u7231", 1, 2}, token{"\u5317\u4eac", 2, 4}, token{"\u5929\u5b89", 4, 6}, token{"\u5929\u5b89\u95e8", 4, 7}},
- []token{token{"abc", 0, 3}},
- []token{token{"\u9690", 0, 1}, token{"\u53ef\u592b", 3, 5}, token{"\u9a6c\u5c14\u53ef", 1, 4}, token{"\u9a6c\u5c14\u53ef\u592b", 1, 5}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u662f", 2, 3}, token{"\u4e2a", 3, 4}, token{"\u597d", 4, 5}, token{"\u7f51\u7ad9", 5, 7}},
- []token{token{"\u201c", 0, 1}, token{"Microsoft", 1, 10}, token{"\u201d", 10, 11}, token{"\u4e00\u8bcd", 11, 13}, token{"\u7531", 13, 14}, token{"\u201c", 14, 15}, token{"MICROcomputer", 15, 28}, token{"\uff08", 28, 29}, token{"\u5fae\u578b", 29, 31}, token{"\u8ba1\u7b97", 31, 33}, token{"\u7b97\u673a", 32, 34}, token{"\u8ba1\u7b97\u673a", 31, 34}, token{"\uff09", 34, 35}, token{"\u201d", 35, 36}, token{"\u548c", 36, 37}, token{"\u201c", 37, 38}, token{"SOFTware", 38, 46}, token{"\uff08", 46, 47}, token{"\u8f6f\u4ef6", 47, 49}, token{"\uff09", 49, 50}, token{"\u201d", 50, 51}, token{"\u4e24", 51, 52}, token{"\u90e8\u5206", 52, 54}, token{"\u7ec4\u6210", 54, 56}},
- []token{token{"\u8349\u6ce5\u9a6c", 0, 3}, token{"\u548c", 3, 4}, token{"\u6b3a\u5b9e", 4, 6}, token{"\u9a6c", 6, 7}, token{"\u662f", 7, 8}, token{"\u4eca\u5e74", 8, 10}, token{"\u7684", 10, 11}, token{"\u6d41\u884c", 11, 13}, token{"\u8bcd\u6c47", 13, 15}},
- []token{token{"\u4f0a\u85e4", 0, 2}, token{"\u6d0b\u534e\u5802", 2, 5}, token{"\u603b\u5e9c", 5, 7}, token{"\u5e97", 7, 8}},
- []token{token{"\u4e2d\u56fd", 0, 2}, token{"\u79d1\u5b66", 2, 4}, token{"\u5b66\u9662", 3, 5}, token{"\u8ba1\u7b97", 5, 7}, token{"\u6280\u672f", 7, 9}, token{"\u7814\u7a76", 9, 11}, token{"\u79d1\u5b66\u9662", 2, 5}, token{"\u7814\u7a76\u6240", 9, 12}, token{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", 0, 12}},
- []token{token{"\u7f57\u5bc6\u6b27", 0, 3}, token{"\u4e0e", 3, 4}, token{"\u6731\u4e3d\u53f6", 4, 7}},
- []token{token{"\u6211", 0, 1}, token{"\u8d2d\u4e70", 1, 3}, token{"\u4e86", 3, 4}, token{"\u9053\u5177", 4, 6}, token{"\u548c", 6, 7}, token{"\u670d\u88c5", 7, 9}},
- []token{token{"PS", 0, 2}, token{":", 2, 3}, token{" ", 3, 4}, token{"\u6211", 4, 5}, token{"\u89c9\u5f97", 5, 7}, token{"\u5f00\u6e90", 7, 9}, token{"\u6709", 9, 10}, token{"\u4e00\u4e2a", 10, 12}, token{"\u597d\u5904", 12, 14}, token{"\uff0c", 14, 15}, token{"\u5c31\u662f", 15, 17}, token{"\u80fd\u591f", 17, 19}, token{"\u6566\u4fc3", 19, 21}, token{"\u81ea\u5df1", 21, 23}, token{"\u4e0d\u65ad", 23, 25}, token{"\u6539\u8fdb", 25, 27}, token{"\u4e0d\u65ad\u6539\u8fdb", 23, 27}, token{"\uff0c", 27, 28}, token{"\u907f\u514d", 28, 30}, token{"\u655e\u5e1a", 30, 32}, token{"\u81ea\u73cd", 32, 34}},
- []token{token{"\u6e56\u5317", 0, 2}, token{"\u6e56\u5317\u7701", 0, 3}, token{"\u77f3\u9996", 3, 5}, token{"\u77f3\u9996\u5e02", 3, 6}},
- []token{token{"\u6e56\u5317", 0, 2}, token{"\u6e56\u5317\u7701", 0, 3}, token{"\u5341\u5830", 3, 5}, token{"\u5341\u5830\u5e02", 3, 6}},
- []token{token{"\u7ecf\u7406", 1, 3}, token{"\u603b\u7ecf\u7406", 0, 3}, token{"\u5b8c\u6210", 3, 5}, token{"\u4e86", 5, 6}, token{"\u8fd9\u4ef6", 6, 8}, token{"\u4e8b\u60c5", 8, 10}},
- []token{token{"\u7535\u8111", 0, 2}, token{"\u4fee\u597d", 2, 4}, token{"\u4e86", 4, 5}},
- []token{token{"\u505a\u597d", 0, 2}, token{"\u4e86", 2, 3}, token{"\u8fd9\u4ef6", 3, 5}, token{"\u4e8b\u60c5", 5, 7}, token{"\u5c31", 7, 8}, token{"\u4e00\u4e86\u767e\u4e86", 8, 12}, token{"\u4e86", 12, 13}},
- []token{token{"\u4eba\u4eec", 0, 2}, token{"\u5ba1\u7f8e", 2, 4}, token{"\u7684", 4, 5}, token{"\u89c2\u70b9", 5, 7}, token{"\u662f", 7, 8}, token{"\u4e0d\u540c", 8, 10}, token{"\u7684", 10, 11}},
- []token{token{"\u6211\u4eec", 0, 2}, token{"\u4e70", 2, 3}, token{"\u4e86", 3, 4}, token{"\u4e00\u4e2a", 4, 6}, token{"\u7f8e\u7684", 6, 8}, token{"\u7a7a\u8c03", 8, 10}},
- []token{token{"\u7ebf\u7a0b", 0, 2}, token{"\u521d\u59cb", 2, 4}, token{"\u521d\u59cb\u5316", 2, 5}, token{"\u65f6", 5, 6}, token{"\u6211\u4eec", 6, 8}, token{"\u8981", 8, 9}, token{"\u6ce8\u610f", 9, 11}},
- []token{token{"\u4e00\u4e2a", 0, 2}, token{"\u5206\u5b50", 2, 4}, token{"\u662f", 4, 5}, token{"\u7531", 5, 6}, token{"\u597d\u591a", 6, 8}, token{"\u539f\u5b50", 8, 10}, token{"\u7ec4\u7ec7", 10, 12}, token{"\u6210", 12, 13}, token{"\u7684", 13, 14}},
- []token{token{"\u795d", 0, 1}, token{"\u4f60", 1, 2}, token{"\u9a6c\u5230\u529f\u6210", 2, 6}},
- []token{token{"\u4ed6", 0, 1}, token{"\u6389", 1, 2}, token{"\u8fdb", 2, 3}, token{"\u4e86", 3, 4}, token{"\u65e0\u5e95", 4, 6}, token{"\u65e0\u5e95\u6d1e", 4, 7}, token{"\u91cc", 7, 8}},
- []token{token{"\u4e2d\u56fd", 0, 2}, token{"\u7684", 2, 3}, token{"\u9996\u90fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u5317\u4eac", 6, 8}},
- []token{token{"\u5b59\u541b\u610f", 0, 3}},
- []token{token{"\u5916\u4ea4", 0, 2}, token{"\u5916\u4ea4\u90e8", 0, 3}, token{"\u53d1\u8a00", 3, 5}, token{"\u53d1\u8a00\u4eba", 3, 6}, token{"\u9a6c\u671d\u65ed", 6, 9}},
- []token{token{"\u9886\u5bfc", 0, 2}, token{"\u9886\u5bfc\u4eba", 0, 3}, token{"\u4f1a\u8bae", 3, 5}, token{"\u548c", 5, 6}, token{"\u7b2c\u56db", 6, 8}, token{"\u56db\u5c4a", 7, 9}, token{"\u7b2c\u56db\u5c4a", 6, 9}, token{"\u4e1c\u4e9a", 9, 11}, token{"\u5cf0\u4f1a", 11, 13}},
- []token{token{"\u5728", 0, 1}, token{"\u8fc7\u53bb", 1, 3}, token{"\u7684", 3, 4}, token{"\u8fd9", 4, 5}, token{"\u4e94\u5e74", 5, 7}},
- []token{token{"\u8fd8", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5f88\u957f", 3, 5}, token{"\u7684", 5, 6}, token{"\u8def", 6, 7}, token{"\u8981", 7, 8}, token{"\u8d70", 8, 9}},
- []token{token{"60", 0, 2}, token{"\u5468\u5e74", 2, 4}, token{"\u9996\u90fd", 4, 6}, token{"\u9605\u5175", 6, 8}},
- []token{token{"\u4f60\u597d", 0, 2}, token{"\u4eba\u4eec", 2, 4}, token{"\u5ba1\u7f8e", 4, 6}, token{"\u7684", 6, 7}, token{"\u89c2\u70b9", 7, 9}, token{"\u662f", 9, 10}, token{"\u4e0d\u540c", 10, 12}, token{"\u7684", 12, 13}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u6765", 5, 6}, token{"\u4e16\u535a", 6, 8}, token{"\u535a\u56ed", 7, 9}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u53bb", 5, 6}, token{"\u4e16\u535a", 6, 8}, token{"\u535a\u56ed", 7, 9}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4f46\u662f", 0, 2}, token{"\u540e\u6765", 2, 4}, token{"\u6211", 4, 5}, token{"\u624d", 5, 6}, token{"\u77e5\u9053", 6, 8}, token{"\u4f60", 8, 9}, token{"\u662f", 9, 10}, token{"\u5bf9", 10, 11}, token{"\u7684", 11, 12}},
- []token{token{"\u5b58\u5728", 0, 2}, token{"\u5373", 2, 3}, token{"\u5408\u7406", 3, 5}},
- []token{token{"\u7684", 0, 1}, token{"\u7684", 1, 2}, token{"\u7684", 2, 3}, token{"\u7684", 3, 4}, token{"\u7684", 4, 5}, token{"\u5728", 5, 6}, token{"\u7684", 6, 7}, token{"\u7684", 7, 8}, token{"\u7684", 8, 9}, token{"\u7684", 9, 10}, token{"\u5c31", 10, 11}, token{"\u4ee5", 11, 12}, token{"\u548c", 12, 13}, token{"\u548c", 13, 14}, token{"\u548c", 14, 15}},
- []token{token{"I", 0, 1}, token{" ", 1, 2}, token{"love", 2, 6}, token{"\u4f60", 6, 7}, token{"\uff0c", 7, 8}, token{"\u4e0d\u4ee5", 8, 10}, token{"\u4ee5\u4e3a", 9, 11}, token{"\u4e0d\u4ee5\u4e3a\u803b", 8, 12}, token{"\uff0c", 12, 13}, token{"\u53cd", 13, 14}, token{"\u4ee5\u4e3a", 14, 16}, token{"rong", 16, 20}},
- []token{token{"\u56e0", 0, 1}},
- []token{},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u5f88", 0, 1}, token{"\u597d", 1, 2}, token{"\u4f46", 2, 3}, token{"\u4e3b\u8981", 3, 5}, token{"\u662f", 5, 6}, token{"\u57fa\u4e8e", 6, 8}, token{"\u7f51\u9875", 8, 10}, token{"\u5f62\u5f0f", 10, 12}},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u4ec0\u4e48", 1, 3}, token{"\u4e3a\u4ec0\u4e48", 0, 3}, token{"\u6211", 3, 4}, token{"\u4e0d\u80fd", 4, 6}, token{"\u62e5\u6709", 6, 8}, token{"\u60f3\u8981", 8, 10}, token{"\u7684", 10, 11}, token{"\u751f\u6d3b", 11, 13}},
- []token{token{"\u540e\u6765", 0, 2}, token{"\u6211", 2, 3}, token{"\u624d", 3, 4}},
- []token{token{"\u6b64\u6b21", 0, 2}, token{"\u6765", 2, 3}, token{"\u4e2d\u56fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u4e3a\u4e86", 6, 8}},
- []token{token{"\u4f7f\u7528", 0, 2}, token{"\u4e86", 2, 3}, token{"\u5b83", 3, 4}, token{"\u5c31", 4, 5}, token{"\u53ef\u4ee5", 5, 7}, token{"\u89e3\u51b3", 7, 9}, token{"\u4e00\u4e9b", 9, 11}, token{"\u95ee\u9898", 11, 13}},
- []token{token{",", 0, 1}, token{"\u4f7f\u7528", 1, 3}, token{"\u4e86", 3, 4}, token{"\u5b83", 4, 5}, token{"\u5c31", 5, 6}, token{"\u53ef\u4ee5", 6, 8}, token{"\u89e3\u51b3", 8, 10}, token{"\u4e00\u4e9b", 10, 12}, token{"\u95ee\u9898", 12, 14}},
- []token{token{"\u5176\u5b9e", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u597d\u4eba", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u56e0\u4e3a", 1, 3}, token{"\u662f\u56e0\u4e3a", 0, 3}, token{"\u548c", 3, 4}, token{"\u56fd\u5bb6", 4, 6}},
- []token{token{"\u8001\u5e74", 0, 2}, token{"\u641c\u7d22", 2, 4}, token{"\u8fd8", 4, 5}, token{"\u652f\u6301", 5, 7}},
- []token{token{"\u5e72\u8106", 0, 2}, token{"\u5c31", 2, 3}, token{"\u628a", 3, 4}, token{"\u90a3\u90e8", 4, 6}, token{"\u8499\u4eba", 6, 8}, token{"\u7684", 8, 9}, token{"\u95f2\u6cd5", 9, 11}, token{"\u7ed9", 11, 12}, token{"\u5e9f", 12, 13}, token{"\u4e86", 13, 14}, token{"\u62c9\u5012", 14, 16}, token{"\uff01", 16, 17}, token{"RT", 17, 19}, token{" ", 19, 20}, token{"@", 20, 21}, token{"laoshipukong", 21, 33}, token{" ", 33, 34}, token{":", 34, 35}, token{" ", 35, 36}, token{"27", 36, 38}, token{"\u65e5", 38, 39}, token{"\uff0c", 39, 40}, token{"\u5168\u56fd", 40, 42}, token{"\u56fd\u4eba", 41, 43}, token{"\u4eba\u5927", 42, 44}, token{"\u5e38\u59d4", 44, 46}, token{"\u59d4\u4f1a", 45, 47}, token{"\u5e38\u59d4\u4f1a", 44, 47}, token{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", 40, 47}, token{"\u7b2c\u4e09", 47, 49}, token{"\u4e09\u6b21", 48, 50}, token{"\u7b2c\u4e09\u6b21", 47, 50}, token{"\u5ba1\u8bae", 50, 52}, token{"\u4fb5\u6743", 52, 54}, token{"\u8d23\u4efb", 54, 56}, token{"\u8d23\u4efb\u6cd5", 54, 57}, token{"\u8349\u6848", 57, 59}, token{"\uff0c", 59, 60}, token{"\u5220\u9664", 60, 62}, token{"\u4e86", 62, 63}, token{"\u6709\u5173", 63, 65}, token{"\u533b\u7597", 65, 67}, token{"\u635f\u5bb3", 67, 69}, token{"\u8d23\u4efb", 69, 71}, token{"\u201c", 71, 72}, token{"\u4e3e\u8bc1", 72, 74}, token{"\u5012\u7f6e", 74, 76}, token{"\u201d", 76, 77}, token{"\u7684", 77, 78}, token{"\u89c4\u5b9a", 78, 80}, token{"\u3002", 80, 81}, token{"\u5728", 81, 82}, token{"\u533b\u60a3", 82, 84}, token{"\u7ea0\u7eb7", 84, 86}, token{"\u4e2d\u672c", 86, 88}, token{"\u5df2", 88, 89}, token{"\u5904\u4e8e", 89, 91}, token{"\u5f31\u52bf", 91, 93}, token{"\u5730\u4f4d", 93, 95}, token{"\u7684", 95, 96}, token{"\u6d88\u8d39", 96, 98}, token{"\u6d88\u8d39\u8005", 96, 99}, token{"\u7531\u6b64", 99, 101}, token{"\u5c06", 101, 102}, token{"\u9677\u5165", 102, 104}, token{"\u4e0d\u590d", 106, 108}, token{"\u4e07\u52ab\u4e0d\u590d", 104, 108}, token{"\u7684", 108, 109}, 
token{"\u5883\u5730", 109, 111}, token{"\u3002", 111, 112}, token{" ", 112, 113}},
- []token{token{"\u5927", 0, 1}},
- []token{},
- []token{token{"\u4ed6", 0, 1}, token{"\u8bf4", 1, 2}, token{"\u7684", 2, 3}, token{"\u786e\u5b9e", 3, 5}, token{"\u5728\u7406", 5, 7}},
- []token{token{"\u957f\u6625", 0, 2}, token{"\u5e02\u957f", 2, 4}, token{"\u6625\u8282", 4, 6}, token{"\u8bb2\u8bdd", 6, 8}},
- []token{token{"\u7ed3\u5a5a", 0, 2}, token{"\u7684", 2, 3}, token{"\u548c", 3, 4}, token{"\u5c1a\u672a", 4, 6}, token{"\u7ed3\u5a5a", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u7ed3\u5408", 0, 2}, token{"\u6210", 2, 3}, token{"\u5206\u5b50", 3, 5}, token{"\u65f6", 5, 6}},
- []token{token{"\u65c5\u6e38", 0, 2}, token{"\u548c", 2, 3}, token{"\u670d\u52a1", 3, 5}, token{"\u662f", 5, 6}, token{"\u6700\u597d", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u8fd9\u4ef6", 0, 2}, token{"\u4e8b\u60c5", 2, 4}, token{"\u7684\u786e", 4, 6}, token{"\u662f", 6, 7}, token{"\u6211", 7, 8}, token{"\u7684", 8, 9}, token{"\u9519", 9, 10}},
- []token{token{"\u4f9b", 0, 1}, token{"\u5927\u5bb6", 1, 3}, token{"\u53c2\u8003", 3, 5}, token{"\u6307\u6b63", 5, 7}},
- []token{token{"\u54c8\u5c14", 0, 2}, token{"\u54c8\u5c14\u6ee8", 0, 3}, token{"\u653f\u5e9c", 3, 5}, token{"\u516c\u5e03", 5, 7}, token{"\u584c\u6865", 7, 9}, token{"\u539f\u56e0", 9, 11}},
- []token{token{"\u6211", 0, 1}, token{"\u5728", 1, 2}, token{"\u673a\u573a", 2, 4}, token{"\u5165\u53e3", 4, 6}, token{"\u5165\u53e3\u5904", 4, 7}},
- []token{token{"\u90a2\u6c38\u81e3", 0, 3}, token{"\u6444\u5f71", 3, 5}, token{"\u62a5\u9053", 5, 7}},
- []token{token{"BP", 0, 2}, token{"\u795e\u7ecf", 2, 4}, token{"\u7f51\u7edc", 4, 6}, token{"\u795e\u7ecf\u7f51", 2, 5}, token{"\u795e\u7ecf\u7f51\u7edc", 2, 6}, token{"\u5982\u4f55", 6, 8}, token{"\u8bad\u7ec3", 8, 10}, token{"\u624d\u80fd", 10, 12}, token{"\u5728", 12, 13}, token{"\u5206\u7c7b", 13, 15}, token{"\u65f6", 15, 16}, token{"\u589e\u52a0", 16, 18}, token{"\u533a\u5206", 18, 20}, token{"\u5206\u5ea6", 19, 21}, token{"\u533a\u5206\u5ea6", 18, 21}, token{"\uff1f", 21, 22}},
- []token{token{"\u5357\u4eac", 0, 2}, token{"\u4eac\u5e02", 1, 3}, token{"\u5357\u4eac\u5e02", 0, 3}, token{"\u957f\u6c5f", 3, 5}, token{"\u5927\u6865", 5, 7}, token{"\u957f\u6c5f\u5927\u6865", 3, 7}},
- []token{token{"\u5e94", 0, 1}, token{"\u4e00\u4e9b", 1, 3}, token{"\u4f7f\u7528", 3, 5}, token{"\u7528\u8005", 4, 6}, token{"\u4f7f\u7528\u8005", 3, 6}, token{"\u7684", 6, 7}, token{"\u5efa\u8bae", 7, 9}, token{"\uff0c", 9, 10}, token{"\u4e5f", 10, 11}, token{"\u4e3a\u4e86", 11, 13}, token{"\u4fbf\u4e8e", 13, 15}, token{"\u5229\u7528", 15, 17}, token{"NiuTrans", 17, 25}, token{"\u7528\u4e8e", 25, 27}, token{"SMT", 27, 30}, token{"\u7814\u7a76", 30, 32}},
- []token{token{"\u957f\u6625", 0, 2}, token{"\u957f\u6625\u5e02", 0, 3}, token{"\u957f\u6625", 3, 5}, token{"\u836f\u5e97", 5, 7}},
- []token{token{"\u9093\u9896\u8d85", 0, 3}, token{"\u751f\u524d", 3, 5}, token{"\u6700", 5, 6}, token{"\u559c\u6b22", 6, 8}, token{"\u7684", 8, 9}, token{"\u8863\u670d", 9, 11}},
- []token{token{"\u9526\u6d9b", 1, 3}, token{"\u80e1\u9526\u6d9b", 0, 3}, token{"\u662f", 3, 4}, token{"\u70ed\u7231", 4, 6}, token{"\u4e16\u754c", 6, 8}, token{"\u548c\u5e73", 8, 10}, token{"\u7684", 10, 11}, token{"\u653f\u6cbb", 11, 13}, token{"\u653f\u6cbb\u5c40", 11, 14}, token{"\u5e38\u59d4", 14, 16}},
- []token{token{"\u7a0b\u5e8f", 0, 2}, token{"\u7a0b\u5e8f\u5458", 0, 3}, token{"\u795d", 3, 4}, token{"\u6d77\u6797", 4, 6}, token{"\u548c", 6, 7}, token{"\u6731\u4f1a\u9707", 7, 10}, token{"\u662f", 10, 11}, token{"\u5728", 11, 12}, token{"\u5b59\u5065", 12, 14}, token{"\u7684", 14, 15}, token{"\u5de6\u9762", 15, 17}, token{"\u548c", 17, 18}, token{"\u53f3\u9762", 18, 20}, token{",", 20, 21}, token{" ", 21, 22}, token{"\u8303\u51ef", 22, 24}, token{"\u5728", 24, 25}, token{"\u6700", 25, 26}, token{"\u53f3\u9762", 26, 28}, token{".", 28, 29}, token{"\u518d\u5f80", 29, 31}, token{"\u5de6", 31, 32}, token{"\u662f", 32, 33}, token{"\u674e\u677e\u6d2a", 33, 36}},
- []token{token{"\u4e00\u6b21", 0, 2}, token{"\u4e00\u6b21\u6027", 0, 3}, token{"\u4ea4", 3, 4}, token{"\u591a\u5c11", 4, 6}, token{"\u94b1", 6, 7}},
- []token{token{"\u4e24\u5757", 0, 2}, token{"\u4e94", 2, 3}, token{"\u4e00\u5957", 3, 5}, token{"\uff0c", 5, 6}, token{"\u4e09\u5757", 6, 8}, token{"\u516b", 8, 9}, token{"\u4e00\u65a4", 9, 11}, token{"\uff0c", 11, 12}, token{"\u56db\u5757", 12, 14}, token{"\u4e03", 14, 15}, token{"\u4e00\u672c", 15, 17}, token{"\uff0c", 17, 18}, token{"\u4e94\u5757", 18, 20}, token{"\u516d", 20, 21}, token{"\u4e00\u6761", 21, 23}},
- []token{token{"\u5c0f", 0, 1}, token{"\u548c\u5c1a", 1, 3}, token{"\u7559", 3, 4}, token{"\u4e86", 4, 5}, token{"\u4e00\u4e2a", 5, 7}, token{"\u50cf", 7, 8}, token{"\u5927", 8, 9}, token{"\u548c\u5c1a", 9, 11}, token{"\u4e00\u6837", 11, 13}, token{"\u7684", 13, 14}, token{"\u548c\u5c1a", 14, 16}, token{"\u548c\u5c1a\u5934", 14, 17}},
- []token{token{"\u6211", 0, 1}, token{"\u662f", 1, 2}, token{"\u4e2d\u534e", 2, 4}, token{"\u534e\u4eba", 3, 5}, token{"\u4eba\u6c11", 4, 6}, token{"\u5171\u548c", 6, 8}, token{"\u5171\u548c\u56fd", 6, 9}, token{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", 2, 9}, token{"\u516c\u6c11", 9, 11}, token{";", 11, 12}, token{"\u6211", 12, 13}, token{"\u7238\u7238", 13, 15}, token{"\u662f", 15, 16}, token{"\u5171\u548c", 16, 18}, token{"\u5171\u548c\u515a", 16, 19}, token{"\u515a\u5458", 19, 21}, token{";", 21, 22}, token{" ", 22, 23}, token{"\u5730\u94c1", 23, 25}, token{"\u548c\u5e73", 25, 27}, token{"\u548c\u5e73\u95e8", 25, 28}, token{"\u7ad9", 28, 29}},
- []token{token{"\u5f20\u6653\u6885", 0, 3}, token{"\u53bb", 3, 4}, token{"\u4eba\u6c11", 4, 6}, token{"\u533b\u9662", 6, 8}, token{"\u505a", 8, 9}, token{"\u4e86", 9, 10}, token{"\u4e2a", 10, 11}, token{"B\u8d85", 11, 13}, token{"\u7136\u540e", 13, 15}, token{"\u53bb", 15, 16}, token{"\u4e70", 16, 17}, token{"\u4e86", 17, 18}, token{"\u4ef6", 18, 19}, token{"T\u6064", 19, 21}},
- []token{token{"AT&T", 0, 4}, token{"\u662f", 4, 5}, token{"\u4e00\u4ef6", 5, 7}, token{"\u4e0d\u9519", 7, 9}, token{"\u7684", 9, 10}, token{"\u516c\u53f8", 10, 12}, token{"\uff0c", 12, 13}, token{"\u7ed9", 13, 14}, token{"\u4f60", 14, 15}, token{"\u53d1", 15, 16}, token{"offer", 16, 21}, token{"\u4e86", 21, 22}, token{"\u5417", 22, 23}, token{"\uff1f", 23, 24}},
- []token{token{"C++", 0, 3}, token{"\u548c", 3, 4}, token{"c#", 4, 6}, token{"\u662f", 6, 7}, token{"\u4ec0\u4e48", 7, 9}, token{"\u5173\u7cfb", 9, 11}, token{"\uff1f", 11, 12}, token{"11", 12, 14}, token{"+", 14, 15}, token{"122", 15, 18}, token{"=", 18, 19}, token{"133", 19, 22}, token{"\uff0c", 22, 23}, token{"\u662f", 23, 24}, token{"\u5417", 24, 25}, token{"\uff1f", 25, 26}, token{"PI", 26, 28}, token{"=", 28, 29}, token{"3.14159", 29, 36}},
- []token{token{"\u4f60", 0, 1}, token{"\u8ba4\u8bc6", 1, 3}, token{"\u90a3\u4e2a", 3, 5}, token{"\u548c", 5, 6}, token{"\u4e3b\u5e2d", 6, 8}, token{"\u63e1\u624b", 8, 10}, token{"\u7684", 10, 11}, token{"\u7684\u54e5", 11, 13}, token{"\u5417", 13, 14}, token{"\uff1f", 14, 15}, token{"\u4ed6\u5f00", 15, 17}, token{"\u4e00\u8f86", 17, 19}, token{"\u9ed1\u8272", 19, 21}, token{"\u7684\u58eb", 21, 23}, token{"\u3002", 23, 24}},
- []token{token{"\u67aa\u6746", 0, 2}, token{"\u6746\u5b50", 1, 3}, token{"\u67aa\u6746\u5b50", 0, 3}, token{"\u4e2d", 3, 4}, token{"\u51fa", 4, 5}, token{"\u653f\u6743", 5, 7}},
- []token{token{"\u5f20\u4e09\u98ce", 0, 3}, token{"\u540c\u5b66", 3, 5}, token{"\u8d70\u4e0a", 5, 7}, token{"\u4e86", 7, 8}, token{"\u5f52\u8def", 9, 11}, token{"\u4e0d\u5f52\u8def", 8, 11}},
- []token{token{"\u963fQ", 0, 2}, token{"\u8170\u95f4", 2, 4}, token{"\u6302", 4, 5}, token{"\u7740", 5, 6}, token{"BB\u673a", 6, 9}, token{"\u624b\u91cc", 9, 11}, token{"\u62ff", 11, 12}, token{"\u7740", 12, 13}, token{"\u5927\u54e5", 13, 15}, token{"\u5927\u54e5\u5927", 13, 16}, token{"\uff0c", 16, 17}, token{"\u8bf4", 17, 18}, token{"\uff1a", 18, 19}, token{"\u6211", 19, 20}, token{"\u4e00\u822c", 20, 22}, token{"\u5403\u996d", 22, 24}, token{"\u4e0d", 24, 25}, token{"AA\u5236", 25, 28}, token{"\u7684", 28, 29}, token{"\u3002", 29, 30}},
- []token{token{"\u5728", 0, 1}, token{"1\u53f7\u5e97", 1, 4}, token{"\u80fd", 4, 5}, token{"\u4e70", 5, 6}, token{"\u5230", 6, 7}, token{"\u5c0fS", 7, 9}, token{"\u548c", 9, 10}, token{"\u5927S", 10, 12}, token{"\u516b\u5366", 12, 14}, token{"\u7684", 14, 15}, token{"\u4e66", 15, 16}, token{"\u3002", 16, 17}},
- }
- noHmmResult = [][]token{
- []token{token{"\u8fd9", 0, 1}, token{"\u662f", 1, 2}, token{"\u4e00\u4e2a", 2, 4}, token{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", 4, 10}, token{"\u7684", 10, 11}, token{"\u9ed1\u591c", 11, 13}, token{"\u3002", 13, 14}, token{"\u6211", 14, 15}, token{"\u53eb", 15, 16}, token{"\u5b59\u609f\u7a7a", 16, 19}, token{"\uff0c", 19, 20}, token{"\u6211", 20, 21}, token{"\u7231", 21, 22}, token{"\u5317\u4eac", 22, 24}, token{"\uff0c", 24, 25}, token{"\u6211", 25, 26}, token{"\u7231", 26, 27}, token{"Python", 27, 33}, token{"\u548c", 33, 34}, token{"C++", 34, 37}, token{"\u3002", 37, 38}},
- []token{token{"\u6211", 0, 1}, token{"\u4e0d", 1, 2}, token{"\u559c\u6b22", 2, 4}, token{"\u65e5\u672c", 4, 6}, token{"\u548c\u670d", 6, 8}, token{"\u3002", 8, 9}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u56de\u5f52", 2, 4}, token{"\u4eba\u95f4", 4, 6}, token{"\u3002", 6, 7}},
- []token{token{"\u5de5\u4fe1\u5904", 0, 3}, token{"\u5973\u5e72\u4e8b", 3, 6}, token{"\u6bcf\u6708", 6, 8}, token{"\u7ecf\u8fc7", 8, 10}, token{"\u4e0b\u5c5e", 10, 12}, token{"\u79d1\u5ba4", 12, 14}, token{"\u90fd", 14, 15}, token{"\u8981", 15, 16}, token{"\u4eb2\u53e3", 16, 18}, token{"\u4ea4\u4ee3", 18, 20}, token{"24", 20, 22}, token{"\u53e3", 22, 23}, token{"\u4ea4\u6362\u673a", 23, 26}, token{"\u7b49", 26, 27}, token{"\u6280\u672f\u6027", 27, 30}, token{"\u5668\u4ef6", 30, 32}, token{"\u7684", 32, 33}, token{"\u5b89\u88c5", 33, 35}, token{"\u5de5\u4f5c", 35, 37}},
- []token{token{"\u6211", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5ec9\u79df\u623f", 3, 6}},
- []token{token{"\u6c38\u548c", 0, 2}, token{"\u670d\u88c5", 2, 4}, token{"\u9970\u54c1", 4, 6}, token{"\u6709\u9650\u516c\u53f8", 6, 10}},
- []token{token{"\u6211", 0, 1}, token{"\u7231", 1, 2}, token{"\u5317\u4eac", 2, 4}, token{"\u5929\u5b89\u95e8", 4, 7}},
- []token{token{"abc", 0, 3}},
- []token{token{"\u9690", 0, 1}, token{"\u9a6c\u5c14\u53ef\u592b", 1, 5}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u662f", 2, 3}, token{"\u4e2a", 3, 4}, token{"\u597d", 4, 5}, token{"\u7f51\u7ad9", 5, 7}},
- []token{token{"\u201c", 0, 1}, token{"Microsoft", 1, 10}, token{"\u201d", 10, 11}, token{"\u4e00", 11, 12}, token{"\u8bcd", 12, 13}, token{"\u7531", 13, 14}, token{"\u201c", 14, 15}, token{"MICROcomputer", 15, 28}, token{"\uff08", 28, 29}, token{"\u5fae\u578b", 29, 31}, token{"\u8ba1\u7b97\u673a", 31, 34}, token{"\uff09", 34, 35}, token{"\u201d", 35, 36}, token{"\u548c", 36, 37}, token{"\u201c", 37, 38}, token{"SOFTware", 38, 46}, token{"\uff08", 46, 47}, token{"\u8f6f\u4ef6", 47, 49}, token{"\uff09", 49, 50}, token{"\u201d", 50, 51}, token{"\u4e24", 51, 52}, token{"\u90e8\u5206", 52, 54}, token{"\u7ec4\u6210", 54, 56}},
- []token{token{"\u8349\u6ce5\u9a6c", 0, 3}, token{"\u548c", 3, 4}, token{"\u6b3a", 4, 5}, token{"\u5b9e", 5, 6}, token{"\u9a6c", 6, 7}, token{"\u662f", 7, 8}, token{"\u4eca\u5e74", 8, 10}, token{"\u7684", 10, 11}, token{"\u6d41\u884c", 11, 13}, token{"\u8bcd\u6c47", 13, 15}},
- []token{token{"\u4f0a", 0, 1}, token{"\u85e4", 1, 2}, token{"\u6d0b\u534e\u5802", 2, 5}, token{"\u603b\u5e9c", 5, 7}, token{"\u5e97", 7, 8}},
- []token{token{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", 0, 12}},
- []token{token{"\u7f57\u5bc6\u6b27", 0, 3}, token{"\u4e0e", 3, 4}, token{"\u6731\u4e3d\u53f6", 4, 7}},
- []token{token{"\u6211", 0, 1}, token{"\u8d2d\u4e70", 1, 3}, token{"\u4e86", 3, 4}, token{"\u9053\u5177", 4, 6}, token{"\u548c", 6, 7}, token{"\u670d\u88c5", 7, 9}},
- []token{token{"PS", 0, 2}, token{":", 2, 3}, token{" ", 3, 4}, token{"\u6211", 4, 5}, token{"\u89c9\u5f97", 5, 7}, token{"\u5f00\u6e90", 7, 9}, token{"\u6709", 9, 10}, token{"\u4e00\u4e2a", 10, 12}, token{"\u597d\u5904", 12, 14}, token{"\uff0c", 14, 15}, token{"\u5c31\u662f", 15, 17}, token{"\u80fd\u591f", 17, 19}, token{"\u6566\u4fc3", 19, 21}, token{"\u81ea\u5df1", 21, 23}, token{"\u4e0d\u65ad\u6539\u8fdb", 23, 27}, token{"\uff0c", 27, 28}, token{"\u907f\u514d", 28, 30}, token{"\u655e", 30, 31}, token{"\u5e1a", 31, 32}, token{"\u81ea\u73cd", 32, 34}},
- []token{token{"\u6e56\u5317\u7701", 0, 3}, token{"\u77f3\u9996\u5e02", 3, 6}},
- []token{token{"\u6e56\u5317\u7701", 0, 3}, token{"\u5341\u5830\u5e02", 3, 6}},
- []token{token{"\u603b\u7ecf\u7406", 0, 3}, token{"\u5b8c\u6210", 3, 5}, token{"\u4e86", 5, 6}, token{"\u8fd9\u4ef6", 6, 8}, token{"\u4e8b\u60c5", 8, 10}},
- []token{token{"\u7535\u8111", 0, 2}, token{"\u4fee\u597d", 2, 4}, token{"\u4e86", 4, 5}},
- []token{token{"\u505a\u597d", 0, 2}, token{"\u4e86", 2, 3}, token{"\u8fd9\u4ef6", 3, 5}, token{"\u4e8b\u60c5", 5, 7}, token{"\u5c31", 7, 8}, token{"\u4e00\u4e86\u767e\u4e86", 8, 12}, token{"\u4e86", 12, 13}},
- []token{token{"\u4eba\u4eec", 0, 2}, token{"\u5ba1\u7f8e", 2, 4}, token{"\u7684", 4, 5}, token{"\u89c2\u70b9", 5, 7}, token{"\u662f", 7, 8}, token{"\u4e0d\u540c", 8, 10}, token{"\u7684", 10, 11}},
- []token{token{"\u6211\u4eec", 0, 2}, token{"\u4e70", 2, 3}, token{"\u4e86", 3, 4}, token{"\u4e00\u4e2a", 4, 6}, token{"\u7f8e\u7684", 6, 8}, token{"\u7a7a\u8c03", 8, 10}},
- []token{token{"\u7ebf\u7a0b", 0, 2}, token{"\u521d\u59cb\u5316", 2, 5}, token{"\u65f6", 5, 6}, token{"\u6211\u4eec", 6, 8}, token{"\u8981", 8, 9}, token{"\u6ce8\u610f", 9, 11}},
- []token{token{"\u4e00\u4e2a", 0, 2}, token{"\u5206\u5b50", 2, 4}, token{"\u662f", 4, 5}, token{"\u7531", 5, 6}, token{"\u597d\u591a", 6, 8}, token{"\u539f\u5b50", 8, 10}, token{"\u7ec4\u7ec7", 10, 12}, token{"\u6210", 12, 13}, token{"\u7684", 13, 14}},
- []token{token{"\u795d", 0, 1}, token{"\u4f60", 1, 2}, token{"\u9a6c\u5230\u529f\u6210", 2, 6}},
- []token{token{"\u4ed6", 0, 1}, token{"\u6389", 1, 2}, token{"\u8fdb", 2, 3}, token{"\u4e86", 3, 4}, token{"\u65e0\u5e95\u6d1e", 4, 7}, token{"\u91cc", 7, 8}},
- []token{token{"\u4e2d\u56fd", 0, 2}, token{"\u7684", 2, 3}, token{"\u9996\u90fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u5317\u4eac", 6, 8}},
- []token{token{"\u5b59", 0, 1}, token{"\u541b", 1, 2}, token{"\u610f", 2, 3}},
- []token{token{"\u5916\u4ea4\u90e8", 0, 3}, token{"\u53d1\u8a00\u4eba", 3, 6}, token{"\u9a6c\u671d\u65ed", 6, 9}},
- []token{token{"\u9886\u5bfc\u4eba", 0, 3}, token{"\u4f1a\u8bae", 3, 5}, token{"\u548c", 5, 6}, token{"\u7b2c\u56db\u5c4a", 6, 9}, token{"\u4e1c\u4e9a", 9, 11}, token{"\u5cf0\u4f1a", 11, 13}},
- []token{token{"\u5728", 0, 1}, token{"\u8fc7\u53bb", 1, 3}, token{"\u7684", 3, 4}, token{"\u8fd9", 4, 5}, token{"\u4e94\u5e74", 5, 7}},
- []token{token{"\u8fd8", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5f88", 3, 4}, token{"\u957f", 4, 5}, token{"\u7684", 5, 6}, token{"\u8def", 6, 7}, token{"\u8981", 7, 8}, token{"\u8d70", 8, 9}},
- []token{token{"60", 0, 2}, token{"\u5468\u5e74", 2, 4}, token{"\u9996\u90fd", 4, 6}, token{"\u9605\u5175", 6, 8}},
- []token{token{"\u4f60\u597d", 0, 2}, token{"\u4eba\u4eec", 2, 4}, token{"\u5ba1\u7f8e", 4, 6}, token{"\u7684", 6, 7}, token{"\u89c2\u70b9", 7, 9}, token{"\u662f", 9, 10}, token{"\u4e0d\u540c", 10, 12}, token{"\u7684", 12, 13}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u6765", 5, 6}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u53bb", 5, 6}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4f46\u662f", 0, 2}, token{"\u540e\u6765", 2, 4}, token{"\u6211", 4, 5}, token{"\u624d", 5, 6}, token{"\u77e5\u9053", 6, 8}, token{"\u4f60", 8, 9}, token{"\u662f", 9, 10}, token{"\u5bf9", 10, 11}, token{"\u7684", 11, 12}},
- []token{token{"\u5b58\u5728", 0, 2}, token{"\u5373", 2, 3}, token{"\u5408\u7406", 3, 5}},
- []token{token{"\u7684", 0, 1}, token{"\u7684", 1, 2}, token{"\u7684", 2, 3}, token{"\u7684", 3, 4}, token{"\u7684", 4, 5}, token{"\u5728", 5, 6}, token{"\u7684", 6, 7}, token{"\u7684", 7, 8}, token{"\u7684", 8, 9}, token{"\u7684", 9, 10}, token{"\u5c31", 10, 11}, token{"\u4ee5", 11, 12}, token{"\u548c", 12, 13}, token{"\u548c", 13, 14}, token{"\u548c", 14, 15}},
- []token{token{"I", 0, 1}, token{" ", 1, 2}, token{"love", 2, 6}, token{"\u4f60", 6, 7}, token{"\uff0c", 7, 8}, token{"\u4e0d\u4ee5\u4e3a\u803b", 8, 12}, token{"\uff0c", 12, 13}, token{"\u53cd", 13, 14}, token{"\u4ee5\u4e3a", 14, 16}, token{"rong", 16, 20}},
- []token{token{"\u56e0", 0, 1}},
- []token{},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u5f88", 0, 1}, token{"\u597d", 1, 2}, token{"\u4f46", 2, 3}, token{"\u4e3b\u8981", 3, 5}, token{"\u662f", 5, 6}, token{"\u57fa\u4e8e", 6, 8}, token{"\u7f51\u9875", 8, 10}, token{"\u5f62\u5f0f", 10, 12}},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u4e3a\u4ec0\u4e48", 0, 3}, token{"\u6211", 3, 4}, token{"\u4e0d\u80fd", 4, 6}, token{"\u62e5\u6709", 6, 8}, token{"\u60f3\u8981", 8, 10}, token{"\u7684", 10, 11}, token{"\u751f\u6d3b", 11, 13}},
- []token{token{"\u540e\u6765", 0, 2}, token{"\u6211", 2, 3}, token{"\u624d", 3, 4}},
- []token{token{"\u6b64\u6b21", 0, 2}, token{"\u6765", 2, 3}, token{"\u4e2d\u56fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u4e3a\u4e86", 6, 8}},
- []token{token{"\u4f7f\u7528", 0, 2}, token{"\u4e86", 2, 3}, token{"\u5b83", 3, 4}, token{"\u5c31", 4, 5}, token{"\u53ef\u4ee5", 5, 7}, token{"\u89e3\u51b3", 7, 9}, token{"\u4e00\u4e9b", 9, 11}, token{"\u95ee\u9898", 11, 13}},
- []token{token{",", 0, 1}, token{"\u4f7f\u7528", 1, 3}, token{"\u4e86", 3, 4}, token{"\u5b83", 4, 5}, token{"\u5c31", 5, 6}, token{"\u53ef\u4ee5", 6, 8}, token{"\u89e3\u51b3", 8, 10}, token{"\u4e00\u4e9b", 10, 12}, token{"\u95ee\u9898", 12, 14}},
- []token{token{"\u5176\u5b9e", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u597d\u4eba", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u662f\u56e0\u4e3a", 0, 3}, token{"\u548c", 3, 4}, token{"\u56fd\u5bb6", 4, 6}},
- []token{token{"\u8001\u5e74", 0, 2}, token{"\u641c\u7d22", 2, 4}, token{"\u8fd8", 4, 5}, token{"\u652f\u6301", 5, 7}},
- []token{token{"\u5e72\u8106", 0, 2}, token{"\u5c31", 2, 3}, token{"\u628a", 3, 4}, token{"\u90a3", 4, 5}, token{"\u90e8", 5, 6}, token{"\u8499", 6, 7}, token{"\u4eba", 7, 8}, token{"\u7684", 8, 9}, token{"\u95f2", 9, 10}, token{"\u6cd5", 10, 11}, token{"\u7ed9", 11, 12}, token{"\u5e9f", 12, 13}, token{"\u4e86", 13, 14}, token{"\u62c9\u5012", 14, 16}, token{"\uff01", 16, 17}, token{"RT", 17, 19}, token{" ", 19, 20}, token{"@", 20, 21}, token{"laoshipukong", 21, 33}, token{" ", 33, 34}, token{":", 34, 35}, token{" ", 35, 36}, token{"27", 36, 38}, token{"\u65e5", 38, 39}, token{"\uff0c", 39, 40}, token{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", 40, 47}, token{"\u7b2c\u4e09\u6b21", 47, 50}, token{"\u5ba1\u8bae", 50, 52}, token{"\u4fb5\u6743", 52, 54}, token{"\u8d23\u4efb\u6cd5", 54, 57}, token{"\u8349\u6848", 57, 59}, token{"\uff0c", 59, 60}, token{"\u5220\u9664", 60, 62}, token{"\u4e86", 62, 63}, token{"\u6709\u5173", 63, 65}, token{"\u533b\u7597", 65, 67}, token{"\u635f\u5bb3", 67, 69}, token{"\u8d23\u4efb", 69, 71}, token{"\u201c", 71, 72}, token{"\u4e3e\u8bc1", 72, 74}, token{"\u5012\u7f6e", 74, 76}, token{"\u201d", 76, 77}, token{"\u7684", 77, 78}, token{"\u89c4\u5b9a", 78, 80}, token{"\u3002", 80, 81}, token{"\u5728", 81, 82}, token{"\u533b\u60a3", 82, 84}, token{"\u7ea0\u7eb7", 84, 86}, token{"\u4e2d", 86, 87}, token{"\u672c", 87, 88}, token{"\u5df2", 88, 89}, token{"\u5904\u4e8e", 89, 91}, token{"\u5f31\u52bf", 91, 93}, token{"\u5730\u4f4d", 93, 95}, token{"\u7684", 95, 96}, token{"\u6d88\u8d39\u8005", 96, 99}, token{"\u7531\u6b64", 99, 101}, token{"\u5c06", 101, 102}, token{"\u9677\u5165", 102, 104}, token{"\u4e07\u52ab\u4e0d\u590d", 104, 108}, token{"\u7684", 108, 109}, token{"\u5883\u5730", 109, 111}, token{"\u3002", 111, 112}, token{" ", 112, 113}},
- []token{token{"\u5927", 0, 1}},
- []token{},
- []token{token{"\u4ed6", 0, 1}, token{"\u8bf4", 1, 2}, token{"\u7684", 2, 3}, token{"\u786e\u5b9e", 3, 5}, token{"\u5728", 5, 6}, token{"\u7406", 6, 7}},
- []token{token{"\u957f\u6625", 0, 2}, token{"\u5e02\u957f", 2, 4}, token{"\u6625\u8282", 4, 6}, token{"\u8bb2\u8bdd", 6, 8}},
- []token{token{"\u7ed3\u5a5a", 0, 2}, token{"\u7684", 2, 3}, token{"\u548c", 3, 4}, token{"\u5c1a\u672a", 4, 6}, token{"\u7ed3\u5a5a", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u7ed3\u5408", 0, 2}, token{"\u6210", 2, 3}, token{"\u5206\u5b50", 3, 5}, token{"\u65f6", 5, 6}},
- []token{token{"\u65c5\u6e38", 0, 2}, token{"\u548c", 2, 3}, token{"\u670d\u52a1", 3, 5}, token{"\u662f", 5, 6}, token{"\u6700\u597d", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u8fd9\u4ef6", 0, 2}, token{"\u4e8b\u60c5", 2, 4}, token{"\u7684\u786e", 4, 6}, token{"\u662f", 6, 7}, token{"\u6211", 7, 8}, token{"\u7684", 8, 9}, token{"\u9519", 9, 10}},
- []token{token{"\u4f9b", 0, 1}, token{"\u5927\u5bb6", 1, 3}, token{"\u53c2\u8003", 3, 5}, token{"\u6307\u6b63", 5, 7}},
- []token{token{"\u54c8\u5c14\u6ee8", 0, 3}, token{"\u653f\u5e9c", 3, 5}, token{"\u516c\u5e03", 5, 7}, token{"\u584c", 7, 8}, token{"\u6865", 8, 9}, token{"\u539f\u56e0", 9, 11}},
- []token{token{"\u6211", 0, 1}, token{"\u5728", 1, 2}, token{"\u673a\u573a", 2, 4}, token{"\u5165\u53e3\u5904", 4, 7}},
- []token{token{"\u90a2", 0, 1}, token{"\u6c38", 1, 2}, token{"\u81e3", 2, 3}, token{"\u6444\u5f71", 3, 5}, token{"\u62a5\u9053", 5, 7}},
- []token{token{"BP", 0, 2}, token{"\u795e\u7ecf\u7f51\u7edc", 2, 6}, token{"\u5982\u4f55", 6, 8}, token{"\u8bad\u7ec3", 8, 10}, token{"\u624d\u80fd", 10, 12}, token{"\u5728", 12, 13}, token{"\u5206\u7c7b", 13, 15}, token{"\u65f6", 15, 16}, token{"\u589e\u52a0", 16, 18}, token{"\u533a\u5206\u5ea6", 18, 21}, token{"\uff1f", 21, 22}},
- []token{token{"\u5357\u4eac\u5e02", 0, 3}, token{"\u957f\u6c5f\u5927\u6865", 3, 7}},
- []token{token{"\u5e94", 0, 1}, token{"\u4e00\u4e9b", 1, 3}, token{"\u4f7f\u7528\u8005", 3, 6}, token{"\u7684", 6, 7}, token{"\u5efa\u8bae", 7, 9}, token{"\uff0c", 9, 10}, token{"\u4e5f", 10, 11}, token{"\u4e3a\u4e86", 11, 13}, token{"\u4fbf\u4e8e", 13, 15}, token{"\u5229\u7528", 15, 17}, token{"NiuTrans", 17, 25}, token{"\u7528\u4e8e", 25, 27}, token{"SMT", 27, 30}, token{"\u7814\u7a76", 30, 32}},
- []token{token{"\u957f\u6625\u5e02", 0, 3}, token{"\u957f\u6625", 3, 5}, token{"\u836f\u5e97", 5, 7}},
- []token{token{"\u9093\u9896\u8d85", 0, 3}, token{"\u751f\u524d", 3, 5}, token{"\u6700", 5, 6}, token{"\u559c\u6b22", 6, 8}, token{"\u7684", 8, 9}, token{"\u8863\u670d", 9, 11}},
- []token{token{"\u80e1\u9526\u6d9b", 0, 3}, token{"\u662f", 3, 4}, token{"\u70ed\u7231", 4, 6}, token{"\u4e16\u754c", 6, 8}, token{"\u548c\u5e73", 8, 10}, token{"\u7684", 10, 11}, token{"\u653f\u6cbb\u5c40", 11, 14}, token{"\u5e38\u59d4", 14, 16}},
- []token{token{"\u7a0b\u5e8f\u5458", 0, 3}, token{"\u795d", 3, 4}, token{"\u6d77\u6797", 4, 6}, token{"\u548c", 6, 7}, token{"\u6731", 7, 8}, token{"\u4f1a", 8, 9}, token{"\u9707", 9, 10}, token{"\u662f", 10, 11}, token{"\u5728", 11, 12}, token{"\u5b59", 12, 13}, token{"\u5065", 13, 14}, token{"\u7684", 14, 15}, token{"\u5de6\u9762", 15, 17}, token{"\u548c", 17, 18}, token{"\u53f3\u9762", 18, 20}, token{",", 20, 21}, token{" ", 21, 22}, token{"\u8303", 22, 23}, token{"\u51ef", 23, 24}, token{"\u5728", 24, 25}, token{"\u6700", 25, 26}, token{"\u53f3\u9762", 26, 28}, token{".", 28, 29}, token{"\u518d", 29, 30}, token{"\u5f80", 30, 31}, token{"\u5de6", 31, 32}, token{"\u662f", 32, 33}, token{"\u674e", 33, 34}, token{"\u677e", 34, 35}, token{"\u6d2a", 35, 36}},
- []token{token{"\u4e00\u6b21\u6027", 0, 3}, token{"\u4ea4", 3, 4}, token{"\u591a\u5c11", 4, 6}, token{"\u94b1", 6, 7}},
- []token{token{"\u4e24\u5757", 0, 2}, token{"\u4e94", 2, 3}, token{"\u4e00\u5957", 3, 5}, token{"\uff0c", 5, 6}, token{"\u4e09\u5757", 6, 8}, token{"\u516b", 8, 9}, token{"\u4e00\u65a4", 9, 11}, token{"\uff0c", 11, 12}, token{"\u56db\u5757", 12, 14}, token{"\u4e03", 14, 15}, token{"\u4e00\u672c", 15, 17}, token{"\uff0c", 17, 18}, token{"\u4e94\u5757", 18, 20}, token{"\u516d", 20, 21}, token{"\u4e00\u6761", 21, 23}},
- []token{token{"\u5c0f", 0, 1}, token{"\u548c\u5c1a", 1, 3}, token{"\u7559", 3, 4}, token{"\u4e86", 4, 5}, token{"\u4e00\u4e2a", 5, 7}, token{"\u50cf", 7, 8}, token{"\u5927", 8, 9}, token{"\u548c\u5c1a", 9, 11}, token{"\u4e00\u6837", 11, 13}, token{"\u7684", 13, 14}, token{"\u548c\u5c1a\u5934", 14, 17}},
- []token{token{"\u6211", 0, 1}, token{"\u662f", 1, 2}, token{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", 2, 9}, token{"\u516c\u6c11", 9, 11}, token{";", 11, 12}, token{"\u6211", 12, 13}, token{"\u7238\u7238", 13, 15}, token{"\u662f", 15, 16}, token{"\u5171\u548c\u515a", 16, 19}, token{"\u515a\u5458", 19, 21}, token{";", 21, 22}, token{" ", 22, 23}, token{"\u5730\u94c1", 23, 25}, token{"\u548c\u5e73\u95e8", 25, 28}, token{"\u7ad9", 28, 29}},
- []token{token{"\u5f20\u6653\u6885", 0, 3}, token{"\u53bb", 3, 4}, token{"\u4eba\u6c11", 4, 6}, token{"\u533b\u9662", 6, 8}, token{"\u505a", 8, 9}, token{"\u4e86", 9, 10}, token{"\u4e2a", 10, 11}, token{"B\u8d85", 11, 13}, token{"\u7136\u540e", 13, 15}, token{"\u53bb", 15, 16}, token{"\u4e70", 16, 17}, token{"\u4e86", 17, 18}, token{"\u4ef6", 18, 19}, token{"T\u6064", 19, 21}},
- []token{token{"AT&T", 0, 4}, token{"\u662f", 4, 5}, token{"\u4e00\u4ef6", 5, 7}, token{"\u4e0d\u9519", 7, 9}, token{"\u7684", 9, 10}, token{"\u516c\u53f8", 10, 12}, token{"\uff0c", 12, 13}, token{"\u7ed9", 13, 14}, token{"\u4f60", 14, 15}, token{"\u53d1", 15, 16}, token{"offer", 16, 21}, token{"\u4e86", 21, 22}, token{"\u5417", 22, 23}, token{"\uff1f", 23, 24}},
- []token{token{"C++", 0, 3}, token{"\u548c", 3, 4}, token{"c#", 4, 6}, token{"\u662f", 6, 7}, token{"\u4ec0\u4e48", 7, 9}, token{"\u5173\u7cfb", 9, 11}, token{"\uff1f", 11, 12}, token{"11", 12, 14}, token{"+", 14, 15}, token{"122", 15, 18}, token{"=", 18, 19}, token{"133", 19, 22}, token{"\uff0c", 22, 23}, token{"\u662f", 23, 24}, token{"\u5417", 24, 25}, token{"\uff1f", 25, 26}, token{"PI", 26, 28}, token{"=", 28, 29}, token{"3", 29, 30}, token{".", 30, 31}, token{"14159", 31, 36}},
- []token{token{"\u4f60", 0, 1}, token{"\u8ba4\u8bc6", 1, 3}, token{"\u90a3\u4e2a", 3, 5}, token{"\u548c", 5, 6}, token{"\u4e3b\u5e2d", 6, 8}, token{"\u63e1\u624b", 8, 10}, token{"\u7684", 10, 11}, token{"\u7684\u54e5", 11, 13}, token{"\u5417", 13, 14}, token{"\uff1f", 14, 15}, token{"\u4ed6", 15, 16}, token{"\u5f00", 16, 17}, token{"\u4e00\u8f86", 17, 19}, token{"\u9ed1\u8272", 19, 21}, token{"\u7684\u58eb", 21, 23}, token{"\u3002", 23, 24}},
- []token{token{"\u67aa\u6746\u5b50", 0, 3}, token{"\u4e2d", 3, 4}, token{"\u51fa", 4, 5}, token{"\u653f\u6743", 5, 7}},
- []token{token{"\u5f20", 0, 1}, token{"\u4e09", 1, 2}, token{"\u98ce", 2, 3}, token{"\u540c\u5b66", 3, 5}, token{"\u8d70\u4e0a", 5, 7}, token{"\u4e86", 7, 8}, token{"\u4e0d\u5f52\u8def", 8, 11}},
- []token{token{"\u963fQ", 0, 2}, token{"\u8170\u95f4", 2, 4}, token{"\u6302", 4, 5}, token{"\u7740", 5, 6}, token{"BB\u673a", 6, 9}, token{"\u624b\u91cc", 9, 11}, token{"\u62ff", 11, 12}, token{"\u7740", 12, 13}, token{"\u5927\u54e5\u5927", 13, 16}, token{"\uff0c", 16, 17}, token{"\u8bf4", 17, 18}, token{"\uff1a", 18, 19}, token{"\u6211", 19, 20}, token{"\u4e00\u822c", 20, 22}, token{"\u5403\u996d", 22, 24}, token{"\u4e0d", 24, 25}, token{"AA\u5236", 25, 28}, token{"\u7684", 28, 29}, token{"\u3002", 29, 30}},
- []token{token{"\u5728", 0, 1}, token{"1\u53f7\u5e97", 1, 4}, token{"\u80fd", 4, 5}, token{"\u4e70", 5, 6}, token{"\u5230", 6, 7}, token{"\u5c0fS", 7, 9}, token{"\u548c", 9, 10}, token{"\u5927S", 10, 12}, token{"\u516b\u5366", 12, 14}, token{"\u7684", 14, 15}, token{"\u4e66", 15, 16}, token{"\u3002", 16, 17}},
- []token{token{"\u8fd9", 0, 1}, token{"\u662f", 1, 2}, token{"\u4e00\u4e2a", 2, 4}, token{"\u4f38\u624b", 4, 6}, token{"\u4e0d\u89c1", 6, 8}, token{"\u4e94\u6307", 8, 10}, token{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", 4, 10}, token{"\u7684", 10, 11}, token{"\u9ed1\u591c", 11, 13}, token{"\u3002", 13, 14}, token{"\u6211", 14, 15}, token{"\u53eb", 15, 16}, token{"\u609f\u7a7a", 17, 19}, token{"\u5b59\u609f\u7a7a", 16, 19}, token{"\uff0c", 19, 20}, token{"\u6211", 20, 21}, token{"\u7231", 21, 22}, token{"\u5317\u4eac", 22, 24}, token{"\uff0c", 24, 25}, token{"\u6211", 25, 26}, token{"\u7231", 26, 27}, token{"Python", 27, 33}, token{"\u548c", 33, 34}, token{"C++", 34, 37}, token{"\u3002", 37, 38}},
- []token{token{"\u6211", 0, 1}, token{"\u4e0d", 1, 2}, token{"\u559c\u6b22", 2, 4}, token{"\u65e5\u672c", 4, 6}, token{"\u548c\u670d", 6, 8}, token{"\u3002", 8, 9}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u56de\u5f52", 2, 4}, token{"\u4eba\u95f4", 4, 6}, token{"\u3002", 6, 7}},
- []token{token{"\u5de5\u4fe1\u5904", 0, 3}, token{"\u5e72\u4e8b", 4, 6}, token{"\u5973\u5e72\u4e8b", 3, 6}, token{"\u6bcf\u6708", 6, 8}, token{"\u7ecf\u8fc7", 8, 10}, token{"\u4e0b\u5c5e", 10, 12}, token{"\u79d1\u5ba4", 12, 14}, token{"\u90fd", 14, 15}, token{"\u8981", 15, 16}, token{"\u4eb2\u53e3", 16, 18}, token{"\u4ea4\u4ee3", 18, 20}, token{"24", 20, 22}, token{"\u53e3", 22, 23}, token{"\u4ea4\u6362", 23, 25}, token{"\u6362\u673a", 24, 26}, token{"\u4ea4\u6362\u673a", 23, 26}, token{"\u7b49", 26, 27}, token{"\u6280\u672f", 27, 29}, token{"\u6280\u672f\u6027", 27, 30}, token{"\u5668\u4ef6", 30, 32}, token{"\u7684", 32, 33}, token{"\u5b89\u88c5", 33, 35}, token{"\u5de5\u4f5c", 35, 37}},
- []token{token{"\u6211", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5ec9\u79df", 3, 5}, token{"\u79df\u623f", 4, 6}, token{"\u5ec9\u79df\u623f", 3, 6}},
- []token{token{"\u6c38\u548c", 0, 2}, token{"\u670d\u88c5", 2, 4}, token{"\u9970\u54c1", 4, 6}, token{"\u6709\u9650", 6, 8}, token{"\u516c\u53f8", 8, 10}, token{"\u6709\u9650\u516c\u53f8", 6, 10}},
- []token{token{"\u6211", 0, 1}, token{"\u7231", 1, 2}, token{"\u5317\u4eac", 2, 4}, token{"\u5929\u5b89", 4, 6}, token{"\u5929\u5b89\u95e8", 4, 7}},
- []token{token{"abc", 0, 3}},
- []token{token{"\u9690", 0, 1}, token{"\u53ef\u592b", 3, 5}, token{"\u9a6c\u5c14\u53ef", 1, 4}, token{"\u9a6c\u5c14\u53ef\u592b", 1, 5}},
- []token{token{"\u96f7\u7334", 0, 2}, token{"\u662f", 2, 3}, token{"\u4e2a", 3, 4}, token{"\u597d", 4, 5}, token{"\u7f51\u7ad9", 5, 7}},
- []token{token{"\u201c", 0, 1}, token{"Microsoft", 1, 10}, token{"\u201d", 10, 11}, token{"\u4e00", 11, 12}, token{"\u8bcd", 12, 13}, token{"\u7531", 13, 14}, token{"\u201c", 14, 15}, token{"MICROcomputer", 15, 28}, token{"\uff08", 28, 29}, token{"\u5fae\u578b", 29, 31}, token{"\u8ba1\u7b97", 31, 33}, token{"\u7b97\u673a", 32, 34}, token{"\u8ba1\u7b97\u673a", 31, 34}, token{"\uff09", 34, 35}, token{"\u201d", 35, 36}, token{"\u548c", 36, 37}, token{"\u201c", 37, 38}, token{"SOFTware", 38, 46}, token{"\uff08", 46, 47}, token{"\u8f6f\u4ef6", 47, 49}, token{"\uff09", 49, 50}, token{"\u201d", 50, 51}, token{"\u4e24", 51, 52}, token{"\u90e8\u5206", 52, 54}, token{"\u7ec4\u6210", 54, 56}},
- []token{token{"\u8349\u6ce5\u9a6c", 0, 3}, token{"\u548c", 3, 4}, token{"\u6b3a", 4, 5}, token{"\u5b9e", 5, 6}, token{"\u9a6c", 6, 7}, token{"\u662f", 7, 8}, token{"\u4eca\u5e74", 8, 10}, token{"\u7684", 10, 11}, token{"\u6d41\u884c", 11, 13}, token{"\u8bcd\u6c47", 13, 15}},
- []token{token{"\u4f0a", 0, 1}, token{"\u85e4", 1, 2}, token{"\u6d0b\u534e\u5802", 2, 5}, token{"\u603b\u5e9c", 5, 7}, token{"\u5e97", 7, 8}},
- []token{token{"\u4e2d\u56fd", 0, 2}, token{"\u79d1\u5b66", 2, 4}, token{"\u5b66\u9662", 3, 5}, token{"\u8ba1\u7b97", 5, 7}, token{"\u6280\u672f", 7, 9}, token{"\u7814\u7a76", 9, 11}, token{"\u79d1\u5b66\u9662", 2, 5}, token{"\u7814\u7a76\u6240", 9, 12}, token{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", 0, 12}},
- []token{token{"\u7f57\u5bc6\u6b27", 0, 3}, token{"\u4e0e", 3, 4}, token{"\u6731\u4e3d\u53f6", 4, 7}},
- []token{token{"\u6211", 0, 1}, token{"\u8d2d\u4e70", 1, 3}, token{"\u4e86", 3, 4}, token{"\u9053\u5177", 4, 6}, token{"\u548c", 6, 7}, token{"\u670d\u88c5", 7, 9}},
- []token{token{"PS", 0, 2}, token{":", 2, 3}, token{" ", 3, 4}, token{"\u6211", 4, 5}, token{"\u89c9\u5f97", 5, 7}, token{"\u5f00\u6e90", 7, 9}, token{"\u6709", 9, 10}, token{"\u4e00\u4e2a", 10, 12}, token{"\u597d\u5904", 12, 14}, token{"\uff0c", 14, 15}, token{"\u5c31\u662f", 15, 17}, token{"\u80fd\u591f", 17, 19}, token{"\u6566\u4fc3", 19, 21}, token{"\u81ea\u5df1", 21, 23}, token{"\u4e0d\u65ad", 23, 25}, token{"\u6539\u8fdb", 25, 27}, token{"\u4e0d\u65ad\u6539\u8fdb", 23, 27}, token{"\uff0c", 27, 28}, token{"\u907f\u514d", 28, 30}, token{"\u655e", 30, 31}, token{"\u5e1a", 31, 32}, token{"\u81ea\u73cd", 32, 34}},
- []token{token{"\u6e56\u5317", 0, 2}, token{"\u6e56\u5317\u7701", 0, 3}, token{"\u77f3\u9996", 3, 5}, token{"\u77f3\u9996\u5e02", 3, 6}},
- []token{token{"\u6e56\u5317", 0, 2}, token{"\u6e56\u5317\u7701", 0, 3}, token{"\u5341\u5830", 3, 5}, token{"\u5341\u5830\u5e02", 3, 6}},
- []token{token{"\u7ecf\u7406", 1, 3}, token{"\u603b\u7ecf\u7406", 0, 3}, token{"\u5b8c\u6210", 3, 5}, token{"\u4e86", 5, 6}, token{"\u8fd9\u4ef6", 6, 8}, token{"\u4e8b\u60c5", 8, 10}},
- []token{token{"\u7535\u8111", 0, 2}, token{"\u4fee\u597d", 2, 4}, token{"\u4e86", 4, 5}},
- []token{token{"\u505a\u597d", 0, 2}, token{"\u4e86", 2, 3}, token{"\u8fd9\u4ef6", 3, 5}, token{"\u4e8b\u60c5", 5, 7}, token{"\u5c31", 7, 8}, token{"\u4e00\u4e86\u767e\u4e86", 8, 12}, token{"\u4e86", 12, 13}},
- []token{token{"\u4eba\u4eec", 0, 2}, token{"\u5ba1\u7f8e", 2, 4}, token{"\u7684", 4, 5}, token{"\u89c2\u70b9", 5, 7}, token{"\u662f", 7, 8}, token{"\u4e0d\u540c", 8, 10}, token{"\u7684", 10, 11}},
- []token{token{"\u6211\u4eec", 0, 2}, token{"\u4e70", 2, 3}, token{"\u4e86", 3, 4}, token{"\u4e00\u4e2a", 4, 6}, token{"\u7f8e\u7684", 6, 8}, token{"\u7a7a\u8c03", 8, 10}},
- []token{token{"\u7ebf\u7a0b", 0, 2}, token{"\u521d\u59cb", 2, 4}, token{"\u521d\u59cb\u5316", 2, 5}, token{"\u65f6", 5, 6}, token{"\u6211\u4eec", 6, 8}, token{"\u8981", 8, 9}, token{"\u6ce8\u610f", 9, 11}},
- []token{token{"\u4e00\u4e2a", 0, 2}, token{"\u5206\u5b50", 2, 4}, token{"\u662f", 4, 5}, token{"\u7531", 5, 6}, token{"\u597d\u591a", 6, 8}, token{"\u539f\u5b50", 8, 10}, token{"\u7ec4\u7ec7", 10, 12}, token{"\u6210", 12, 13}, token{"\u7684", 13, 14}},
- []token{token{"\u795d", 0, 1}, token{"\u4f60", 1, 2}, token{"\u9a6c\u5230\u529f\u6210", 2, 6}},
- []token{token{"\u4ed6", 0, 1}, token{"\u6389", 1, 2}, token{"\u8fdb", 2, 3}, token{"\u4e86", 3, 4}, token{"\u65e0\u5e95", 4, 6}, token{"\u65e0\u5e95\u6d1e", 4, 7}, token{"\u91cc", 7, 8}},
- []token{token{"\u4e2d\u56fd", 0, 2}, token{"\u7684", 2, 3}, token{"\u9996\u90fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u5317\u4eac", 6, 8}},
- []token{token{"\u5b59", 0, 1}, token{"\u541b", 1, 2}, token{"\u610f", 2, 3}},
- []token{token{"\u5916\u4ea4", 0, 2}, token{"\u5916\u4ea4\u90e8", 0, 3}, token{"\u53d1\u8a00", 3, 5}, token{"\u53d1\u8a00\u4eba", 3, 6}, token{"\u9a6c\u671d\u65ed", 6, 9}},
- []token{token{"\u9886\u5bfc", 0, 2}, token{"\u9886\u5bfc\u4eba", 0, 3}, token{"\u4f1a\u8bae", 3, 5}, token{"\u548c", 5, 6}, token{"\u7b2c\u56db", 6, 8}, token{"\u56db\u5c4a", 7, 9}, token{"\u7b2c\u56db\u5c4a", 6, 9}, token{"\u4e1c\u4e9a", 9, 11}, token{"\u5cf0\u4f1a", 11, 13}},
- []token{token{"\u5728", 0, 1}, token{"\u8fc7\u53bb", 1, 3}, token{"\u7684", 3, 4}, token{"\u8fd9", 4, 5}, token{"\u4e94\u5e74", 5, 7}},
- []token{token{"\u8fd8", 0, 1}, token{"\u9700\u8981", 1, 3}, token{"\u5f88", 3, 4}, token{"\u957f", 4, 5}, token{"\u7684", 5, 6}, token{"\u8def", 6, 7}, token{"\u8981", 7, 8}, token{"\u8d70", 8, 9}},
- []token{token{"60", 0, 2}, token{"\u5468\u5e74", 2, 4}, token{"\u9996\u90fd", 4, 6}, token{"\u9605\u5175", 6, 8}},
- []token{token{"\u4f60\u597d", 0, 2}, token{"\u4eba\u4eec", 2, 4}, token{"\u5ba1\u7f8e", 4, 6}, token{"\u7684", 6, 7}, token{"\u89c2\u70b9", 7, 9}, token{"\u662f", 9, 10}, token{"\u4e0d\u540c", 10, 12}, token{"\u7684", 12, 13}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u6765", 5, 6}, token{"\u4e16\u535a", 6, 8}, token{"\u535a\u56ed", 7, 9}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4e70", 0, 1}, token{"\u6c34\u679c", 1, 3}, token{"\u7136\u540e", 3, 5}, token{"\u53bb", 5, 6}, token{"\u4e16\u535a", 6, 8}, token{"\u535a\u56ed", 7, 9}, token{"\u4e16\u535a\u56ed", 6, 9}},
- []token{token{"\u4f46\u662f", 0, 2}, token{"\u540e\u6765", 2, 4}, token{"\u6211", 4, 5}, token{"\u624d", 5, 6}, token{"\u77e5\u9053", 6, 8}, token{"\u4f60", 8, 9}, token{"\u662f", 9, 10}, token{"\u5bf9", 10, 11}, token{"\u7684", 11, 12}},
- []token{token{"\u5b58\u5728", 0, 2}, token{"\u5373", 2, 3}, token{"\u5408\u7406", 3, 5}},
- []token{token{"\u7684", 0, 1}, token{"\u7684", 1, 2}, token{"\u7684", 2, 3}, token{"\u7684", 3, 4}, token{"\u7684", 4, 5}, token{"\u5728", 5, 6}, token{"\u7684", 6, 7}, token{"\u7684", 7, 8}, token{"\u7684", 8, 9}, token{"\u7684", 9, 10}, token{"\u5c31", 10, 11}, token{"\u4ee5", 11, 12}, token{"\u548c", 12, 13}, token{"\u548c", 13, 14}, token{"\u548c", 14, 15}},
- []token{token{"I", 0, 1}, token{" ", 1, 2}, token{"love", 2, 6}, token{"\u4f60", 6, 7}, token{"\uff0c", 7, 8}, token{"\u4e0d\u4ee5", 8, 10}, token{"\u4ee5\u4e3a", 9, 11}, token{"\u4e0d\u4ee5\u4e3a\u803b", 8, 12}, token{"\uff0c", 12, 13}, token{"\u53cd", 13, 14}, token{"\u4ee5\u4e3a", 14, 16}, token{"rong", 16, 20}},
- []token{token{"\u56e0", 0, 1}},
- []token{},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u5f88", 0, 1}, token{"\u597d", 1, 2}, token{"\u4f46", 2, 3}, token{"\u4e3b\u8981", 3, 5}, token{"\u662f", 5, 6}, token{"\u57fa\u4e8e", 6, 8}, token{"\u7f51\u9875", 8, 10}, token{"\u5f62\u5f0f", 10, 12}},
- []token{token{"hello", 0, 5}, token{"\u4f60\u597d", 5, 7}, token{"\u4eba\u4eec", 7, 9}, token{"\u5ba1\u7f8e", 9, 11}, token{"\u7684", 11, 12}, token{"\u89c2\u70b9", 12, 14}, token{"\u662f", 14, 15}, token{"\u4e0d\u540c", 15, 17}, token{"\u7684", 17, 18}},
- []token{token{"\u4ec0\u4e48", 1, 3}, token{"\u4e3a\u4ec0\u4e48", 0, 3}, token{"\u6211", 3, 4}, token{"\u4e0d\u80fd", 4, 6}, token{"\u62e5\u6709", 6, 8}, token{"\u60f3\u8981", 8, 10}, token{"\u7684", 10, 11}, token{"\u751f\u6d3b", 11, 13}},
- []token{token{"\u540e\u6765", 0, 2}, token{"\u6211", 2, 3}, token{"\u624d", 3, 4}},
- []token{token{"\u6b64\u6b21", 0, 2}, token{"\u6765", 2, 3}, token{"\u4e2d\u56fd", 3, 5}, token{"\u662f", 5, 6}, token{"\u4e3a\u4e86", 6, 8}},
- []token{token{"\u4f7f\u7528", 0, 2}, token{"\u4e86", 2, 3}, token{"\u5b83", 3, 4}, token{"\u5c31", 4, 5}, token{"\u53ef\u4ee5", 5, 7}, token{"\u89e3\u51b3", 7, 9}, token{"\u4e00\u4e9b", 9, 11}, token{"\u95ee\u9898", 11, 13}},
- []token{token{",", 0, 1}, token{"\u4f7f\u7528", 1, 3}, token{"\u4e86", 3, 4}, token{"\u5b83", 4, 5}, token{"\u5c31", 5, 6}, token{"\u53ef\u4ee5", 6, 8}, token{"\u89e3\u51b3", 8, 10}, token{"\u4e00\u4e9b", 10, 12}, token{"\u95ee\u9898", 12, 14}},
- []token{token{"\u5176\u5b9e", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u597d\u4eba", 0, 2}, token{"\u4f7f\u7528", 2, 4}, token{"\u4e86", 4, 5}, token{"\u5b83", 5, 6}, token{"\u5c31", 6, 7}, token{"\u53ef\u4ee5", 7, 9}, token{"\u89e3\u51b3", 9, 11}, token{"\u4e00\u4e9b", 11, 13}, token{"\u95ee\u9898", 13, 15}},
- []token{token{"\u56e0\u4e3a", 1, 3}, token{"\u662f\u56e0\u4e3a", 0, 3}, token{"\u548c", 3, 4}, token{"\u56fd\u5bb6", 4, 6}},
- []token{token{"\u8001\u5e74", 0, 2}, token{"\u641c\u7d22", 2, 4}, token{"\u8fd8", 4, 5}, token{"\u652f\u6301", 5, 7}},
- []token{token{"\u5e72\u8106", 0, 2}, token{"\u5c31", 2, 3}, token{"\u628a", 3, 4}, token{"\u90a3", 4, 5}, token{"\u90e8", 5, 6}, token{"\u8499", 6, 7}, token{"\u4eba", 7, 8}, token{"\u7684", 8, 9}, token{"\u95f2", 9, 10}, token{"\u6cd5", 10, 11}, token{"\u7ed9", 11, 12}, token{"\u5e9f", 12, 13}, token{"\u4e86", 13, 14}, token{"\u62c9\u5012", 14, 16}, token{"\uff01", 16, 17}, token{"RT", 17, 19}, token{" ", 19, 20}, token{"@", 20, 21}, token{"laoshipukong", 21, 33}, token{" ", 33, 34}, token{":", 34, 35}, token{" ", 35, 36}, token{"27", 36, 38}, token{"\u65e5", 38, 39}, token{"\uff0c", 39, 40}, token{"\u5168\u56fd", 40, 42}, token{"\u56fd\u4eba", 41, 43}, token{"\u4eba\u5927", 42, 44}, token{"\u5e38\u59d4", 44, 46}, token{"\u59d4\u4f1a", 45, 47}, token{"\u5e38\u59d4\u4f1a", 44, 47}, token{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", 40, 47}, token{"\u7b2c\u4e09", 47, 49}, token{"\u4e09\u6b21", 48, 50}, token{"\u7b2c\u4e09\u6b21", 47, 50}, token{"\u5ba1\u8bae", 50, 52}, token{"\u4fb5\u6743", 52, 54}, token{"\u8d23\u4efb", 54, 56}, token{"\u8d23\u4efb\u6cd5", 54, 57}, token{"\u8349\u6848", 57, 59}, token{"\uff0c", 59, 60}, token{"\u5220\u9664", 60, 62}, token{"\u4e86", 62, 63}, token{"\u6709\u5173", 63, 65}, token{"\u533b\u7597", 65, 67}, token{"\u635f\u5bb3", 67, 69}, token{"\u8d23\u4efb", 69, 71}, token{"\u201c", 71, 72}, token{"\u4e3e\u8bc1", 72, 74}, token{"\u5012\u7f6e", 74, 76}, token{"\u201d", 76, 77}, token{"\u7684", 77, 78}, token{"\u89c4\u5b9a", 78, 80}, token{"\u3002", 80, 81}, token{"\u5728", 81, 82}, token{"\u533b\u60a3", 82, 84}, token{"\u7ea0\u7eb7", 84, 86}, token{"\u4e2d", 86, 87}, token{"\u672c", 87, 88}, token{"\u5df2", 88, 89}, token{"\u5904\u4e8e", 89, 91}, token{"\u5f31\u52bf", 91, 93}, token{"\u5730\u4f4d", 93, 95}, token{"\u7684", 95, 96}, token{"\u6d88\u8d39", 96, 98}, token{"\u6d88\u8d39\u8005", 96, 99}, token{"\u7531\u6b64", 99, 101}, token{"\u5c06", 101, 102}, token{"\u9677\u5165", 102, 104}, token{"\u4e0d\u590d", 106, 108}, 
token{"\u4e07\u52ab\u4e0d\u590d", 104, 108}, token{"\u7684", 108, 109}, token{"\u5883\u5730", 109, 111}, token{"\u3002", 111, 112}, token{" ", 112, 113}},
- []token{token{"\u5927", 0, 1}},
- []token{},
- []token{token{"\u4ed6", 0, 1}, token{"\u8bf4", 1, 2}, token{"\u7684", 2, 3}, token{"\u786e\u5b9e", 3, 5}, token{"\u5728", 5, 6}, token{"\u7406", 6, 7}},
- []token{token{"\u957f\u6625", 0, 2}, token{"\u5e02\u957f", 2, 4}, token{"\u6625\u8282", 4, 6}, token{"\u8bb2\u8bdd", 6, 8}},
- []token{token{"\u7ed3\u5a5a", 0, 2}, token{"\u7684", 2, 3}, token{"\u548c", 3, 4}, token{"\u5c1a\u672a", 4, 6}, token{"\u7ed3\u5a5a", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u7ed3\u5408", 0, 2}, token{"\u6210", 2, 3}, token{"\u5206\u5b50", 3, 5}, token{"\u65f6", 5, 6}},
- []token{token{"\u65c5\u6e38", 0, 2}, token{"\u548c", 2, 3}, token{"\u670d\u52a1", 3, 5}, token{"\u662f", 5, 6}, token{"\u6700\u597d", 6, 8}, token{"\u7684", 8, 9}},
- []token{token{"\u8fd9\u4ef6", 0, 2}, token{"\u4e8b\u60c5", 2, 4}, token{"\u7684\u786e", 4, 6}, token{"\u662f", 6, 7}, token{"\u6211", 7, 8}, token{"\u7684", 8, 9}, token{"\u9519", 9, 10}},
- []token{token{"\u4f9b", 0, 1}, token{"\u5927\u5bb6", 1, 3}, token{"\u53c2\u8003", 3, 5}, token{"\u6307\u6b63", 5, 7}},
- []token{token{"\u54c8\u5c14", 0, 2}, token{"\u54c8\u5c14\u6ee8", 0, 3}, token{"\u653f\u5e9c", 3, 5}, token{"\u516c\u5e03", 5, 7}, token{"\u584c", 7, 8}, token{"\u6865", 8, 9}, token{"\u539f\u56e0", 9, 11}},
- []token{token{"\u6211", 0, 1}, token{"\u5728", 1, 2}, token{"\u673a\u573a", 2, 4}, token{"\u5165\u53e3", 4, 6}, token{"\u5165\u53e3\u5904", 4, 7}},
- []token{token{"\u90a2", 0, 1}, token{"\u6c38", 1, 2}, token{"\u81e3", 2, 3}, token{"\u6444\u5f71", 3, 5}, token{"\u62a5\u9053", 5, 7}},
- []token{token{"BP", 0, 2}, token{"\u795e\u7ecf", 2, 4}, token{"\u7f51\u7edc", 4, 6}, token{"\u795e\u7ecf\u7f51", 2, 5}, token{"\u795e\u7ecf\u7f51\u7edc", 2, 6}, token{"\u5982\u4f55", 6, 8}, token{"\u8bad\u7ec3", 8, 10}, token{"\u624d\u80fd", 10, 12}, token{"\u5728", 12, 13}, token{"\u5206\u7c7b", 13, 15}, token{"\u65f6", 15, 16}, token{"\u589e\u52a0", 16, 18}, token{"\u533a\u5206", 18, 20}, token{"\u5206\u5ea6", 19, 21}, token{"\u533a\u5206\u5ea6", 18, 21}, token{"\uff1f", 21, 22}},
- []token{token{"\u5357\u4eac", 0, 2}, token{"\u4eac\u5e02", 1, 3}, token{"\u5357\u4eac\u5e02", 0, 3}, token{"\u957f\u6c5f", 3, 5}, token{"\u5927\u6865", 5, 7}, token{"\u957f\u6c5f\u5927\u6865", 3, 7}},
- []token{token{"\u5e94", 0, 1}, token{"\u4e00\u4e9b", 1, 3}, token{"\u4f7f\u7528", 3, 5}, token{"\u7528\u8005", 4, 6}, token{"\u4f7f\u7528\u8005", 3, 6}, token{"\u7684", 6, 7}, token{"\u5efa\u8bae", 7, 9}, token{"\uff0c", 9, 10}, token{"\u4e5f", 10, 11}, token{"\u4e3a\u4e86", 11, 13}, token{"\u4fbf\u4e8e", 13, 15}, token{"\u5229\u7528", 15, 17}, token{"NiuTrans", 17, 25}, token{"\u7528\u4e8e", 25, 27}, token{"SMT", 27, 30}, token{"\u7814\u7a76", 30, 32}},
- []token{token{"\u957f\u6625", 0, 2}, token{"\u957f\u6625\u5e02", 0, 3}, token{"\u957f\u6625", 3, 5}, token{"\u836f\u5e97", 5, 7}},
- []token{token{"\u9093\u9896\u8d85", 0, 3}, token{"\u751f\u524d", 3, 5}, token{"\u6700", 5, 6}, token{"\u559c\u6b22", 6, 8}, token{"\u7684", 8, 9}, token{"\u8863\u670d", 9, 11}},
- []token{token{"\u9526\u6d9b", 1, 3}, token{"\u80e1\u9526\u6d9b", 0, 3}, token{"\u662f", 3, 4}, token{"\u70ed\u7231", 4, 6}, token{"\u4e16\u754c", 6, 8}, token{"\u548c\u5e73", 8, 10}, token{"\u7684", 10, 11}, token{"\u653f\u6cbb", 11, 13}, token{"\u653f\u6cbb\u5c40", 11, 14}, token{"\u5e38\u59d4", 14, 16}},
- []token{token{"\u7a0b\u5e8f", 0, 2}, token{"\u7a0b\u5e8f\u5458", 0, 3}, token{"\u795d", 3, 4}, token{"\u6d77\u6797", 4, 6}, token{"\u548c", 6, 7}, token{"\u6731", 7, 8}, token{"\u4f1a", 8, 9}, token{"\u9707", 9, 10}, token{"\u662f", 10, 11}, token{"\u5728", 11, 12}, token{"\u5b59", 12, 13}, token{"\u5065", 13, 14}, token{"\u7684", 14, 15}, token{"\u5de6\u9762", 15, 17}, token{"\u548c", 17, 18}, token{"\u53f3\u9762", 18, 20}, token{",", 20, 21}, token{" ", 21, 22}, token{"\u8303", 22, 23}, token{"\u51ef", 23, 24}, token{"\u5728", 24, 25}, token{"\u6700", 25, 26}, token{"\u53f3\u9762", 26, 28}, token{".", 28, 29}, token{"\u518d", 29, 30}, token{"\u5f80", 30, 31}, token{"\u5de6", 31, 32}, token{"\u662f", 32, 33}, token{"\u674e", 33, 34}, token{"\u677e", 34, 35}, token{"\u6d2a", 35, 36}},
- []token{token{"\u4e00\u6b21", 0, 2}, token{"\u4e00\u6b21\u6027", 0, 3}, token{"\u4ea4", 3, 4}, token{"\u591a\u5c11", 4, 6}, token{"\u94b1", 6, 7}},
- []token{token{"\u4e24\u5757", 0, 2}, token{"\u4e94", 2, 3}, token{"\u4e00\u5957", 3, 5}, token{"\uff0c", 5, 6}, token{"\u4e09\u5757", 6, 8}, token{"\u516b", 8, 9}, token{"\u4e00\u65a4", 9, 11}, token{"\uff0c", 11, 12}, token{"\u56db\u5757", 12, 14}, token{"\u4e03", 14, 15}, token{"\u4e00\u672c", 15, 17}, token{"\uff0c", 17, 18}, token{"\u4e94\u5757", 18, 20}, token{"\u516d", 20, 21}, token{"\u4e00\u6761", 21, 23}},
- []token{token{"\u5c0f", 0, 1}, token{"\u548c\u5c1a", 1, 3}, token{"\u7559", 3, 4}, token{"\u4e86", 4, 5}, token{"\u4e00\u4e2a", 5, 7}, token{"\u50cf", 7, 8}, token{"\u5927", 8, 9}, token{"\u548c\u5c1a", 9, 11}, token{"\u4e00\u6837", 11, 13}, token{"\u7684", 13, 14}, token{"\u548c\u5c1a", 14, 16}, token{"\u548c\u5c1a\u5934", 14, 17}},
- []token{token{"\u6211", 0, 1}, token{"\u662f", 1, 2}, token{"\u4e2d\u534e", 2, 4}, token{"\u534e\u4eba", 3, 5}, token{"\u4eba\u6c11", 4, 6}, token{"\u5171\u548c", 6, 8}, token{"\u5171\u548c\u56fd", 6, 9}, token{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", 2, 9}, token{"\u516c\u6c11", 9, 11}, token{";", 11, 12}, token{"\u6211", 12, 13}, token{"\u7238\u7238", 13, 15}, token{"\u662f", 15, 16}, token{"\u5171\u548c", 16, 18}, token{"\u5171\u548c\u515a", 16, 19}, token{"\u515a\u5458", 19, 21}, token{";", 21, 22}, token{" ", 22, 23}, token{"\u5730\u94c1", 23, 25}, token{"\u548c\u5e73", 25, 27}, token{"\u548c\u5e73\u95e8", 25, 28}, token{"\u7ad9", 28, 29}},
- []token{token{"\u5f20\u6653\u6885", 0, 3}, token{"\u53bb", 3, 4}, token{"\u4eba\u6c11", 4, 6}, token{"\u533b\u9662", 6, 8}, token{"\u505a", 8, 9}, token{"\u4e86", 9, 10}, token{"\u4e2a", 10, 11}, token{"B\u8d85", 11, 13}, token{"\u7136\u540e", 13, 15}, token{"\u53bb", 15, 16}, token{"\u4e70", 16, 17}, token{"\u4e86", 17, 18}, token{"\u4ef6", 18, 19}, token{"T\u6064", 19, 21}},
- []token{token{"AT&T", 0, 4}, token{"\u662f", 4, 5}, token{"\u4e00\u4ef6", 5, 7}, token{"\u4e0d\u9519", 7, 9}, token{"\u7684", 9, 10}, token{"\u516c\u53f8", 10, 12}, token{"\uff0c", 12, 13}, token{"\u7ed9", 13, 14}, token{"\u4f60", 14, 15}, token{"\u53d1", 15, 16}, token{"offer", 16, 21}, token{"\u4e86", 21, 22}, token{"\u5417", 22, 23}, token{"\uff1f", 23, 24}},
- []token{token{"C++", 0, 3}, token{"\u548c", 3, 4}, token{"c#", 4, 6}, token{"\u662f", 6, 7}, token{"\u4ec0\u4e48", 7, 9}, token{"\u5173\u7cfb", 9, 11}, token{"\uff1f", 11, 12}, token{"11", 12, 14}, token{"+", 14, 15}, token{"122", 15, 18}, token{"=", 18, 19}, token{"133", 19, 22}, token{"\uff0c", 22, 23}, token{"\u662f", 23, 24}, token{"\u5417", 24, 25}, token{"\uff1f", 25, 26}, token{"PI", 26, 28}, token{"=", 28, 29}, token{"3", 29, 30}, token{".", 30, 31}, token{"14159", 31, 36}},
- []token{token{"\u4f60", 0, 1}, token{"\u8ba4\u8bc6", 1, 3}, token{"\u90a3\u4e2a", 3, 5}, token{"\u548c", 5, 6}, token{"\u4e3b\u5e2d", 6, 8}, token{"\u63e1\u624b", 8, 10}, token{"\u7684", 10, 11}, token{"\u7684\u54e5", 11, 13}, token{"\u5417", 13, 14}, token{"\uff1f", 14, 15}, token{"\u4ed6", 15, 16}, token{"\u5f00", 16, 17}, token{"\u4e00\u8f86", 17, 19}, token{"\u9ed1\u8272", 19, 21}, token{"\u7684\u58eb", 21, 23}, token{"\u3002", 23, 24}},
- []token{token{"\u67aa\u6746", 0, 2}, token{"\u6746\u5b50", 1, 3}, token{"\u67aa\u6746\u5b50", 0, 3}, token{"\u4e2d", 3, 4}, token{"\u51fa", 4, 5}, token{"\u653f\u6743", 5, 7}},
- []token{token{"\u5f20", 0, 1}, token{"\u4e09", 1, 2}, token{"\u98ce", 2, 3}, token{"\u540c\u5b66", 3, 5}, token{"\u8d70\u4e0a", 5, 7}, token{"\u4e86", 7, 8}, token{"\u5f52\u8def", 9, 11}, token{"\u4e0d\u5f52\u8def", 8, 11}},
- []token{token{"\u963fQ", 0, 2}, token{"\u8170\u95f4", 2, 4}, token{"\u6302", 4, 5}, token{"\u7740", 5, 6}, token{"BB\u673a", 6, 9}, token{"\u624b\u91cc", 9, 11}, token{"\u62ff", 11, 12}, token{"\u7740", 12, 13}, token{"\u5927\u54e5", 13, 15}, token{"\u5927\u54e5\u5927", 13, 16}, token{"\uff0c", 16, 17}, token{"\u8bf4", 17, 18}, token{"\uff1a", 18, 19}, token{"\u6211", 19, 20}, token{"\u4e00\u822c", 20, 22}, token{"\u5403\u996d", 22, 24}, token{"\u4e0d", 24, 25}, token{"AA\u5236", 25, 28}, token{"\u7684", 28, 29}, token{"\u3002", 29, 30}},
- []token{token{"\u5728", 0, 1}, token{"1\u53f7\u5e97", 1, 4}, token{"\u80fd", 4, 5}, token{"\u4e70", 5, 6}, token{"\u5230", 6, 7}, token{"\u5c0fS", 7, 9}, token{"\u548c", 9, 10}, token{"\u5927S", 10, 12}, token{"\u516b\u5366", 12, 14}, token{"\u7684", 14, 15}, token{"\u4e66", 15, 16}, token{"\u3002", 16, 17}},
- }
-)
-
-func TesttokenizeDefaultMode(t *testing.T) {
- for index, sentence := range test_contents {
- tokens := Tokenize(sentence, "default", true)
- if len(tokens) != len(result[index]) {
- t.Error(len(tokens))
- }
- for i, token := range tokens {
- if token != result[index][i] {
- t.Error(token)
- }
- }
- }
-}
-
-func TesttokenizeNoHMM(t *testing.T) {
- for index, sentence := range test_contents {
- tokens := Tokenize(sentence, "default", false)
- if len(tokens) != len(noHmmResult[index]) {
- t.Error(len(tokens))
- }
- for i, token := range tokens {
- if token != noHmmResult[index][i] {
- t.Error(token)
- }
- }
- }
-}
diff --git a/tokenizers/jieba.go b/tokenizers/jieba.go
new file mode 100644
index 0000000..1e1547d
--- /dev/null
+++ b/tokenizers/jieba.go
@@ -0,0 +1,110 @@
+package tokenizers
+
+import (
+ "fmt"
+ "github.com/blevesearch/bleve/analysis"
+ "github.com/blevesearch/bleve/registry"
+ "github.com/wangbin/jiebago"
+ "regexp"
+ "strconv"
+)
+
+// Name is the key under which init registers JiebaTokenizerConstructor with
+// the bleve tokenizer registry.
+const Name = "jieba"
+
+// IdeographRegexp matches a run of one or more Han-script characters.
+// detectTokenType uses it to decide whether a term is analysis.Ideographic.
+var IdeographRegexp = regexp.MustCompile(`\p{Han}+`)
+
+// JiebaTokenizer is the jiebago-backed tokenizer that NewJiebaTokenizer
+// returns as an analysis.Tokenizer.
+type JiebaTokenizer struct {
+	// dictFileName is the dictionary path that was given to
+	// NewJiebaTokenizer.
+	dictFileName string
+	// hmm is forwarded to jiebago.Cut by Tokenize; searchMode makes Tokenize
+	// additionally emit the two- and three-rune sub-words of each word that
+	// have a positive frequency in jiebago.Trie.Freq.
+	hmm, searchMode bool
+}
+
+// NewJiebaTokenizer hands dictFileName to jiebago.SetDictionary and returns a
+// tokenizer configured with the given options.
+//
+// hmm is passed on to jiebago.Cut on every Tokenize call. searchMode makes
+// Tokenize also emit the two- and three-rune sub-words of each word.
+//
+// If jiebago.SetDictionary fails, its error is returned together with a nil
+// tokenizer.
+//
+// NOTE(review): jiebago.SetDictionary and jiebago.Trie are package-level, so
+// tokenizers created with different dictionaries presumably all end up using
+// the one set last — confirm against jiebago.
+func NewJiebaTokenizer(dictFileName string, hmm, searchMode bool) (analysis.Tokenizer, error) {
+	if err := jiebago.SetDictionary(dictFileName); err != nil {
+		// Do not hand out a tokenizer whose dictionary could not be set.
+		return nil, err
+	}
+	return &JiebaTokenizer{
+		dictFileName: dictFileName,
+		hmm:          hmm,
+		searchMode:   searchMode,
+	}, nil
+}
+
+// Tokenize cuts input with jiebago.Cut and returns one token per word.
+// Start and End are byte offsets and Position counts up from 1 across every
+// token emitted, sub-words included.
+//
+// When the tokenizer was created with searchMode, every word longer than two
+// runes is scanned for two-rune sub-words, and every word longer than three
+// runes for three-rune sub-words. A sub-word is emitted when its entry in
+// jiebago.Trie.Freq is positive. All two-rune matches of a word come first,
+// then its three-rune matches, then the word itself.
+//
+// NOTE(review): offsets are accumulated from the byte lengths of the words
+// that jiebago.Cut yields, so they line up with input only if Cut returns
+// every byte of input, in order — confirm against jiebago.
+func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
+	// Start from an empty, non-nil stream so that empty input yields an
+	// empty TokenStream rather than nil.
+	rv := make(analysis.TokenStream, 0)
+	start := 0 // byte offset of the current word
+	pos := 1   // position assigned to the next token
+	for word := range jiebago.Cut(string(input), false, jt.hmm) {
+		if jt.searchMode {
+			runes := []rune(word)
+			for _, step := range [2]int{2, 3} {
+				if len(runes) <= step {
+					continue
+				}
+				// offset is the byte offset of runes[i] within word. It is
+				// advanced one rune at a time instead of re-encoding the
+				// whole prefix for every candidate.
+				offset := 0
+				for i := 0; i+step <= len(runes); i++ {
+					gram := string(runes[i : i+step])
+					if freq, ok := jiebago.Trie.Freq[gram]; ok && freq > 0 {
+						gramStart := start + offset
+						rv = append(rv, &analysis.Token{
+							Term:     []byte(gram),
+							Start:    gramStart,
+							End:      gramStart + len(gram),
+							Position: pos,
+							Type:     detectTokenType(gram),
+						})
+						pos++
+					}
+					offset += len(string(runes[i]))
+				}
+			}
+		}
+		end := start + len(word)
+		rv = append(rv, &analysis.Token{
+			Term:     []byte(word),
+			Start:    start,
+			End:      end,
+			Position: pos,
+			Type:     detectTokenType(word),
+		})
+		pos++
+		start = end
+	}
+	return rv
+}
+
+// JiebaTokenizerConstructor builds a tokenizer from a configuration map by
+// calling NewJiebaTokenizer; init registers it with the bleve registry under
+// Name.
+//
+// Recognised keys:
+//
+//	"file"   string, required: dictionary path; a missing or non-string
+//	         value is an error.
+//	"hmm"    bool, optional: true when absent or not a bool.
+//	"search" bool, optional: true when absent or not a bool.
+//
+// cache is not used.
+func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (
+	analysis.Tokenizer, error) {
+	dictFileName, isString := config["file"].(string)
+	if !isString {
+		return nil, fmt.Errorf("must specify dictionary file path")
+	}
+
+	// Both switches stay on unless config carries an explicit bool for them.
+	hmm, searchMode := true, true
+	if v, isBool := config["hmm"].(bool); isBool {
+		hmm = v
+	}
+	if v, isBool := config["search"].(bool); isBool {
+		searchMode = v
+	}
+
+	return NewJiebaTokenizer(dictFileName, hmm, searchMode)
+}
+
+// detectTokenType classifies term for the Type field of a token:
+// analysis.Ideographic if it contains at least one Han character,
+// analysis.Numeric if it parses as a finite floating-point number, and
+// analysis.AlphaNumeric otherwise.
+func detectTokenType(term string) analysis.TokenType {
+	if IdeographRegexp.MatchString(term) {
+		return analysis.Ideographic
+	}
+	// strconv.ParseFloat also accepts the words "nan", "inf" and "infinity"
+	// in any letter case; those are ordinary text, not numbers. f*0 is 0 for
+	// every finite f and NaN for NaN and ±Inf, which filters them out.
+	if f, err := strconv.ParseFloat(term, 64); err == nil && f*0 == 0 {
+		return analysis.Numeric
+	}
+	return analysis.AlphaNumeric
+}
+
+// init registers JiebaTokenizerConstructor with the bleve registry under Name
+// when the package is loaded.
+func init() {
+	registry.RegisterTokenizer(Name, JiebaTokenizerConstructor)
+}
diff --git a/tokenizers/jieba_test.go b/tokenizers/jieba_test.go
new file mode 100644
index 0000000..2f55775
--- /dev/null
+++ b/tokenizers/jieba_test.go
@@ -0,0 +1,22515 @@
+package tokenizers
+
+import (
+ "github.com/blevesearch/bleve/analysis"
+ "reflect"
+ "testing"
+)
+
+func TestJiebaTokenizerDefaultModeWithHMM(t *testing.T) {
+ tests := []struct {
+ input []byte
+ output analysis.TokenStream
+ }{
+ {
+ []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("这是"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("一个"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 30,
+ Term: []byte("伸手不见五指"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("黑夜"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("。"),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("我"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("叫"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 57,
+ Term: []byte("孙悟空"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 60,
+ Term: []byte(","),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("我"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 66,
+ Term: []byte("爱"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 72,
+ Term: []byte("北京"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte(","),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte("我"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("爱"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("Python"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("和"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("C++"),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("。"),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("我不喜欢日本和服。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("不"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("喜欢"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("日本"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("和服"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("。"),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("雷猴回归人间。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("回归"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人间"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("。"),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("工信处"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("女干事"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("每月"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("经过"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("下属"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("科室"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("都"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("要"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 54,
+ Term: []byte("亲口"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("交代"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 62,
+ Term: []byte("24"),
+ Position: 11,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 62,
+ End: 65,
+ Term: []byte("口"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 74,
+ Term: []byte("交换机"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("等"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 86,
+ Term: []byte("技术性"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 86,
+ End: 92,
+ Term: []byte("器件"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 92,
+ End: 95,
+ Term: []byte("的"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 95,
+ End: 101,
+ Term: []byte("安装"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 101,
+ End: 107,
+ Term: []byte("工作"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我需要廉租房"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("廉租房"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("永和服装饰品有限公司"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("永和"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("服装"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("饰品"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 30,
+ Term: []byte("有限公司"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我爱北京天安门"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("爱"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("北京"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("天安门"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("abc"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("abc"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("隐马尔可夫"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("隐"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 15,
+ Term: []byte("马尔可夫"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("雷猴是个好网站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("好"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("网站"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("“"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 12,
+ Term: []byte("Microsoft"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("”"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("一词"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("由"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("“"),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 40,
+ Term: []byte("MICROcomputer"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 40,
+ End: 43,
+ Term: []byte("("),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("微型"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 58,
+ Term: []byte("计算机"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 58,
+ End: 61,
+ Term: []byte(")"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 64,
+ Term: []byte("”"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 64,
+ End: 67,
+ Term: []byte("和"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 67,
+ End: 70,
+ Term: []byte("“"),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 78,
+ Term: []byte("SOFTware"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("("),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("软件"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte(")"),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("”"),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("两"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 96,
+ End: 102,
+ Term: []byte("部分"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 102,
+ End: 108,
+ Term: []byte("组成"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("草泥马和欺实马是今年的流行词汇"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("草泥马"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("欺实"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("马"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("今年"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("流行"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("词汇"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("伊藤洋华堂总府店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("伊藤"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("洋华堂"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("总府"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("店"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国科学院计算技术研究所"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 36,
+ Term: []byte("中国科学院计算技术研究所"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("罗密欧与朱丽叶"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("罗密欧"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("与"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("朱丽叶"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我购买了道具和服装"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("购买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("道具"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("服装"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("PS"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 3,
+ Term: []byte(":"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 4,
+ Term: []byte(" "),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("我"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("觉得"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("开源"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("有"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("一个"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("好处"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte(","),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("就是"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("能够"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 55,
+ Term: []byte("敦促"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("自己"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 73,
+ Term: []byte("不断改进"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 73,
+ End: 76,
+ Term: []byte(","),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 76,
+ End: 82,
+ Term: []byte("避免"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 82,
+ End: 88,
+ Term: []byte("敞帚"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 88,
+ End: 94,
+ Term: []byte("自珍"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省石首市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("石首市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省十堰市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("十堰市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("总经理完成了这件事情"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("总经理"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("完成"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("这件"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("事情"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("电脑修好了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("电脑"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("修好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("做好了这件事情就一了百了了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("做好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("这件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("事情"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 36,
+ Term: []byte("一了百了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("了"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("人们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("审美"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("观点"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("不同"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我们买了一个美的空调"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("我们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("一个"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("美的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("空调"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("线程初始化时我们要注意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("线程"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("初始化"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("我们"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("要"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("注意"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一个分子是由好多原子组织成的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("一个"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("分子"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("是"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("由"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("好多"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("原子"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("组织"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("成"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("祝你马到功成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("祝"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("你"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 18,
+ Term: []byte("马到功成"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("他掉进了无底洞里"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("掉"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("进"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("无底洞"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("里"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国的首都是北京"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("中国"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("北京"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("孙君意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("孙君意"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("外交部发言人马朝旭"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("外交部"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("发言人"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("马朝旭"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("领导人会议和第四届东亚峰会"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("领导人"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("会议"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("第四届"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("东亚"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("峰会"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("在过去的这五年"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("过去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("这"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("五年"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("还需要很长的路要走"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("还"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("很长"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("路"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("要"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("走"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("60周年首都阅兵"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("60"),
+ Position: 1,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 2,
+ End: 8,
+ Term: []byte("周年"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 8,
+ End: 14,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("阅兵"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("你好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("人们"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("审美"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("观点"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("不同"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后来世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("来"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后去世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("去"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("但是后来我才知道你是对的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("但是"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("后来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("我"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("才"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("知道"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("你"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("对"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("存在即合理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("存在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("即"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("合理"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("的的的的的在的的的的就以和和和"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("的"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("在"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("就"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("以"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("和"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("和"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("I love你,不以为耻,反以为rong"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte("I"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 2,
+ Term: []byte(" "),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 6,
+ Term: []byte("love"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("你"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte(","),
+ Position: 5,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 24,
+ Term: []byte("不以为耻"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte(","),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("反"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("以为"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 40,
+ Term: []byte("rong"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("因"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("很好但主要是基于网页形式"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("很"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("但"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("主要"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("基于"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("网页"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("形式"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("为什么我不能拥有想要的生活"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("为什么"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("我"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("不能"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("拥有"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("想要"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("生活"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("后来我才"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("后来"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("我"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("才"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("此次来中国是为了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("此次"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("中国"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("为了"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("使用"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("它"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("就"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("可以"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("解决"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一些"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("问题"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(",使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte(","),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 7,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 10,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 22,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 40,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("其实使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("其实"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("好人使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("好人"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("是因为和国家"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("是因为"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("国家"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("老年搜索还支持"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("老年"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("搜索"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("还"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("支持"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("干脆"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("就"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("把"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("那部"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("蒙人"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("闲法"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("给"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("废"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("了"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("拉倒"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("!"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 53,
+ Term: []byte("RT"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 53,
+ End: 54,
+ Term: []byte(" "),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 55,
+ Term: []byte("@"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 55,
+ End: 67,
+ Term: []byte("laoshipukong"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 67,
+ End: 68,
+ Term: []byte(" "),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 69,
+ Term: []byte(":"),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 69,
+ End: 70,
+ Term: []byte(" "),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 72,
+ Term: []byte("27"),
+ Position: 20,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte("日"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte(","),
+ Position: 22,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 99,
+ Term: []byte("全国人大常委会"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 108,
+ Term: []byte("第三次"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 108,
+ End: 114,
+ Term: []byte("审议"),
+ Position: 25,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 114,
+ End: 120,
+ Term: []byte("侵权"),
+ Position: 26,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 120,
+ End: 129,
+ Term: []byte("责任法"),
+ Position: 27,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 129,
+ End: 135,
+ Term: []byte("草案"),
+ Position: 28,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 135,
+ End: 138,
+ Term: []byte(","),
+ Position: 29,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 138,
+ End: 144,
+ Term: []byte("删除"),
+ Position: 30,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 144,
+ End: 147,
+ Term: []byte("了"),
+ Position: 31,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 147,
+ End: 153,
+ Term: []byte("有关"),
+ Position: 32,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 153,
+ End: 159,
+ Term: []byte("医疗"),
+ Position: 33,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 159,
+ End: 165,
+ Term: []byte("损害"),
+ Position: 34,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 165,
+ End: 171,
+ Term: []byte("责任"),
+ Position: 35,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 171,
+ End: 174,
+ Term: []byte("“"),
+ Position: 36,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 174,
+ End: 180,
+ Term: []byte("举证"),
+ Position: 37,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 180,
+ End: 186,
+ Term: []byte("倒置"),
+ Position: 38,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 186,
+ End: 189,
+ Term: []byte("”"),
+ Position: 39,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 189,
+ End: 192,
+ Term: []byte("的"),
+ Position: 40,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 192,
+ End: 198,
+ Term: []byte("规定"),
+ Position: 41,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 198,
+ End: 201,
+ Term: []byte("。"),
+ Position: 42,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 201,
+ End: 204,
+ Term: []byte("在"),
+ Position: 43,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 204,
+ End: 210,
+ Term: []byte("医患"),
+ Position: 44,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 210,
+ End: 216,
+ Term: []byte("纠纷"),
+ Position: 45,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 216,
+ End: 222,
+ Term: []byte("中本"),
+ Position: 46,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 222,
+ End: 225,
+ Term: []byte("已"),
+ Position: 47,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 225,
+ End: 231,
+ Term: []byte("处于"),
+ Position: 48,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 231,
+ End: 237,
+ Term: []byte("弱势"),
+ Position: 49,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 237,
+ End: 243,
+ Term: []byte("地位"),
+ Position: 50,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 243,
+ End: 246,
+ Term: []byte("的"),
+ Position: 51,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 246,
+ End: 255,
+ Term: []byte("消费者"),
+ Position: 52,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 255,
+ End: 261,
+ Term: []byte("由此"),
+ Position: 53,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 261,
+ End: 264,
+ Term: []byte("将"),
+ Position: 54,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 264,
+ End: 270,
+ Term: []byte("陷入"),
+ Position: 55,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 270,
+ End: 282,
+ Term: []byte("万劫不复"),
+ Position: 56,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 282,
+ End: 285,
+ Term: []byte("的"),
+ Position: 57,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 285,
+ End: 291,
+ Term: []byte("境地"),
+ Position: 58,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 291,
+ End: 294,
+ Term: []byte("。"),
+ Position: 59,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 294,
+ End: 295,
+ Term: []byte(" "),
+ Position: 60,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("大"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("大"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("他说的确实在理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("说"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("确实"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("在理"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春节讲话"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("长春"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("市长"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("春节"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("讲话"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结婚的和尚未结婚的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结婚"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("尚未"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("结婚"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结合成分子时"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结合"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("成"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("分子"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("旅游和服务是最好的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("旅游"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("服务"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("最好"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("这件事情的确是我的错"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("这件"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("事情"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("的确"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("我"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("错"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("供大家参考指正"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("供"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("大家"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("参考"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("指正"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("哈尔滨政府公布塌桥原因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("哈尔滨"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("政府"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("公布"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("塌桥"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("原因"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我在机场入口处"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("在"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("机场"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("入口处"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邢永臣摄影报道"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("邢永臣"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("摄影"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("报道"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("BP神经网络如何训练才能在分类时增加区分度?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("BP"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 14,
+ Term: []byte("神经网络"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("如何"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 20,
+ End: 26,
+ Term: []byte("训练"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("才能"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("在"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("分类"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("时"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 44,
+ End: 50,
+ Term: []byte("增加"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 50,
+ End: 59,
+ Term: []byte("区分度"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 59,
+ End: 62,
+ Term: []byte("?"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("南京市长江大桥"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("南京市"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 21,
+ Term: []byte("长江大桥"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("应"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("一些"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("使用者"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("建议"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte(","),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("也"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("为了"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("便于"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("利用"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 59,
+ Term: []byte("NiuTrans"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 59,
+ End: 65,
+ Term: []byte("用于"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 68,
+ Term: []byte("SMT"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 74,
+ Term: []byte("研究"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春药店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("长春市"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("长春"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("药店"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邓颖超生前最喜欢的衣服"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("邓颖超"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("生前"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("最"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("喜欢"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("衣服"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("胡锦涛是热爱世界和平的政治局常委"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("胡锦涛"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("热爱"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世界"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("和平"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("政治局"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("常委"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("程序员"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("祝"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("海林"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 30,
+ Term: []byte("朱会震"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("是"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("在"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("孙健"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("左面"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("和"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("右面"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 61,
+ Term: []byte(","),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(" "),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 68,
+ Term: []byte("范凯"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 68,
+ End: 71,
+ Term: []byte("在"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 71,
+ End: 74,
+ Term: []byte("最"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 80,
+ Term: []byte("右面"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 80,
+ End: 81,
+ Term: []byte("."),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("再往"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("左"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("是"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 93,
+ End: 102,
+ Term: []byte("李松洪"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一次性交多少钱"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("一次性"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("交"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("多少"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("钱"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("两块五一套,三块八一斤,四块七一本,五块六一条"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("两块"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("五"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("一套"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte(","),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("三块"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("八"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一斤"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte(","),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("四块"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("七"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("一本"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte(","),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("五块"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("六"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("一条"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("小和尚留了一个像大和尚一样的和尚头"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("小"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("和尚"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("留"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("一个"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("像"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("大"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("和尚"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一样"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 51,
+ Term: []byte("和尚头"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 27,
+ Term: []byte("中华人民共和国"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("公民"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 34,
+ Term: []byte(";"),
+ Position: 5,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("我"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("爸爸"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("是"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 55,
+ Term: []byte("共和党"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("党员"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(";"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 63,
+ Term: []byte(" "),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("地铁"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 78,
+ Term: []byte("和平门"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("站"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张晓梅去人民医院做了个B超然后去买了件T恤"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("张晓梅"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人民"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("医院"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("做"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("个"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 37,
+ Term: []byte("B超"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("然后"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("去"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 49,
+ Term: []byte("买"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 52,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 52,
+ End: 55,
+ Term: []byte("件"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 59,
+ Term: []byte("T恤"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("AT&T是一件不错的公司,给你发offer了吗?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("AT&T"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("一件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("不错"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("公司"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 31,
+ Term: []byte(","),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 31,
+ End: 34,
+ Term: []byte("给"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("你"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 40,
+ Term: []byte("发"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 40,
+ End: 45,
+ Term: []byte("offer"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("吗"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("?"),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("C++"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 8,
+ Term: []byte("c#"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 8,
+ End: 11,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("什么"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("关系"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("?"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 26,
+ End: 28,
+ Term: []byte("11"),
+ Position: 8,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 28,
+ End: 29,
+ Term: []byte("+"),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 29,
+ End: 32,
+ Term: []byte("122"),
+ Position: 10,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 32,
+ End: 33,
+ Term: []byte("="),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("133"),
+ Position: 12,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte(","),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("是"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("吗"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("?"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 48,
+ End: 50,
+ Term: []byte("PI"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 50,
+ End: 51,
+ Term: []byte("="),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 58,
+ Term: []byte("3.14159"),
+ Position: 19,
+ Type: analysis.Numeric,
+ },
+ },
+ },
+ {
+ []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("你"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("认识"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("那个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("主席"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("握手"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("的哥"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("吗"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("?"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("他开"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 57,
+ Term: []byte("一辆"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 63,
+ Term: []byte("黑色"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("的士"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 72,
+ Term: []byte("。"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("枪杆子中出政权"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("枪杆子"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("中"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("出"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("政权"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张三风同学走上了不归路"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("张三风"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("同学"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("走上"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 33,
+ Term: []byte("不归路"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("阿Q"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 4,
+ End: 10,
+ Term: []byte("腰间"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("挂"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("着"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 21,
+ Term: []byte("BB机"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("手里"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("拿"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("着"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("大哥大"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte(","),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("说"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte(":"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("我"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("一般"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 66,
+ Term: []byte("吃饭"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 69,
+ Term: []byte("不"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 74,
+ Term: []byte("AA制"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("的"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 80,
+ Term: []byte("。"),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("在1号店能买到小S和大S八卦的书。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 10,
+ Term: []byte("1号店"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("能"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("买"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 19,
+ Term: []byte("到"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 23,
+ Term: []byte("小S"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("和"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 30,
+ Term: []byte("大S"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("八卦"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("书"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("。"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ }
+
+ tokenizer, _ := NewJiebaTokenizer("../dict.txt", true, false)
+ for _, test := range tests {
+ actual := tokenizer.Tokenize(test.input)
+ if !reflect.DeepEqual(actual, test.output) {
+ t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
+ }
+ }
+
+}
+
+func TestJiebaTokenizerSearchModeWithHMM(t *testing.T) {
+ tests := []struct {
+ input []byte
+ output analysis.TokenStream
+ }{
+ {
+ []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("这是"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("一个"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("伸手"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("不见"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("五指"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 30,
+ Term: []byte("伸手不见五指"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("黑夜"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("。"),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("我"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("叫"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 57,
+ Term: []byte("悟空"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 57,
+ Term: []byte("孙悟空"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 60,
+ Term: []byte(","),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("我"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 66,
+ Term: []byte("爱"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 72,
+ Term: []byte("北京"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte(","),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte("我"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("爱"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("Python"),
+ Position: 21,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("和"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("C++"),
+ Position: 23,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("。"),
+ Position: 24,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("我不喜欢日本和服。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("不"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("喜欢"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("日本"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("和服"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("。"),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("雷猴回归人间。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("回归"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人间"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("。"),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("工信处"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("干事"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("女干事"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("每月"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("经过"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("下属"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("科室"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("都"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("要"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 54,
+ Term: []byte("亲口"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("交代"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 62,
+ Term: []byte("24"),
+ Position: 12,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 62,
+ End: 65,
+ Term: []byte("口"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 71,
+ Term: []byte("交换"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 68,
+ End: 74,
+ Term: []byte("换机"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 74,
+ Term: []byte("交换机"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("等"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 83,
+ Term: []byte("技术"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 86,
+ Term: []byte("技术性"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 86,
+ End: 92,
+ Term: []byte("器件"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 92,
+ End: 95,
+ Term: []byte("的"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 95,
+ End: 101,
+ Term: []byte("安装"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 101,
+ End: 107,
+ Term: []byte("工作"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我需要廉租房"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("廉租"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("租房"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("廉租房"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("永和服装饰品有限公司"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("永和"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("服装"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("饰品"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("有限"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("公司"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 30,
+ Term: []byte("有限公司"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我爱北京天安门"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("爱"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("北京"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("天安"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("天安门"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("abc"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("abc"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("隐马尔可夫"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("隐"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("可夫"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 12,
+ Term: []byte("马尔可"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 15,
+ Term: []byte("马尔可夫"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("雷猴是个好网站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("好"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("网站"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("“"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 12,
+ Term: []byte("Microsoft"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("”"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("一词"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("由"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("“"),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 40,
+ Term: []byte("MICROcomputer"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 40,
+ End: 43,
+ Term: []byte("("),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("微型"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 55,
+ Term: []byte("计算"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 52,
+ End: 58,
+ Term: []byte("算机"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 58,
+ Term: []byte("计算机"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 58,
+ End: 61,
+ Term: []byte(")"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 64,
+ Term: []byte("”"),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 64,
+ End: 67,
+ Term: []byte("和"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 67,
+ End: 70,
+ Term: []byte("“"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 78,
+ Term: []byte("SOFTware"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("("),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("软件"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte(")"),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("”"),
+ Position: 21,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("两"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 96,
+ End: 102,
+ Term: []byte("部分"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 102,
+ End: 108,
+ Term: []byte("组成"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("草泥马和欺实马是今年的流行词汇"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("草泥马"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("欺实"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("马"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("今年"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("流行"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("词汇"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("伊藤洋华堂总府店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("伊藤"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("洋华堂"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("总府"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("店"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国科学院计算技术研究所"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("中国"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("科学"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("学院"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("计算"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("技术"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("研究"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("科学院"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 36,
+ Term: []byte("研究所"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 36,
+ Term: []byte("中国科学院计算技术研究所"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("罗密欧与朱丽叶"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("罗密欧"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("与"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("朱丽叶"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我购买了道具和服装"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("购买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("道具"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("服装"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("PS"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 3,
+ Term: []byte(":"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 4,
+ Term: []byte(" "),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("我"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("觉得"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("开源"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("有"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("一个"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("好处"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte(","),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("就是"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("能够"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 55,
+ Term: []byte("敦促"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("自己"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 67,
+ Term: []byte("不断"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 67,
+ End: 73,
+ Term: []byte("改进"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 73,
+ Term: []byte("不断改进"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 73,
+ End: 76,
+ Term: []byte(","),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 76,
+ End: 82,
+ Term: []byte("避免"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 82,
+ End: 88,
+ Term: []byte("敞帚"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 88,
+ End: 94,
+ Term: []byte("自珍"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省石首市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("湖北"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("石首"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("石首市"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省十堰市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("湖北"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("十堰"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("十堰市"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("总经理完成了这件事情"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("经理"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("总经理"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("完成"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("这件"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("事情"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("电脑修好了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("电脑"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("修好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("做好了这件事情就一了百了了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("做好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("这件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("事情"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 36,
+ Term: []byte("一了百了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("了"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("人们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("审美"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("观点"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("不同"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我们买了一个美的空调"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("我们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("一个"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("美的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("空调"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("线程初始化时我们要注意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("线程"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("初始"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("初始化"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("我们"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("要"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("注意"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一个分子是由好多原子组织成的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("一个"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("分子"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("是"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("由"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("好多"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("原子"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("组织"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("成"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("祝你马到功成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("祝"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("你"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 18,
+ Term: []byte("马到功成"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("他掉进了无底洞里"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("掉"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("进"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("无底"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("无底洞"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("里"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国的首都是北京"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("中国"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("北京"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("孙君意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("孙君意"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("外交部发言人马朝旭"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("外交"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("外交部"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("发言"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("发言人"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("马朝旭"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("领导人会议和第四届东亚峰会"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("领导"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("领导人"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("会议"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("第四"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("四届"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("第四届"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("东亚"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("峰会"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("在过去的这五年"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("过去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("这"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("五年"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("还需要很长的路要走"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("还"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("很长"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("路"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("要"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("走"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("60周年首都阅兵"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("60"),
+ Position: 1,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 2,
+ End: 8,
+ Term: []byte("周年"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 8,
+ End: 14,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("阅兵"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("你好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("人们"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("审美"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("观点"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("不同"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后来世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("来"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世博"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("博园"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后去世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("去"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世博"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("博园"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("但是后来我才知道你是对的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("但是"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("后来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("我"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("才"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("知道"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("你"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("对"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("存在即合理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("存在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("即"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("合理"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("的的的的的在的的的的就以和和和"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("的"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("在"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("就"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("以"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("和"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("和"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("I love你,不以为耻,反以为rong"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte("I"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 2,
+ Term: []byte(" "),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 6,
+ Term: []byte("love"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("你"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte(","),
+ Position: 5,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("不以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("以为"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 24,
+ Term: []byte("不以为耻"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte(","),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("反"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("以为"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 40,
+ Term: []byte("rong"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("因"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("很好但主要是基于网页形式"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("很"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("但"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("主要"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("基于"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("网页"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("形式"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("为什么我不能拥有想要的生活"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("什么"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("为什么"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("我"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("不能"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("拥有"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("想要"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("生活"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("后来我才"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("后来"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("我"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("才"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("此次来中国是为了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("此次"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("中国"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("为了"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("使用"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("它"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("就"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("可以"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("解决"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一些"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("问题"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(",使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte(","),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 7,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 10,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 22,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 40,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("其实使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("其实"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("好人使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("好人"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("是因为和国家"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("因为"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("是因为"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("国家"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("老年搜索还支持"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("老年"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("搜索"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("还"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("支持"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("干脆"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("就"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("把"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("那部"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("蒙人"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("闲法"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("给"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("废"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("了"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("拉倒"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("!"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 53,
+ Term: []byte("RT"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 53,
+ End: 54,
+ Term: []byte(" "),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 55,
+ Term: []byte("@"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 55,
+ End: 67,
+ Term: []byte("laoshipukong"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 67,
+ End: 68,
+ Term: []byte(" "),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 69,
+ Term: []byte(":"),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 69,
+ End: 70,
+ Term: []byte(" "),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 72,
+ Term: []byte("27"),
+ Position: 20,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte("日"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte(","),
+ Position: 22,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 84,
+ Term: []byte("全国"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("国人"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 84,
+ End: 90,
+ Term: []byte("人大"),
+ Position: 25,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 96,
+ Term: []byte("常委"),
+ Position: 26,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 93,
+ End: 99,
+ Term: []byte("委会"),
+ Position: 27,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 99,
+ Term: []byte("常委会"),
+ Position: 28,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 99,
+ Term: []byte("全国人大常委会"),
+ Position: 29,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 105,
+ Term: []byte("第三"),
+ Position: 30,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 102,
+ End: 108,
+ Term: []byte("三次"),
+ Position: 31,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 108,
+ Term: []byte("第三次"),
+ Position: 32,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 108,
+ End: 114,
+ Term: []byte("审议"),
+ Position: 33,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 114,
+ End: 120,
+ Term: []byte("侵权"),
+ Position: 34,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 120,
+ End: 126,
+ Term: []byte("责任"),
+ Position: 35,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 120,
+ End: 129,
+ Term: []byte("责任法"),
+ Position: 36,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 129,
+ End: 135,
+ Term: []byte("草案"),
+ Position: 37,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 135,
+ End: 138,
+ Term: []byte(","),
+ Position: 38,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 138,
+ End: 144,
+ Term: []byte("删除"),
+ Position: 39,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 144,
+ End: 147,
+ Term: []byte("了"),
+ Position: 40,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 147,
+ End: 153,
+ Term: []byte("有关"),
+ Position: 41,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 153,
+ End: 159,
+ Term: []byte("医疗"),
+ Position: 42,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 159,
+ End: 165,
+ Term: []byte("损害"),
+ Position: 43,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 165,
+ End: 171,
+ Term: []byte("责任"),
+ Position: 44,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 171,
+ End: 174,
+ Term: []byte("“"),
+ Position: 45,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 174,
+ End: 180,
+ Term: []byte("举证"),
+ Position: 46,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 180,
+ End: 186,
+ Term: []byte("倒置"),
+ Position: 47,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 186,
+ End: 189,
+ Term: []byte("”"),
+ Position: 48,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 189,
+ End: 192,
+ Term: []byte("的"),
+ Position: 49,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 192,
+ End: 198,
+ Term: []byte("规定"),
+ Position: 50,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 198,
+ End: 201,
+ Term: []byte("。"),
+ Position: 51,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 201,
+ End: 204,
+ Term: []byte("在"),
+ Position: 52,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 204,
+ End: 210,
+ Term: []byte("医患"),
+ Position: 53,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 210,
+ End: 216,
+ Term: []byte("纠纷"),
+ Position: 54,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 216,
+ End: 222,
+ Term: []byte("中本"),
+ Position: 55,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 222,
+ End: 225,
+ Term: []byte("已"),
+ Position: 56,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 225,
+ End: 231,
+ Term: []byte("处于"),
+ Position: 57,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 231,
+ End: 237,
+ Term: []byte("弱势"),
+ Position: 58,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 237,
+ End: 243,
+ Term: []byte("地位"),
+ Position: 59,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 243,
+ End: 246,
+ Term: []byte("的"),
+ Position: 60,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 246,
+ End: 252,
+ Term: []byte("消费"),
+ Position: 61,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 246,
+ End: 255,
+ Term: []byte("消费者"),
+ Position: 62,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 255,
+ End: 261,
+ Term: []byte("由此"),
+ Position: 63,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 261,
+ End: 264,
+ Term: []byte("将"),
+ Position: 64,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 264,
+ End: 270,
+ Term: []byte("陷入"),
+ Position: 65,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 276,
+ End: 282,
+ Term: []byte("不复"),
+ Position: 66,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 270,
+ End: 282,
+ Term: []byte("万劫不复"),
+ Position: 67,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 282,
+ End: 285,
+ Term: []byte("的"),
+ Position: 68,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 285,
+ End: 291,
+ Term: []byte("境地"),
+ Position: 69,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 291,
+ End: 294,
+ Term: []byte("。"),
+ Position: 70,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 294,
+ End: 295,
+ Term: []byte(" "),
+ Position: 71,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("大"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("大"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("他说的确实在理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("说"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("确实"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("在理"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春节讲话"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("长春"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("市长"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("春节"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("讲话"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结婚的和尚未结婚的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结婚"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("尚未"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("结婚"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结合成分子时"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结合"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("成"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("分子"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("旅游和服务是最好的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("旅游"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("服务"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("最好"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("这件事情的确是我的错"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("这件"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("事情"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("的确"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("我"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("错"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("供大家参考指正"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("供"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("大家"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("参考"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("指正"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("哈尔滨政府公布塌桥原因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("哈尔"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("哈尔滨"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("政府"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("公布"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("塌桥"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("原因"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我在机场入口处"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("在"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("机场"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("入口"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("入口处"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邢永臣摄影报道"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("邢永臣"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("摄影"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("报道"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("BP神经网络如何训练才能在分类时增加区分度?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("BP"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 8,
+ Term: []byte("神经"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 8,
+ End: 14,
+ Term: []byte("网络"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 2,
+ End: 11,
+ Term: []byte("神经网"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 2,
+ End: 14,
+ Term: []byte("神经网络"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("如何"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 20,
+ End: 26,
+ Term: []byte("训练"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("才能"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("在"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("分类"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("时"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 44,
+ End: 50,
+ Term: []byte("增加"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 50,
+ End: 56,
+ Term: []byte("区分"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 53,
+ End: 59,
+ Term: []byte("分度"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 50,
+ End: 59,
+ Term: []byte("区分度"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 59,
+ End: 62,
+ Term: []byte("?"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("南京市长江大桥"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("南京"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("京市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("南京市"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("长江"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("大桥"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 21,
+ Term: []byte("长江大桥"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("应"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("一些"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("使用"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("用者"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("使用者"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("建议"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte(","),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("也"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("为了"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("便于"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("利用"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 59,
+ Term: []byte("NiuTrans"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 59,
+ End: 65,
+ Term: []byte("用于"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 68,
+ Term: []byte("SMT"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 74,
+ Term: []byte("研究"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春药店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("长春"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("长春市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("长春"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("药店"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邓颖超生前最喜欢的衣服"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("邓颖超"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("生前"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("最"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("喜欢"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("衣服"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("胡锦涛是热爱世界和平的政治局常委"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("锦涛"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("胡锦涛"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("是"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("热爱"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世界"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("和平"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("政治"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("政治局"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("常委"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("程序"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("程序员"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("祝"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("海林"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 30,
+ Term: []byte("朱会震"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("在"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("孙健"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("左面"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("和"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("右面"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 61,
+ Term: []byte(","),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(" "),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 68,
+ Term: []byte("范凯"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 68,
+ End: 71,
+ Term: []byte("在"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 71,
+ End: 74,
+ Term: []byte("最"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 80,
+ Term: []byte("右面"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 80,
+ End: 81,
+ Term: []byte("."),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("再往"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("左"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("是"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 93,
+ End: 102,
+ Term: []byte("李松洪"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一次性交多少钱"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("一次"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("一次性"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("交"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("多少"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("钱"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("两块五一套,三块八一斤,四块七一本,五块六一条"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("两块"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("五"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("一套"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte(","),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("三块"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("八"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一斤"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte(","),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("四块"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("七"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("一本"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte(","),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("五块"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("六"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("一条"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("小和尚留了一个像大和尚一样的和尚头"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("小"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("和尚"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("留"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("一个"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("像"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("大"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("和尚"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一样"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("和尚"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 51,
+ Term: []byte("和尚头"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("中华"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("华人"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人民"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("共和"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("共和国"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 27,
+ Term: []byte("中华人民共和国"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("公民"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 34,
+ Term: []byte(";"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("我"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("爸爸"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("是"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 52,
+ Term: []byte("共和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 55,
+ Term: []byte("共和党"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("党员"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(";"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 63,
+ Term: []byte(" "),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("地铁"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 75,
+ Term: []byte("和平"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 78,
+ Term: []byte("和平门"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("站"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张晓梅去人民医院做了个B超然后去买了件T恤"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("张晓梅"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人民"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("医院"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("做"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("个"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 37,
+ Term: []byte("B超"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("然后"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("去"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 49,
+ Term: []byte("买"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 52,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 52,
+ End: 55,
+ Term: []byte("件"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 59,
+ Term: []byte("T恤"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("AT&T是一件不错的公司,给你发offer了吗?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("AT&T"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("一件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("不错"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("公司"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 31,
+ Term: []byte(","),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 31,
+ End: 34,
+ Term: []byte("给"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("你"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 40,
+ Term: []byte("发"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 40,
+ End: 45,
+ Term: []byte("offer"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("吗"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("?"),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("C++"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 8,
+ Term: []byte("c#"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 8,
+ End: 11,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("什么"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("关系"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("?"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 26,
+ End: 28,
+ Term: []byte("11"),
+ Position: 8,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 28,
+ End: 29,
+ Term: []byte("+"),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 29,
+ End: 32,
+ Term: []byte("122"),
+ Position: 10,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 32,
+ End: 33,
+ Term: []byte("="),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("133"),
+ Position: 12,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte(","),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("是"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("吗"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("?"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 48,
+ End: 50,
+ Term: []byte("PI"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 50,
+ End: 51,
+ Term: []byte("="),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 58,
+ Term: []byte("3.14159"),
+ Position: 19,
+ Type: analysis.Numeric,
+ },
+ },
+ },
+ {
+ []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("你"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("认识"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("那个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("主席"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("握手"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("的哥"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("吗"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("?"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("他开"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 57,
+ Term: []byte("一辆"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 63,
+ Term: []byte("黑色"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("的士"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 72,
+ Term: []byte("。"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("枪杆子中出政权"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("枪杆"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("杆子"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("枪杆子"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("中"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("出"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("政权"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张三风同学走上了不归路"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("张三风"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("同学"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("走上"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("归路"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 33,
+ Term: []byte("不归路"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("阿Q"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 4,
+ End: 10,
+ Term: []byte("腰间"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("挂"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("着"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 21,
+ Term: []byte("BB机"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("手里"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("拿"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("着"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("大哥"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("大哥大"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte(","),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("说"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte(":"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("我"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("一般"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 66,
+ Term: []byte("吃饭"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 69,
+ Term: []byte("不"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 74,
+ Term: []byte("AA制"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("的"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 80,
+ Term: []byte("。"),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("在1号店能买到小S和大S八卦的书。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 10,
+ Term: []byte("1号店"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("能"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("买"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 19,
+ Term: []byte("到"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 23,
+ Term: []byte("小S"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("和"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 30,
+ Term: []byte("大S"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("八卦"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("书"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("。"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ }
+
+ tokenizer, _ := NewJiebaTokenizer("../dict.txt", true, true)
+ for _, test := range tests {
+ actual := tokenizer.Tokenize(test.input)
+ if !reflect.DeepEqual(actual, test.output) {
+ t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
+ }
+ }
+}
+
+func TestJiebaTokenizerDefaultModeWithoutHMM(t *testing.T) {
+ tests := []struct {
+ input []byte
+ output analysis.TokenStream
+ }{
+ {
+ []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("这"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("一个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 30,
+ Term: []byte("伸手不见五指"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("黑夜"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("。"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("我"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("叫"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 57,
+ Term: []byte("孙悟空"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 60,
+ Term: []byte(","),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("我"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 66,
+ Term: []byte("爱"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 72,
+ Term: []byte("北京"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte(","),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte("我"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("爱"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("Python"),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("和"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("C++"),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("。"),
+ Position: 21,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("我不喜欢日本和服。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("不"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("喜欢"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("日本"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("和服"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("。"),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("雷猴回归人间。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("回归"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人间"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("。"),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("工信处"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("女干事"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("每月"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("经过"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("下属"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("科室"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("都"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("要"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 54,
+ Term: []byte("亲口"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("交代"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 62,
+ Term: []byte("24"),
+ Position: 11,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 62,
+ End: 65,
+ Term: []byte("口"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 74,
+ Term: []byte("交换机"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("等"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 86,
+ Term: []byte("技术性"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 86,
+ End: 92,
+ Term: []byte("器件"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 92,
+ End: 95,
+ Term: []byte("的"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 95,
+ End: 101,
+ Term: []byte("安装"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 101,
+ End: 107,
+ Term: []byte("工作"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我需要廉租房"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("廉租房"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("永和服装饰品有限公司"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("永和"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("服装"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("饰品"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 30,
+ Term: []byte("有限公司"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我爱北京天安门"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("爱"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("北京"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("天安门"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("abc"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("abc"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("隐马尔可夫"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("隐"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 15,
+ Term: []byte("马尔可夫"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("雷猴是个好网站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("好"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("网站"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("“"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 12,
+ Term: []byte("Microsoft"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("”"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("一"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("词"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("由"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("“"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 40,
+ Term: []byte("MICROcomputer"),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 40,
+ End: 43,
+ Term: []byte("("),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("微型"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 58,
+ Term: []byte("计算机"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 58,
+ End: 61,
+ Term: []byte(")"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 64,
+ Term: []byte("”"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 64,
+ End: 67,
+ Term: []byte("和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 67,
+ End: 70,
+ Term: []byte("“"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 78,
+ Term: []byte("SOFTware"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("("),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("软件"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte(")"),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("”"),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("两"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 96,
+ End: 102,
+ Term: []byte("部分"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 102,
+ End: 108,
+ Term: []byte("组成"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("草泥马和欺实马是今年的流行词汇"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("草泥马"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("欺"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("实"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("马"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("今年"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("流行"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("词汇"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("伊藤洋华堂总府店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("伊"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("藤"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("洋华堂"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("总府"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("店"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国科学院计算技术研究所"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 36,
+ Term: []byte("中国科学院计算技术研究所"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("罗密欧与朱丽叶"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("罗密欧"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("与"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("朱丽叶"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我购买了道具和服装"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("购买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("道具"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("服装"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("PS"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 3,
+ Term: []byte(":"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 4,
+ Term: []byte(" "),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("我"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("觉得"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("开源"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("有"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("一个"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("好处"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte(","),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("就是"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("能够"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 55,
+ Term: []byte("敦促"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("自己"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 73,
+ Term: []byte("不断改进"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 73,
+ End: 76,
+ Term: []byte(","),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 76,
+ End: 82,
+ Term: []byte("避免"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 82,
+ End: 85,
+ Term: []byte("敞"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 85,
+ End: 88,
+ Term: []byte("帚"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 88,
+ End: 94,
+ Term: []byte("自珍"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省石首市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("石首市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省十堰市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("十堰市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("总经理完成了这件事情"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("总经理"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("完成"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("这件"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("事情"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("电脑修好了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("电脑"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("修好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("做好了这件事情就一了百了了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("做好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("这件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("事情"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 36,
+ Term: []byte("一了百了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("了"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("人们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("审美"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("观点"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("不同"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我们买了一个美的空调"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("我们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("一个"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("美的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("空调"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("线程初始化时我们要注意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("线程"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("初始化"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("我们"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("要"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("注意"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一个分子是由好多原子组织成的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("一个"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("分子"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("是"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("由"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("好多"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("原子"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("组织"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("成"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("祝你马到功成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("祝"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("你"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 18,
+ Term: []byte("马到功成"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("他掉进了无底洞里"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("掉"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("进"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("无底洞"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("里"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国的首都是北京"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("中国"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("北京"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("孙君意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("孙"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("君"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("意"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("外交部发言人马朝旭"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("外交部"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("发言人"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("马朝旭"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("领导人会议和第四届东亚峰会"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("领导人"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("会议"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("第四届"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("东亚"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("峰会"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("在过去的这五年"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("过去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("这"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("五年"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("还需要很长的路要走"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("还"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("很"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("长"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("路"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("要"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("走"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("60周年首都阅兵"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("60"),
+ Position: 1,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 2,
+ End: 8,
+ Term: []byte("周年"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 8,
+ End: 14,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("阅兵"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("你好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("人们"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("审美"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("观点"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("不同"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后来世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("来"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后去世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("去"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("但是后来我才知道你是对的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("但是"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("后来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("我"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("才"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("知道"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("你"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("对"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("存在即合理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("存在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("即"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("合理"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("的的的的的在的的的的就以和和和"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("的"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("在"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("就"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("以"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("和"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("和"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("I love你,不以为耻,反以为rong"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte("I"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 2,
+ Term: []byte(" "),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 6,
+ Term: []byte("love"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("你"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte(","),
+ Position: 5,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 24,
+ Term: []byte("不以为耻"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte(","),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("反"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("以为"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 40,
+ Term: []byte("rong"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("因"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("很好但主要是基于网页形式"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("很"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("但"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("主要"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("基于"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("网页"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("形式"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("为什么我不能拥有想要的生活"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("为什么"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("我"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("不能"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("拥有"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("想要"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("生活"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("后来我才"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("后来"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("我"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("才"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("此次来中国是为了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("此次"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("中国"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("为了"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("使用"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("它"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("就"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("可以"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("解决"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一些"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("问题"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(",使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte(","),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 7,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 10,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 22,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 40,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("其实使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("其实"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("好人使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("好人"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("是因为和国家"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("是因为"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("国家"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("老年搜索还支持"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("老年"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("搜索"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("还"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("支持"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("干脆"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("就"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("把"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("那"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("部"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("蒙"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("人"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("闲"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("法"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("给"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("废"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("了"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("拉倒"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("!"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 53,
+ Term: []byte("RT"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 53,
+ End: 54,
+ Term: []byte(" "),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 55,
+ Term: []byte("@"),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 55,
+ End: 67,
+ Term: []byte("laoshipukong"),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 67,
+ End: 68,
+ Term: []byte(" "),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 69,
+ Term: []byte(":"),
+ Position: 21,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 69,
+ End: 70,
+ Term: []byte(" "),
+ Position: 22,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 72,
+ Term: []byte("27"),
+ Position: 23,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte("日"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte(","),
+ Position: 25,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 99,
+ Term: []byte("全国人大常委会"),
+ Position: 26,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 108,
+ Term: []byte("第三次"),
+ Position: 27,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 108,
+ End: 114,
+ Term: []byte("审议"),
+ Position: 28,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 114,
+ End: 120,
+ Term: []byte("侵权"),
+ Position: 29,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 120,
+ End: 129,
+ Term: []byte("责任法"),
+ Position: 30,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 129,
+ End: 135,
+ Term: []byte("草案"),
+ Position: 31,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 135,
+ End: 138,
+ Term: []byte(","),
+ Position: 32,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 138,
+ End: 144,
+ Term: []byte("删除"),
+ Position: 33,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 144,
+ End: 147,
+ Term: []byte("了"),
+ Position: 34,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 147,
+ End: 153,
+ Term: []byte("有关"),
+ Position: 35,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 153,
+ End: 159,
+ Term: []byte("医疗"),
+ Position: 36,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 159,
+ End: 165,
+ Term: []byte("损害"),
+ Position: 37,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 165,
+ End: 171,
+ Term: []byte("责任"),
+ Position: 38,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 171,
+ End: 174,
+ Term: []byte("“"),
+ Position: 39,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 174,
+ End: 180,
+ Term: []byte("举证"),
+ Position: 40,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 180,
+ End: 186,
+ Term: []byte("倒置"),
+ Position: 41,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 186,
+ End: 189,
+ Term: []byte("”"),
+ Position: 42,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 189,
+ End: 192,
+ Term: []byte("的"),
+ Position: 43,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 192,
+ End: 198,
+ Term: []byte("规定"),
+ Position: 44,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 198,
+ End: 201,
+ Term: []byte("。"),
+ Position: 45,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 201,
+ End: 204,
+ Term: []byte("在"),
+ Position: 46,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 204,
+ End: 210,
+ Term: []byte("医患"),
+ Position: 47,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 210,
+ End: 216,
+ Term: []byte("纠纷"),
+ Position: 48,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 216,
+ End: 219,
+ Term: []byte("中"),
+ Position: 49,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 219,
+ End: 222,
+ Term: []byte("本"),
+ Position: 50,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 222,
+ End: 225,
+ Term: []byte("已"),
+ Position: 51,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 225,
+ End: 231,
+ Term: []byte("处于"),
+ Position: 52,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 231,
+ End: 237,
+ Term: []byte("弱势"),
+ Position: 53,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 237,
+ End: 243,
+ Term: []byte("地位"),
+ Position: 54,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 243,
+ End: 246,
+ Term: []byte("的"),
+ Position: 55,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 246,
+ End: 255,
+ Term: []byte("消费者"),
+ Position: 56,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 255,
+ End: 261,
+ Term: []byte("由此"),
+ Position: 57,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 261,
+ End: 264,
+ Term: []byte("将"),
+ Position: 58,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 264,
+ End: 270,
+ Term: []byte("陷入"),
+ Position: 59,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 270,
+ End: 282,
+ Term: []byte("万劫不复"),
+ Position: 60,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 282,
+ End: 285,
+ Term: []byte("的"),
+ Position: 61,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 285,
+ End: 291,
+ Term: []byte("境地"),
+ Position: 62,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 291,
+ End: 294,
+ Term: []byte("。"),
+ Position: 63,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 294,
+ End: 295,
+ Term: []byte(" "),
+ Position: 64,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("大"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("大"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("他说的确实在理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("说"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("确实"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("在"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("理"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春节讲话"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("长春"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("市长"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("春节"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("讲话"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结婚的和尚未结婚的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结婚"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("尚未"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("结婚"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结合成分子时"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结合"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("成"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("分子"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("旅游和服务是最好的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("旅游"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("服务"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("最好"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("这件事情的确是我的错"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("这件"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("事情"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("的确"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("我"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("错"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("供大家参考指正"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("供"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("大家"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("参考"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("指正"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("哈尔滨政府公布塌桥原因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("哈尔滨"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("政府"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("公布"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("塌"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("桥"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("原因"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我在机场入口处"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("在"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("机场"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("入口处"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邢永臣摄影报道"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("邢"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("永"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("臣"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("摄影"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("报道"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("BP神经网络如何训练才能在分类时增加区分度?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("BP"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 14,
+ Term: []byte("神经网络"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("如何"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 20,
+ End: 26,
+ Term: []byte("训练"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("才能"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("在"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("分类"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("时"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 44,
+ End: 50,
+ Term: []byte("增加"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 50,
+ End: 59,
+ Term: []byte("区分度"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 59,
+ End: 62,
+ Term: []byte("?"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("南京市长江大桥"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("南京市"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 21,
+ Term: []byte("长江大桥"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("应"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("一些"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("使用者"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("建议"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte(","),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("也"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("为了"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("便于"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("利用"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 59,
+ Term: []byte("NiuTrans"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 59,
+ End: 65,
+ Term: []byte("用于"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 68,
+ Term: []byte("SMT"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 74,
+ Term: []byte("研究"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春药店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("长春市"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("长春"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("药店"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邓颖超生前最喜欢的衣服"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("邓颖超"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("生前"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("最"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("喜欢"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("衣服"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("胡锦涛是热爱世界和平的政治局常委"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("胡锦涛"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("热爱"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世界"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("和平"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("政治局"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("常委"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("程序员"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("祝"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("海林"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("朱"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("会"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("震"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("是"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("在"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("孙"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("健"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("的"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("左面"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("右面"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 61,
+ Term: []byte(","),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(" "),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 65,
+ Term: []byte("范"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 68,
+ Term: []byte("凯"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 68,
+ End: 71,
+ Term: []byte("在"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 71,
+ End: 74,
+ Term: []byte("最"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 80,
+ Term: []byte("右面"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 80,
+ End: 81,
+ Term: []byte("."),
+ Position: 23,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 84,
+ Term: []byte("再"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 84,
+ End: 87,
+ Term: []byte("往"),
+ Position: 25,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("左"),
+ Position: 26,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("是"),
+ Position: 27,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("李"),
+ Position: 28,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 96,
+ End: 99,
+ Term: []byte("松"),
+ Position: 29,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 102,
+ Term: []byte("洪"),
+ Position: 30,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一次性交多少钱"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("一次性"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("交"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("多少"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("钱"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("两块五一套,三块八一斤,四块七一本,五块六一条"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("两块"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("五"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("一套"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte(","),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("三块"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("八"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一斤"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte(","),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("四块"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("七"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("一本"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte(","),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("五块"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("六"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("一条"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("小和尚留了一个像大和尚一样的和尚头"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("小"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("和尚"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("留"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("一个"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("像"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("大"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("和尚"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一样"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 51,
+ Term: []byte("和尚头"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 27,
+ Term: []byte("中华人民共和国"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("公民"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 34,
+ Term: []byte(";"),
+ Position: 5,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("我"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("爸爸"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("是"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 55,
+ Term: []byte("共和党"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("党员"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(";"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 63,
+ Term: []byte(" "),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("地铁"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 78,
+ Term: []byte("和平门"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("站"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张晓梅去人民医院做了个B超然后去买了件T恤"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("张晓梅"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人民"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("医院"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("做"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("个"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 37,
+ Term: []byte("B超"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("然后"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("去"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 49,
+ Term: []byte("买"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 52,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 52,
+ End: 55,
+ Term: []byte("件"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 59,
+ Term: []byte("T恤"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("AT&T是一件不错的公司,给你发offer了吗?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("AT&T"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("一件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("不错"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("公司"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 31,
+ Term: []byte(","),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 31,
+ End: 34,
+ Term: []byte("给"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("你"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 40,
+ Term: []byte("发"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 40,
+ End: 45,
+ Term: []byte("offer"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("吗"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("?"),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("C++"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 8,
+ Term: []byte("c#"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 8,
+ End: 11,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("什么"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("关系"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("?"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 26,
+ End: 28,
+ Term: []byte("11"),
+ Position: 8,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 28,
+ End: 29,
+ Term: []byte("+"),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 29,
+ End: 32,
+ Term: []byte("122"),
+ Position: 10,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 32,
+ End: 33,
+ Term: []byte("="),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("133"),
+ Position: 12,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte(","),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("是"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("吗"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("?"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 48,
+ End: 50,
+ Term: []byte("PI"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 50,
+ End: 51,
+ Term: []byte("="),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 52,
+ Term: []byte("3"),
+ Position: 19,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 52,
+ End: 53,
+ Term: []byte("."),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 53,
+ End: 58,
+ Term: []byte("14159"),
+ Position: 21,
+ Type: analysis.Numeric,
+ },
+ },
+ },
+ {
+ []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("你"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("认识"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("那个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("主席"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("握手"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("的哥"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("吗"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("?"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("他"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("开"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 57,
+ Term: []byte("一辆"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 63,
+ Term: []byte("黑色"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("的士"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 72,
+ Term: []byte("。"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("枪杆子中出政权"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("枪杆子"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("中"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("出"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("政权"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张三风同学走上了不归路"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("张"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("三"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("风"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("同学"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("走上"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 33,
+ Term: []byte("不归路"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("阿Q"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 4,
+ End: 10,
+ Term: []byte("腰间"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("挂"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("着"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 21,
+ Term: []byte("BB机"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("手里"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("拿"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("着"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("大哥大"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte(","),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("说"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte(":"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("我"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("一般"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 66,
+ Term: []byte("吃饭"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 69,
+ Term: []byte("不"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 74,
+ Term: []byte("AA制"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("的"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 80,
+ Term: []byte("。"),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("在1号店能买到小S和大S八卦的书。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 10,
+ Term: []byte("1号店"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("能"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("买"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 19,
+ Term: []byte("到"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 23,
+ Term: []byte("小S"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("和"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 30,
+ Term: []byte("大S"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("八卦"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("书"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("。"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ }
+
+ tokenizer, _ := NewJiebaTokenizer("../dict.txt", false, false)
+ for _, test := range tests {
+ actual := tokenizer.Tokenize(test.input)
+ if !reflect.DeepEqual(actual, test.output) {
+ t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
+ }
+ }
+}
+
+func TestJiebaTokenizerSearchModeWithoutHMM(t *testing.T) {
+ tests := []struct {
+ input []byte
+ output analysis.TokenStream
+ }{{
+ []byte("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("这"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("一个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("伸手"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("不见"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("五指"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 30,
+ Term: []byte("伸手不见五指"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("黑夜"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("。"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("我"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("叫"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 57,
+ Term: []byte("悟空"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 57,
+ Term: []byte("孙悟空"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 60,
+ Term: []byte(","),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("我"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 66,
+ Term: []byte("爱"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 72,
+ Term: []byte("北京"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte(","),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte("我"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("爱"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("Python"),
+ Position: 22,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("和"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("C++"),
+ Position: 24,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("。"),
+ Position: 25,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("我不喜欢日本和服。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("不"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("喜欢"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("日本"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("和服"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("。"),
+ Position: 6,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("雷猴回归人间。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("回归"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人间"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("。"),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("工信处"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("干事"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("女干事"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("每月"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("经过"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("下属"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("科室"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("都"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("要"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 54,
+ Term: []byte("亲口"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("交代"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 62,
+ Term: []byte("24"),
+ Position: 12,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 62,
+ End: 65,
+ Term: []byte("口"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 71,
+ Term: []byte("交换"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 68,
+ End: 74,
+ Term: []byte("换机"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 74,
+ Term: []byte("交换机"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("等"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 83,
+ Term: []byte("技术"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 86,
+ Term: []byte("技术性"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 86,
+ End: 92,
+ Term: []byte("器件"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 92,
+ End: 95,
+ Term: []byte("的"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 95,
+ End: 101,
+ Term: []byte("安装"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 101,
+ End: 107,
+ Term: []byte("工作"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我需要廉租房"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("廉租"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("租房"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("廉租房"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("永和服装饰品有限公司"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("永和"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("服装"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("饰品"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("有限"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("公司"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 30,
+ Term: []byte("有限公司"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我爱北京天安门"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("爱"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("北京"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("天安"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("天安门"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("abc"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("abc"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("隐马尔可夫"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("隐"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("可夫"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 12,
+ Term: []byte("马尔可"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 15,
+ Term: []byte("马尔可夫"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("雷猴是个好网站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("雷猴"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("好"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("网站"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("“"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 12,
+ Term: []byte("Microsoft"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("”"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("一"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("词"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("由"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("“"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 40,
+ Term: []byte("MICROcomputer"),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 40,
+ End: 43,
+ Term: []byte("("),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("微型"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 55,
+ Term: []byte("计算"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 52,
+ End: 58,
+ Term: []byte("算机"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 58,
+ Term: []byte("计算机"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 58,
+ End: 61,
+ Term: []byte(")"),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 64,
+ Term: []byte("”"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 64,
+ End: 67,
+ Term: []byte("和"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 67,
+ End: 70,
+ Term: []byte("“"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 78,
+ Term: []byte("SOFTware"),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("("),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("软件"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte(")"),
+ Position: 21,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("”"),
+ Position: 22,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("两"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 96,
+ End: 102,
+ Term: []byte("部分"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 102,
+ End: 108,
+ Term: []byte("组成"),
+ Position: 25,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("草泥马和欺实马是今年的流行词汇"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("草泥马"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("欺"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("实"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("马"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("今年"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("流行"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("词汇"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("伊藤洋华堂总府店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("伊"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("藤"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("洋华堂"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("总府"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("店"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国科学院计算技术研究所"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("中国"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("科学"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("学院"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("计算"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("技术"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("研究"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("科学院"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 36,
+ Term: []byte("研究所"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 36,
+ Term: []byte("中国科学院计算技术研究所"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("罗密欧与朱丽叶"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("罗密欧"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("与"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("朱丽叶"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我购买了道具和服装"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("购买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("道具"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("服装"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("PS"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 3,
+ Term: []byte(":"),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 4,
+ Term: []byte(" "),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("我"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("觉得"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("开源"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("有"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("一个"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("好处"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte(","),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("就是"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 49,
+ Term: []byte("能够"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 55,
+ Term: []byte("敦促"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("自己"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 67,
+ Term: []byte("不断"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 67,
+ End: 73,
+ Term: []byte("改进"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 73,
+ Term: []byte("不断改进"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 73,
+ End: 76,
+ Term: []byte(","),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 76,
+ End: 82,
+ Term: []byte("避免"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 82,
+ End: 85,
+ Term: []byte("敞"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 85,
+ End: 88,
+ Term: []byte("帚"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 88,
+ End: 94,
+ Term: []byte("自珍"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省石首市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("湖北"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("石首"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("石首市"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("湖北省十堰市"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("湖北"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("湖北省"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("十堰"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("十堰市"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("总经理完成了这件事情"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("经理"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("总经理"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("完成"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("这件"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("事情"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("电脑修好了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("电脑"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("修好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("做好了这件事情就一了百了了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("做好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("这件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("事情"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 36,
+ Term: []byte("一了百了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("了"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("人们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("审美"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("观点"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("不同"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我们买了一个美的空调"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("我们"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("买"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("一个"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("美的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("空调"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("线程初始化时我们要注意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("线程"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("初始"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 15,
+ Term: []byte("初始化"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("我们"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("要"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("注意"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一个分子是由好多原子组织成的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("一个"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("分子"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("是"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("由"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("好多"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("原子"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("组织"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("成"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("祝你马到功成"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("祝"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("你"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 18,
+ Term: []byte("马到功成"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("他掉进了无底洞里"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("掉"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("进"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("无底"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("无底洞"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("里"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("中国的首都是北京"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("中国"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("北京"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("孙君意"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("孙"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("君"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("意"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("外交部发言人马朝旭"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("外交"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("外交部"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("发言"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("发言人"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("马朝旭"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("领导人会议和第四届东亚峰会"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("领导"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("领导人"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("会议"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("第四"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("四届"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("第四届"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("东亚"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("峰会"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("在过去的这五年"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("过去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("这"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("五年"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("还需要很长的路要走"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("还"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("需要"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("很"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("长"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("路"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("要"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("走"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("60周年首都阅兵"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("60"),
+ Position: 1,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 2,
+ End: 8,
+ Term: []byte("周年"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 8,
+ End: 14,
+ Term: []byte("首都"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("阅兵"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("你好"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("人们"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("审美"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("观点"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("不同"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后来世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("来"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世博"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("博园"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("买水果然后去世博园"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("买"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("水果"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("然后"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("去"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世博"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("博园"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("世博园"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("但是后来我才知道你是对的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("但是"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("后来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("我"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("才"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("知道"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("你"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("对"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("存在即合理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("存在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("即"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("合理"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("的的的的的在的的的的就以和和和"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("的"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("的"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("在"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("就"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("以"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("和"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("和"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("I love你,不以为耻,反以为rong"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte("I"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 2,
+ Term: []byte(" "),
+ Position: 2,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 6,
+ Term: []byte("love"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("你"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte(","),
+ Position: 5,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("不以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("以为"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 24,
+ Term: []byte("不以为耻"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte(","),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("反"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("以为"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 40,
+ Term: []byte("rong"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("因"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("很好但主要是基于网页形式"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("很"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("但"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("主要"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("基于"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("网页"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("形式"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("hello你好人们审美的观点是不同的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 5,
+ Term: []byte("hello"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 5,
+ End: 11,
+ Term: []byte("你好"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("人们"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("审美"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("观点"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("是"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("不同"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("的"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("为什么我不能拥有想要的生活"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("什么"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("为什么"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("我"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("不能"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("拥有"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("想要"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("生活"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("后来我才"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("后来"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("我"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("才"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("此次来中国是为了"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("此次"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("来"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("中国"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("为了"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("使用"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("了"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("它"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("就"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("可以"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("解决"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一些"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("问题"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(",使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 1,
+ Term: []byte(","),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 1,
+ End: 7,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 10,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 22,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 34,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 40,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("其实使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("其实"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("好人使用了它就可以解决一些问题"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("好人"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("使用"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("它"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("就"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("可以"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("解决"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一些"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("问题"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("是因为和国家"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("因为"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("是因为"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("国家"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("老年搜索还支持"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("老年"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("搜索"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("还"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("支持"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("干脆"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("就"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("把"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("那"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("部"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("蒙"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("人"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("闲"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("法"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("给"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("废"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("了"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("拉倒"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("!"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 53,
+ Term: []byte("RT"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 53,
+ End: 54,
+ Term: []byte(" "),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 55,
+ Term: []byte("@"),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 55,
+ End: 67,
+ Term: []byte("laoshipukong"),
+ Position: 19,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 67,
+ End: 68,
+ Term: []byte(" "),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 69,
+ Term: []byte(":"),
+ Position: 21,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 69,
+ End: 70,
+ Term: []byte(" "),
+ Position: 22,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 70,
+ End: 72,
+ Term: []byte("27"),
+ Position: 23,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 72,
+ End: 75,
+ Term: []byte("日"),
+ Position: 24,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 75,
+ End: 78,
+ Term: []byte(","),
+ Position: 25,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 78,
+ End: 84,
+ Term: []byte("全国"),
+ Position: 26,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 81,
+ End: 87,
+ Term: []byte("国人"),
+ Position: 27,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 84,
+ End: 90,
+ Term: []byte("人大"),
+ Position: 28,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 96,
+ Term: []byte("常委"),
+ Position: 29,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 93,
+ End: 99,
+ Term: []byte("委会"),
+ Position: 30,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 99,
+ Term: []byte("常委会"),
+ Position: 31,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 99,
+ Term: []byte("全国人大常委会"),
+ Position: 32,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 105,
+ Term: []byte("第三"),
+ Position: 33,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 102,
+ End: 108,
+ Term: []byte("三次"),
+ Position: 34,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 108,
+ Term: []byte("第三次"),
+ Position: 35,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 108,
+ End: 114,
+ Term: []byte("审议"),
+ Position: 36,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 114,
+ End: 120,
+ Term: []byte("侵权"),
+ Position: 37,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 120,
+ End: 126,
+ Term: []byte("责任"),
+ Position: 38,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 120,
+ End: 129,
+ Term: []byte("责任法"),
+ Position: 39,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 129,
+ End: 135,
+ Term: []byte("草案"),
+ Position: 40,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 135,
+ End: 138,
+ Term: []byte(","),
+ Position: 41,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 138,
+ End: 144,
+ Term: []byte("删除"),
+ Position: 42,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 144,
+ End: 147,
+ Term: []byte("了"),
+ Position: 43,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 147,
+ End: 153,
+ Term: []byte("有关"),
+ Position: 44,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 153,
+ End: 159,
+ Term: []byte("医疗"),
+ Position: 45,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 159,
+ End: 165,
+ Term: []byte("损害"),
+ Position: 46,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 165,
+ End: 171,
+ Term: []byte("责任"),
+ Position: 47,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 171,
+ End: 174,
+ Term: []byte("“"),
+ Position: 48,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 174,
+ End: 180,
+ Term: []byte("举证"),
+ Position: 49,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 180,
+ End: 186,
+ Term: []byte("倒置"),
+ Position: 50,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 186,
+ End: 189,
+ Term: []byte("”"),
+ Position: 51,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 189,
+ End: 192,
+ Term: []byte("的"),
+ Position: 52,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 192,
+ End: 198,
+ Term: []byte("规定"),
+ Position: 53,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 198,
+ End: 201,
+ Term: []byte("。"),
+ Position: 54,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 201,
+ End: 204,
+ Term: []byte("在"),
+ Position: 55,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 204,
+ End: 210,
+ Term: []byte("医患"),
+ Position: 56,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 210,
+ End: 216,
+ Term: []byte("纠纷"),
+ Position: 57,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 216,
+ End: 219,
+ Term: []byte("中"),
+ Position: 58,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 219,
+ End: 222,
+ Term: []byte("本"),
+ Position: 59,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 222,
+ End: 225,
+ Term: []byte("已"),
+ Position: 60,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 225,
+ End: 231,
+ Term: []byte("处于"),
+ Position: 61,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 231,
+ End: 237,
+ Term: []byte("弱势"),
+ Position: 62,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 237,
+ End: 243,
+ Term: []byte("地位"),
+ Position: 63,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 243,
+ End: 246,
+ Term: []byte("的"),
+ Position: 64,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 246,
+ End: 252,
+ Term: []byte("消费"),
+ Position: 65,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 246,
+ End: 255,
+ Term: []byte("消费者"),
+ Position: 66,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 255,
+ End: 261,
+ Term: []byte("由此"),
+ Position: 67,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 261,
+ End: 264,
+ Term: []byte("将"),
+ Position: 68,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 264,
+ End: 270,
+ Term: []byte("陷入"),
+ Position: 69,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 276,
+ End: 282,
+ Term: []byte("不复"),
+ Position: 70,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 270,
+ End: 282,
+ Term: []byte("万劫不复"),
+ Position: 71,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 282,
+ End: 285,
+ Term: []byte("的"),
+ Position: 72,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 285,
+ End: 291,
+ Term: []byte("境地"),
+ Position: 73,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 291,
+ End: 294,
+ Term: []byte("。"),
+ Position: 74,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 294,
+ End: 295,
+ Term: []byte(" "),
+ Position: 75,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("大"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("大"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte(""),
+ analysis.TokenStream{},
+ },
+ {
+ []byte("他说的确实在理"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("他"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("说"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("确实"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("在"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("理"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春节讲话"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("长春"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("市长"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("春节"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("讲话"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结婚的和尚未结婚的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结婚"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("的"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("和"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("尚未"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("结婚"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("结合成分子时"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("结合"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("成"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("分子"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("时"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("旅游和服务是最好的"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("旅游"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("服务"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("最好"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("这件事情的确是我的错"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("这件"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("事情"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("的确"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("我"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("错"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("供大家参考指正"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("供"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("大家"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("参考"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("指正"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("哈尔滨政府公布塌桥原因"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("哈尔"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("哈尔滨"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("政府"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("公布"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("塌"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("桥"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("原因"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我在机场入口处"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("在"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("机场"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("入口"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 21,
+ Term: []byte("入口处"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邢永臣摄影报道"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("邢"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("永"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("臣"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("摄影"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("报道"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("BP神经网络如何训练才能在分类时增加区分度?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 2,
+ Term: []byte("BP"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 2,
+ End: 8,
+ Term: []byte("神经"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 8,
+ End: 14,
+ Term: []byte("网络"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 2,
+ End: 11,
+ Term: []byte("神经网"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 2,
+ End: 14,
+ Term: []byte("神经网络"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 14,
+ End: 20,
+ Term: []byte("如何"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 20,
+ End: 26,
+ Term: []byte("训练"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 32,
+ Term: []byte("才能"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 32,
+ End: 35,
+ Term: []byte("在"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 35,
+ End: 41,
+ Term: []byte("分类"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 41,
+ End: 44,
+ Term: []byte("时"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 44,
+ End: 50,
+ Term: []byte("增加"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 50,
+ End: 56,
+ Term: []byte("区分"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 53,
+ End: 59,
+ Term: []byte("分度"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 50,
+ End: 59,
+ Term: []byte("区分度"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 59,
+ End: 62,
+ Term: []byte("?"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("南京市长江大桥"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("南京"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("京市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("南京市"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("长江"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("大桥"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 21,
+ Term: []byte("长江大桥"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("应"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("一些"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("使用"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("用者"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 18,
+ Term: []byte("使用者"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("的"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("建议"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte(","),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("也"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("为了"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 45,
+ Term: []byte("便于"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("利用"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 59,
+ Term: []byte("NiuTrans"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 59,
+ End: 65,
+ Term: []byte("用于"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 68,
+ Term: []byte("SMT"),
+ Position: 15,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 68,
+ End: 74,
+ Term: []byte("研究"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("长春市长春药店"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("长春"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("长春市"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("长春"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("药店"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("邓颖超生前最喜欢的衣服"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("邓颖超"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("生前"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("最"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("喜欢"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("衣服"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("胡锦涛是热爱世界和平的政治局常委"),
+ analysis.TokenStream{
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("锦涛"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("胡锦涛"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("是"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("热爱"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("世界"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("和平"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("政治"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("政治局"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("常委"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("程序"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("程序员"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("祝"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("海林"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("和"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("朱"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("会"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("震"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("是"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("在"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("孙"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("健"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("的"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("左面"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("和"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("右面"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 61,
+ Term: []byte(","),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(" "),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 65,
+ Term: []byte("范"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 65,
+ End: 68,
+ Term: []byte("凯"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 68,
+ End: 71,
+ Term: []byte("在"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 71,
+ End: 74,
+ Term: []byte("最"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 80,
+ Term: []byte("右面"),
+ Position: 23,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 80,
+ End: 81,
+ Term: []byte("."),
+ Position: 24,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 81,
+ End: 84,
+ Term: []byte("再"),
+ Position: 25,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 84,
+ End: 87,
+ Term: []byte("往"),
+ Position: 26,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 87,
+ End: 90,
+ Term: []byte("左"),
+ Position: 27,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 90,
+ End: 93,
+ Term: []byte("是"),
+ Position: 28,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 93,
+ End: 96,
+ Term: []byte("李"),
+ Position: 29,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 96,
+ End: 99,
+ Term: []byte("松"),
+ Position: 30,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 99,
+ End: 102,
+ Term: []byte("洪"),
+ Position: 31,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("一次性交多少钱"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("一次"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("一次性"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("交"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("多少"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 21,
+ Term: []byte("钱"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("两块五一套,三块八一斤,四块七一本,五块六一条"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("两块"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("五"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("一套"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte(","),
+ Position: 4,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("三块"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("八"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("一斤"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte(","),
+ Position: 8,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 36,
+ End: 42,
+ Term: []byte("四块"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("七"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 51,
+ Term: []byte("一本"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte(","),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("五块"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 63,
+ Term: []byte("六"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("一条"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("小和尚留了一个像大和尚一样的和尚头"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("小"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("和尚"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("留"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("了"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("一个"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("像"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("大"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("和尚"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("一样"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 48,
+ Term: []byte("和尚"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 51,
+ Term: []byte("和尚头"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("我"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 12,
+ Term: []byte("中华"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("华人"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人民"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("共和"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 27,
+ Term: []byte("共和国"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 27,
+ Term: []byte("中华人民共和国"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("公民"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 34,
+ Term: []byte(";"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("我"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("爸爸"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("是"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 52,
+ Term: []byte("共和"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 55,
+ Term: []byte("共和党"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 61,
+ Term: []byte("党员"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 61,
+ End: 62,
+ Term: []byte(";"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 62,
+ End: 63,
+ Term: []byte(" "),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("地铁"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 75,
+ Term: []byte("和平"),
+ Position: 20,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 78,
+ Term: []byte("和平门"),
+ Position: 21,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 78,
+ End: 81,
+ Term: []byte("站"),
+ Position: 22,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张晓梅去人民医院做了个B超然后去买了件T恤"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("张晓梅"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("去"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 18,
+ Term: []byte("人民"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("医院"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 27,
+ Term: []byte("做"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("个"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 37,
+ Term: []byte("B超"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 43,
+ Term: []byte("然后"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 43,
+ End: 46,
+ Term: []byte("去"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 46,
+ End: 49,
+ Term: []byte("买"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 49,
+ End: 52,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 52,
+ End: 55,
+ Term: []byte("件"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 55,
+ End: 59,
+ Term: []byte("T恤"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("AT&T是一件不错的公司,给你发offer了吗?"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("AT&T"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 4,
+ End: 7,
+ Term: []byte("是"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 7,
+ End: 13,
+ Term: []byte("一件"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 19,
+ Term: []byte("不错"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 22,
+ Term: []byte("的"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 22,
+ End: 28,
+ Term: []byte("公司"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 28,
+ End: 31,
+ Term: []byte(","),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 31,
+ End: 34,
+ Term: []byte("给"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 34,
+ End: 37,
+ Term: []byte("你"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 37,
+ End: 40,
+ Term: []byte("发"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 40,
+ End: 45,
+ Term: []byte("offer"),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("了"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("吗"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("?"),
+ Position: 14,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("C++和c#是什么关系?11+122=133,是吗?PI=3.14159"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("C++"),
+ Position: 1,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("和"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 8,
+ Term: []byte("c#"),
+ Position: 3,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 8,
+ End: 11,
+ Term: []byte("是"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 11,
+ End: 17,
+ Term: []byte("什么"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 17,
+ End: 23,
+ Term: []byte("关系"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("?"),
+ Position: 7,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 26,
+ End: 28,
+ Term: []byte("11"),
+ Position: 8,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 28,
+ End: 29,
+ Term: []byte("+"),
+ Position: 9,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 29,
+ End: 32,
+ Term: []byte("122"),
+ Position: 10,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 32,
+ End: 33,
+ Term: []byte("="),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 33,
+ End: 36,
+ Term: []byte("133"),
+ Position: 12,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte(","),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("是"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("吗"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("?"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 48,
+ End: 50,
+ Term: []byte("PI"),
+ Position: 17,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 50,
+ End: 51,
+ Term: []byte("="),
+ Position: 18,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 52,
+ Term: []byte("3"),
+ Position: 19,
+ Type: analysis.Numeric,
+ },
+ {
+ Start: 52,
+ End: 53,
+ Term: []byte("."),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 53,
+ End: 58,
+ Term: []byte("14159"),
+ Position: 21,
+ Type: analysis.Numeric,
+ },
+ },
+ },
+ {
+ []byte("你认识那个和主席握手的的哥吗?他开一辆黑色的士。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("你"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("认识"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("那个"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 18,
+ Term: []byte("和"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 18,
+ End: 24,
+ Term: []byte("主席"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 30,
+ Term: []byte("握手"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("的"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("的哥"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("吗"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("?"),
+ Position: 10,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("他"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte("开"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 51,
+ End: 57,
+ Term: []byte("一辆"),
+ Position: 13,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 57,
+ End: 63,
+ Term: []byte("黑色"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 63,
+ End: 69,
+ Term: []byte("的士"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 72,
+ Term: []byte("。"),
+ Position: 16,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("枪杆子中出政权"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 6,
+ Term: []byte("枪杆"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 9,
+ Term: []byte("杆子"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 0,
+ End: 9,
+ Term: []byte("枪杆子"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 12,
+ Term: []byte("中"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 12,
+ End: 15,
+ Term: []byte("出"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("政权"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("张三风同学走上了不归路"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("张"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 6,
+ Term: []byte("三"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 6,
+ End: 9,
+ Term: []byte("风"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 9,
+ End: 15,
+ Term: []byte("同学"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 15,
+ End: 21,
+ Term: []byte("走上"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 24,
+ Term: []byte("了"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 33,
+ Term: []byte("归路"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 24,
+ End: 33,
+ Term: []byte("不归路"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ },
+ },
+ {
+ []byte("阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 4,
+ Term: []byte("阿Q"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 4,
+ End: 10,
+ Term: []byte("腰间"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("挂"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("着"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 21,
+ Term: []byte("BB机"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 21,
+ End: 27,
+ Term: []byte("手里"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 27,
+ End: 30,
+ Term: []byte("拿"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 33,
+ Term: []byte("着"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 39,
+ Term: []byte("大哥"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 33,
+ End: 42,
+ Term: []byte("大哥大"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte(","),
+ Position: 11,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 45,
+ End: 48,
+ Term: []byte("说"),
+ Position: 12,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 48,
+ End: 51,
+ Term: []byte(":"),
+ Position: 13,
+ Type: analysis.AlphaNumeric,
+ },
+ {
+ Start: 51,
+ End: 54,
+ Term: []byte("我"),
+ Position: 14,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 54,
+ End: 60,
+ Term: []byte("一般"),
+ Position: 15,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 60,
+ End: 66,
+ Term: []byte("吃饭"),
+ Position: 16,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 66,
+ End: 69,
+ Term: []byte("不"),
+ Position: 17,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 69,
+ End: 74,
+ Term: []byte("AA制"),
+ Position: 18,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 74,
+ End: 77,
+ Term: []byte("的"),
+ Position: 19,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 77,
+ End: 80,
+ Term: []byte("。"),
+ Position: 20,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ {
+ []byte("在1号店能买到小S和大S八卦的书。"),
+ analysis.TokenStream{
+ {
+ Start: 0,
+ End: 3,
+ Term: []byte("在"),
+ Position: 1,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 3,
+ End: 10,
+ Term: []byte("1号店"),
+ Position: 2,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 10,
+ End: 13,
+ Term: []byte("能"),
+ Position: 3,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 13,
+ End: 16,
+ Term: []byte("买"),
+ Position: 4,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 16,
+ End: 19,
+ Term: []byte("到"),
+ Position: 5,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 19,
+ End: 23,
+ Term: []byte("小S"),
+ Position: 6,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 23,
+ End: 26,
+ Term: []byte("和"),
+ Position: 7,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 26,
+ End: 30,
+ Term: []byte("大S"),
+ Position: 8,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 30,
+ End: 36,
+ Term: []byte("八卦"),
+ Position: 9,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 36,
+ End: 39,
+ Term: []byte("的"),
+ Position: 10,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 39,
+ End: 42,
+ Term: []byte("书"),
+ Position: 11,
+ Type: analysis.Ideographic,
+ },
+ {
+ Start: 42,
+ End: 45,
+ Term: []byte("。"),
+ Position: 12,
+ Type: analysis.AlphaNumeric,
+ },
+ },
+ },
+ }
+
+ tokenizer, _ := NewJiebaTokenizer("../dict.txt", false, true)
+ for _, test := range tests {
+ actual := tokenizer.Tokenize(test.input)
+ if !reflect.DeepEqual(actual, test.output) {
+ t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
+ }
+ }
+}
diff --git a/trie.go b/trie.go
index b7d7ed2..679f1f4 100644
--- a/trie.go
+++ b/trie.go
@@ -1,7 +1,6 @@
package jiebago
import (
- "bytes"
"crypto/md5"
"encoding/gob"
"fmt"
@@ -19,34 +18,6 @@ type trie struct {
Freq map[string]float64
}
-func (t trie) MarshalBinary() ([]byte, error) {
- var b bytes.Buffer
- enc := gob.NewEncoder(&b)
- err := enc.Encode(t.Total)
- if err != nil {
- return nil, err
- }
- err = enc.Encode(t.Freq)
- if err != nil {
- return nil, err
- }
- return b.Bytes(), nil
-}
-
-func (t *trie) UnmarshalBinary(data []byte) error {
- b := bytes.NewBuffer(data)
- dec := gob.NewDecoder(b)
- err := dec.Decode(&t.Total)
- if err != nil {
- return err
- }
- err = dec.Decode(&t.Freq)
- if err != nil {
- return err
- }
- return nil
-}
-
func (t *trie) load(dictFileName string) error {
dictFilePath, err := DictPath(dictFileName)
if err != nil {