1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-10 11:40:26 +08:00

initial commit

This commit is contained in:
Wang Bin
2013-10-31 18:20:04 +08:00
commit 8c785ad36a
24 changed files with 831685 additions and 0 deletions

80
analyse/analyse.go Normal file
View File

@@ -0,0 +1,80 @@
package analyse
import (
"github.com/wangbin/jiebago"
"sort"
"strings"
"unicode/utf8"
)
// TfIdf couples a word with the weight ExtractTags computed for it.
type TfIdf struct {
	word string
	freq float64
}

// TfIdfs is a list of TfIdf entries that implements sort.Interface:
// ascending by weight, with equal weights ordered by word.
type TfIdfs []TfIdf

// Len reports the number of entries.
func (tis TfIdfs) Len() int {
	return len(tis)
}

// Less reports whether entry i sorts before entry j.
func (tis TfIdfs) Less(i, j int) bool {
	a, b := tis[i], tis[j]
	if a.freq != b.freq {
		return a.freq < b.freq
	}
	return a.word < b.word
}

// Swap exchanges entries i and j.
func (tis TfIdfs) Swap(i, j int) {
	tmp := tis[i]
	tis[i] = tis[j]
	tis[j] = tmp
}
// ExtractTags returns at most topK keywords of sentence, ordered by
// descending TF-IDF weight (equal weights are ordered by descending word).
//
// The sentence is segmented with jiebago.Cut(sentence, false, true). Tokens
// that are shorter than two runes after trimming whitespace, or that are found
// in stopWords, are ignored. A token's term frequency is its count divided by
// the number of retained tokens; it is multiplied by the token's idfFreq
// entry, or by medianIdf when the token has no entry.
//
// A negative topK is treated as zero instead of panicking. The result is
// never nil.
func ExtractTags(sentence string, topK int) []string {
	if topK < 0 {
		topK = 0
	}

	counts := make(map[string]float64)
	total := 0.0
	for _, w := range jiebago.Cut(sentence, false, true) {
		w = strings.TrimSpace(w)
		if utf8.RuneCountInString(w) < 2 {
			continue
		}
		// Search only yields a candidate position; the equality check confirms
		// an exact hit. Presumably stopWords is kept sorted — verify in idf.go.
		if i := stopWords.Search(w); i < len(stopWords) && stopWords[i] == w {
			continue
		}
		counts[w]++
		total++
	}

	tis := make(TfIdfs, 0, len(counts))
	for word, count := range counts {
		idf, ok := idfFreq[word]
		if !ok {
			idf = medianIdf
		}
		tis = append(tis, TfIdf{word: word, freq: idf * (count / total)})
	}
	// Map iteration order is random; sorting makes the output deterministic.
	sort.Sort(sort.Reverse(tis))

	if len(tis) > topK {
		tis = tis[:topK]
	}
	tags := make([]string, len(tis))
	for i, ti := range tis {
		tags[i] = ti.word
	}
	return tags
}

197
analyse/analyse_test.go Normal file
View File

@@ -0,0 +1,197 @@
package analyse
import (
"github.com/wangbin/jiebago"
"testing"
)
var (
test_contents = []string{
"这是一个伸手不见五指的黑夜。我叫孙悟空我爱北京我爱Python和C++。",
"我不喜欢日本和服。",
"雷猴回归人间。",
"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
"我需要廉租房",
"永和服装饰品有限公司",
"我爱北京天安门",
"abc",
"隐马尔可夫",
"雷猴是个好网站",
"“Microsoft”一词由“MICROcomputer微型计算机”和“SOFTware软件”两部分组成",
"草泥马和欺实马是今年的流行词汇",
"伊藤洋华堂总府店",
"中国科学院计算技术研究所",
"罗密欧与朱丽叶",
"我购买了道具和服装",
"PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍",
"湖北省石首市",
"湖北省十堰市",
"总经理完成了这件事情",
"电脑修好了",
"做好了这件事情就一了百了了",
"人们审美的观点是不同的",
"我们买了一个美的空调",
"线程初始化时我们要注意",
"一个分子是由好多原子组织成的",
"祝你马到功成",
"他掉进了无底洞里",
"中国的首都是北京",
"孙君意",
"外交部发言人马朝旭",
"领导人会议和第四届东亚峰会",
"在过去的这五年",
"还需要很长的路要走",
"60周年首都阅兵",
"你好人们审美的观点是不同的",
"买水果然后来世博园",
"买水果然后去世博园",
"但是后来我才知道你是对的",
"存在即合理",
"的的的的的在的的的的就以和和和",
"I love你不以为耻反以为rong",
"因",
"",
"hello你好人们审美的观点是不同的",
"很好但主要是基于网页形式",
"hello你好人们审美的观点是不同的",
"为什么我不能拥有想要的生活",
"后来我才",
"此次来中国是为了",
"使用了它就可以解决一些问题",
",使用了它就可以解决一些问题",
"其实使用了它就可以解决一些问题",
"好人使用了它就可以解决一些问题",
"是因为和国家",
"老年搜索还支持",
"干脆就把那部蒙人的闲法给废了拉倒RT @laoshipukong : 27日全国人大常委会第三次审议侵权责任法草案删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 ",
"大",
"",
"他说的确实在理",
"长春市长春节讲话",
"结婚的和尚未结婚的",
"结合成分子时",
"旅游和服务是最好的",
"这件事情的确是我的错",
"供大家参考指正",
"哈尔滨政府公布塌桥原因",
"我在机场入口处",
"邢永臣摄影报道",
"BP神经网络如何训练才能在分类时增加区分度",
"南京市长江大桥",
"应一些使用者的建议也为了便于利用NiuTrans用于SMT研究",
"长春市长春药店",
"邓颖超生前最喜欢的衣服",
"胡锦涛是热爱世界和平的政治局常委",
"程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪",
"一次性交多少钱",
"两块五一套,三块八一斤,四块七一本,五块六一条",
"小和尚留了一个像大和尚一样的和尚头",
"我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站",
"张晓梅去人民医院做了个B超然后去买了件T恤",
"AT&T是一件不错的公司给你发offer了吗",
"C++和c#是什么关系11+122=133是吗PI=3.14159",
"你认识那个和主席握手的的哥吗?他开一辆黑色的士。",
"枪杆子中出政权"}
Tags = [][]string{
[]string{"Python", "C++", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u5b59\u609f\u7a7a", "\u9ed1\u591c", "\u5317\u4eac", "\u8fd9\u662f", "\u4e00\u4e2a"},
[]string{"\u548c\u670d", "\u559c\u6b22", "\u65e5\u672c"},
[]string{"\u96f7\u7334", "\u4eba\u95f4", "\u56de\u5f52"},
[]string{"\u5de5\u4fe1\u5904", "\u5973\u5e72\u4e8b", "24", "\u4ea4\u6362\u673a", "\u79d1\u5ba4", "\u4eb2\u53e3", "\u5668\u4ef6", "\u6280\u672f\u6027", "\u4e0b\u5c5e", "\u4ea4\u4ee3", "\u6bcf\u6708", "\u5b89\u88c5", "\u7ecf\u8fc7", "\u5de5\u4f5c"},
[]string{"\u5ec9\u79df\u623f", "\u9700\u8981"},
[]string{"\u9970\u54c1", "\u6c38\u548c", "\u670d\u88c5", "\u6709\u9650\u516c\u53f8"},
[]string{"\u5929\u5b89\u95e8", "\u5317\u4eac"},
[]string{"abc"},
[]string{"\u9a6c\u5c14\u53ef\u592b"},
[]string{"\u96f7\u7334", "\u7f51\u7ad9"},
[]string{"SOFTware", "Microsoft", "MICROcomputer", "\u5fae\u578b", "\u4e00\u8bcd", "\u8f6f\u4ef6", "\u8ba1\u7b97\u673a", "\u7ec4\u6210", "\u90e8\u5206"},
[]string{"\u8349\u6ce5\u9a6c", "\u6b3a\u5b9e", "\u8bcd\u6c47", "\u6d41\u884c", "\u4eca\u5e74"},
[]string{"\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u4f0a\u85e4"},
[]string{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
[]string{"\u6731\u4e3d\u53f6", "\u7f57\u5bc6\u6b27"},
[]string{"\u9053\u5177", "\u670d\u88c5", "\u8d2d\u4e70"},
[]string{"\u81ea\u73cd", "\u655e\u5e1a", "PS", "\u5f00\u6e90", "\u4e0d\u65ad\u6539\u8fdb", "\u6566\u4fc3", "\u597d\u5904", "\u907f\u514d", "\u80fd\u591f", "\u89c9\u5f97", "\u5c31\u662f", "\u81ea\u5df1", "\u4e00\u4e2a"},
[]string{"\u77f3\u9996\u5e02", "\u6e56\u5317\u7701"},
[]string{"\u5341\u5830\u5e02", "\u6e56\u5317\u7701"},
[]string{"\u603b\u7ecf\u7406", "\u8fd9\u4ef6", "\u5b8c\u6210", "\u4e8b\u60c5"},
[]string{"\u4fee\u597d", "\u7535\u8111"},
[]string{"\u4e00\u4e86\u767e\u4e86", "\u505a\u597d", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u5ba1\u7f8e", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
[]string{"\u7f8e\u7684", "\u7a7a\u8c03", "\u6211\u4eec", "\u4e00\u4e2a"},
[]string{"\u7ebf\u7a0b", "\u521d\u59cb\u5316", "\u6ce8\u610f", "\u6211\u4eec"},
[]string{"\u597d\u591a", "\u539f\u5b50", "\u5206\u5b50", "\u7ec4\u7ec7", "\u4e00\u4e2a"},
[]string{"\u9a6c\u5230\u529f\u6210"},
[]string{"\u65e0\u5e95\u6d1e"},
[]string{"\u9996\u90fd", "\u5317\u4eac", "\u4e2d\u56fd"},
[]string{"\u5b59\u541b\u610f"},
[]string{"\u9a6c\u671d\u65ed", "\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba"},
[]string{"\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae"},
[]string{"\u4e94\u5e74", "\u8fc7\u53bb"},
[]string{"\u5f88\u957f", "\u9700\u8981"},
[]string{"60", "\u9605\u5175", "\u5468\u5e74", "\u9996\u90fd"},
[]string{"\u5ba1\u7f8e", "\u4f60\u597d", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
[]string{"\u4e16\u535a\u56ed", "\u6c34\u679c", "\u7136\u540e"},
[]string{"\u4e16\u535a\u56ed", "\u6c34\u679c", "\u7136\u540e"},
[]string{"\u540e\u6765", "\u4f46\u662f", "\u77e5\u9053"},
[]string{"\u5408\u7406", "\u5b58\u5728"},
[]string{},
[]string{"rong", "love", "\u4e0d\u4ee5\u4e3a\u803b", "\u4ee5\u4e3a"},
[]string{},
[]string{},
[]string{"hello", "\u5ba1\u7f8e", "\u4f60\u597d", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
[]string{"\u7f51\u9875", "\u57fa\u4e8e", "\u5f62\u5f0f", "\u4e3b\u8981"},
[]string{"hello", "\u5ba1\u7f8e", "\u4f60\u597d", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
[]string{"\u60f3\u8981", "\u62e5\u6709", "\u4e3a\u4ec0\u4e48", "\u751f\u6d3b", "\u4e0d\u80fd"},
[]string{"\u540e\u6765"},
[]string{"\u6b64\u6b21", "\u4e3a\u4e86", "\u4e2d\u56fd"},
[]string{"\u89e3\u51b3", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
[]string{"\u89e3\u51b3", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
[]string{"\u89e3\u51b3", "\u5176\u5b9e", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
[]string{"\u597d\u4eba", "\u89e3\u51b3", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
[]string{"\u662f\u56e0\u4e3a", "\u56fd\u5bb6"},
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u652f\u6301"},
[]string{"\u95f2\u6cd5", "\u4e2d\u672c", "laoshipukong", "RT", "27", "\u8d23\u4efb\u6cd5", "\u8499\u4eba", "\u4e07\u52ab\u4e0d\u590d", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u533b\u60a3", "\u90a3\u90e8", "\u62c9\u5012", "\u4fb5\u6743", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u8349\u6848", "\u5883\u5730", "\u7ea0\u7eb7", "\u5220\u9664", "\u5f31\u52bf"},
[]string{},
[]string{},
[]string{"\u5728\u7406", "\u786e\u5b9e"},
[]string{"\u957f\u6625", "\u6625\u8282", "\u8bb2\u8bdd", "\u5e02\u957f"},
[]string{"\u7ed3\u5a5a", "\u5c1a\u672a"},
[]string{"\u5206\u5b50", "\u7ed3\u5408"},
[]string{"\u65c5\u6e38", "\u6700\u597d", "\u670d\u52a1"},
[]string{"\u7684\u786e", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u6307\u6b63", "\u53c2\u8003", "\u5927\u5bb6"},
[]string{"\u584c\u6865", "\u54c8\u5c14\u6ee8", "\u516c\u5e03", "\u539f\u56e0", "\u653f\u5e9c"},
[]string{"\u5165\u53e3\u5904", "\u673a\u573a"},
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
[]string{"\u533a\u5206\u5ea6", "BP", "\u795e\u7ecf\u7f51\u7edc", "\u8bad\u7ec3", "\u5206\u7c7b", "\u624d\u80fd", "\u5982\u4f55", "\u589e\u52a0"},
[]string{"\u957f\u6c5f\u5927\u6865", "\u5357\u4eac\u5e02"},
[]string{"SMT", "NiuTrans", "\u4f7f\u7528\u8005", "\u4fbf\u4e8e", "\u7528\u4e8e", "\u5efa\u8bae", "\u5229\u7528", "\u4e3a\u4e86", "\u7814\u7a76", "\u4e00\u4e9b"},
[]string{"\u957f\u6625\u5e02", "\u836f\u5e97", "\u957f\u6625"},
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u8863\u670d", "\u559c\u6b22"},
[]string{"\u653f\u6cbb\u5c40", "\u70ed\u7231", "\u5e38\u59d4", "\u80e1\u9526\u6d9b", "\u548c\u5e73", "\u4e16\u754c"},
[]string{"\u53f3\u9762", "\u5b59\u5065", "\u8303\u51ef", "\u674e\u677e\u6d2a", "\u6731\u4f1a\u9707", "\u6d77\u6797", "\u5de6\u9762", "\u7a0b\u5e8f\u5458", "\u518d\u5f80"},
[]string{"\u4e00\u6b21\u6027", "\u591a\u5c11"},
[]string{"\u56db\u5757", "\u4e94\u5757", "\u4e09\u5757", "\u4e00\u65a4", "\u4e24\u5757", "\u4e00\u672c", "\u4e00\u5957", "\u4e00\u6761"},
[]string{"\u548c\u5c1a", "\u548c\u5c1a\u5934", "\u4e00\u6837", "\u4e00\u4e2a"},
[]string{"\u548c\u5e73\u95e8", "\u5171\u548c\u515a", "\u5730\u94c1", "\u515a\u5458", "\u516c\u6c11", "\u7238\u7238", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd"},
[]string{"\u5f20\u6653\u6885", "T\u6064", "B\u8d85", "\u533b\u9662", "\u4eba\u6c11", "\u7136\u540e"},
[]string{"offer", "AT&T", "\u4e0d\u9519", "\u4e00\u4ef6", "\u516c\u53f8"},
[]string{"c#", "PI", "C++", "3.14159", "133", "122", "11", "\u5173\u7cfb", "\u4ec0\u4e48"},
[]string{"\u7684\u58eb", "\u7684\u54e5", "\u4ed6\u5f00", "\u63e1\u624b", "\u4e00\u8f86", "\u9ed1\u8272", "\u4e3b\u5e2d", "\u8ba4\u8bc6", "\u90a3\u4e2a"},
[]string{"\u67aa\u6746\u5b50", "\u653f\u6743"},
}
)
// TestExtractTags runs ExtractTags over every entry of test_contents and
// compares the result, position by position, with the matching entry of Tags.
func TestExtractTags(t *testing.T) {
	if err := jiebago.SetDictionary("../dict.txt"); err != nil {
		t.Fatalf("SetDictionary: %v", err)
	}
	for index, sentence := range test_contents {
		want := Tags[index]
		result := ExtractTags(sentence, 20)
		if len(result) != len(want) {
			t.Errorf("ExtractTags(%q) = %v, want %v", sentence, result, want)
			// Skip the element-wise comparison: indexing want with positions
			// of a longer result would panic.
			continue
		}
		for i, tag := range result {
			if tag != want[i] {
				t.Errorf("ExtractTags(%q)[%d] = %q, want %q", sentence, i, tag, want[i])
			}
		}
	}
}

270181
analyse/idf.go Normal file

File diff suppressed because it is too large Load Diff

349045
dict.txt Normal file

File diff suppressed because it is too large Load Diff

85
finalseg/finalseg.go Normal file
View File

@@ -0,0 +1,85 @@
package finalseg
import (
"regexp"
)
// cutHan segments sentence using the state sequence returned by viterbi for
// the states B, M, E and S: a word runs from the most recent 'B' to the next
// 'E', and an 'S' position is a one-rune word. Runes left over after the last
// emitted word are returned as one final piece.
func cutHan(sentence string) []string {
	chars := []rune(sentence)
	_, tags := viterbi(chars, []byte{'B', 'M', 'E', 'S'})

	words := make([]string, 0)
	wordStart, consumed := 0, 0
	for idx := range chars {
		tag := tags[idx]
		if tag == 'B' {
			wordStart = idx
		} else if tag == 'E' {
			words = append(words, string(chars[wordStart:idx+1]))
			consumed = idx + 1
		} else if tag == 'S' {
			words = append(words, string(chars[idx]))
			consumed = idx + 1
		}
	}
	// A trailing run that never reached 'E' or 'S' is still emitted.
	if consumed < len(chars) {
		words = append(words, string(chars[consumed:]))
	}
	return words
}
// Patterns used by Cut, compiled once instead of on every call.
var (
	// reHan matches a run of Han characters.
	reHan = regexp.MustCompile(`\p{Han}+`)
	// reSkip matches a decimal number or a run of ASCII letters and digits.
	reSkip = regexp.MustCompile(`(\d+\.\d+|[a-zA-Z0-9]+)`)
)

// Cut splits sentence into pieces by scanning it from left to right:
//
//   - a run of Han characters at the current position is segmented by cutHan;
//   - a decimal number or ASCII alphanumeric run at the current position is
//     emitted as a single piece;
//   - any other text is emitted as one piece that extends to the start of the
//     next Han or alphanumeric run, or to the end of the input.
//
// An empty sentence yields an empty, non-nil slice.
func Cut(sentence string) []string {
	result := make([]string, 0)
	s := sentence
	for len(s) > 0 {
		hanLoc := reHan.FindStringIndex(s)
		if hanLoc != nil && hanLoc[0] == 0 {
			result = append(result, cutHan(s[:hanLoc[1]])...)
			s = s[hanLoc[1]:]
			continue
		}

		skipLoc := reSkip.FindStringIndex(s)
		if skipLoc != nil && skipLoc[0] == 0 {
			result = append(result, s[:skipLoc[1]])
			s = s[skipLoc[1]:]
			continue
		}

		// Neither pattern matches at position 0: emit everything up to the
		// nearest upcoming match, or the whole remainder when there is none.
		end := len(s)
		if hanLoc != nil {
			end = hanLoc[0]
		}
		if skipLoc != nil && skipLoc[0] < end {
			end = skipLoc[0]
		}
		result = append(result, s[:end])
		s = s[end:]
	}
	return result
}

63
finalseg/finalseg_test.go Normal file
View File

@@ -0,0 +1,63 @@
package finalseg
import (
"math"
"testing"
)
// TestViterbi checks the score and the state path viterbi returns for a
// six-rune sample sentence.
func TestViterbi(t *testing.T) {
	obs := "我们是程序员"
	states := []byte{'B', 'M', 'E', 'S'}
	want := []byte{'B', 'E', 'S', 'B', 'M', 'E'}

	prob, path := viterbi([]rune(obs), states)
	if math.Abs(prob+39.68824128493802) > 1e-10 {
		t.Error(prob)
	}
	// Check the length first so a short path fails cleanly instead of
	// panicking with an index out of range.
	if len(path) != len(want) {
		t.Fatalf("viterbi path = %q, want %q", path, want)
	}
	for index, state := range want {
		if path[index] != state {
			t.Errorf("viterbi path = %q, want %q (first difference at %d)", path, want, index)
			break
		}
	}
}
// TestCutHan checks that cutHan splits the sample sentence into the three
// expected words.
func TestCutHan(t *testing.T) {
	obs := "我们是程序员"
	want := []string{"我们", "是", "程序员"}

	result := cutHan(obs)
	// Stop on a length mismatch: indexing a shorter result would panic.
	if len(result) != len(want) {
		t.Fatalf("cutHan(%q) = %v, want %v", obs, result, want)
	}
	for i := range want {
		if result[i] != want[i] {
			t.Errorf("cutHan(%q)[%d] = %q, want %q", obs, i, result[i], want[i])
		}
	}
}
// TestCut checks Cut on a pure-Han sentence (exact words) and on two mixed
// inputs (number of pieces only).
func TestCut(t *testing.T) {
	sentence := "我们是程序员"
	want := []string{"我们", "是", "程序员"}
	result := Cut(sentence)
	if len(result) != len(want) {
		// Do not index into a result of unexpected length.
		t.Errorf("Cut(%q) = %v, want %v", sentence, result, want)
	} else {
		for i := range want {
			if result[i] != want[i] {
				t.Errorf("Cut(%q)[%d] = %q, want %q", sentence, i, result[i], want[i])
			}
		}
	}

	result2 := Cut("I'm a programmer!")
	if len(result2) != 8 {
		t.Error(result2)
	}
	result3 := Cut("程序员average年龄28.6岁。")
	if len(result3) != 6 {
		t.Error(result3)
	}
}

35231
finalseg/prob_emit.go Normal file

File diff suppressed because it is too large Load Diff

10
finalseg/prob_start.go Normal file
View File

@@ -0,0 +1,10 @@
package finalseg
// ProbStart holds, per state byte ('B', 'E', 'M', 'S'), the score that viterbi
// adds to the emission score of the first observation. The values look like
// log-probabilities, with -3.14e+100 standing in for "impossible" — TODO confirm.
var ProbStart = map[byte]float64{
	'B': -0.26268660809250016,
	'E': -3.14e+100,
	'M': -3.14e+100,
	'S': -1.4652633398537678,
}

14
finalseg/prob_trans.go Normal file
View File

@@ -0,0 +1,14 @@
package finalseg
// ProbTrans is indexed as ProbTrans[previous][current] and holds the score
// viterbi adds for moving from one state to the next. State pairs without an
// entry are absent from the table.
var ProbTrans = map[byte]map[byte]float64{
	'B': {
		'E': -0.510825623765990,
		'M': -0.916290731874155,
	},
	'E': {
		'B': -0.5897149736854513,
		'S': -0.8085250474669937,
	},
	'M': {
		'E': -0.33344856811948514,
		'M': -1.2603623820268226,
	},
	'S': {
		'B': -0.7211965654669841,
		'S': -0.6658631448798212,
	},
}

94
finalseg/viterbi.go Normal file
View File

@@ -0,0 +1,94 @@
package finalseg
import (
"fmt"
"sort"
)
// MIN_FLOAT is the score viterbi substitutes when ProbEmit or ProbTrans has no
// entry for a lookup, so that such a step is effectively never chosen.
const MIN_FLOAT = -3.14e100
// PrevStatus lists, for each state, the predecessor states that viterbi
// considers when extending a path into that state.
var PrevStatus = map[byte][]byte{
	'B': {'E', 'S'},
	'M': {'M', 'B'},
	'S': {'S', 'E'},
	'E': {'B', 'M'},
}
// Viterbi pairs a path score with the state that produced it; viterbi builds
// these to pick the best predecessor state.
type Viterbi struct {
	prob  float64
	state byte
}

// String renders the value as "(prob, state)" with the state shown as its
// character. The state is a byte, so it is formatted with %c; %s would print
// a "%!s(uint8=...)" placeholder instead of the letter.
func (v Viterbi) String() string {
	return fmt.Sprintf("(%f, %c)", v.prob, v.state)
}

// Viterbis is a list of candidates that implements sort.Interface: ascending
// by score, with equal scores ordered by state byte.
type Viterbis []*Viterbi

// Len reports the number of candidates.
func (vs Viterbis) Len() int {
	return len(vs)
}

// Less reports whether candidate i sorts before candidate j.
func (vs Viterbis) Less(i, j int) bool {
	if vs[i].prob == vs[j].prob {
		return vs[i].state < vs[j].state
	}
	return vs[i].prob < vs[j].prob
}

// Swap exchanges candidates i and j.
func (vs Viterbis) Swap(i, j int) {
	vs[i], vs[j] = vs[j], vs[i]
}
// viterbi returns the best score and the matching state sequence for obs.
//
// For every position and every state in states it keeps the best score of a
// path ending in that state, extending only from the predecessors listed in
// PrevStatus. Scores are sums of ProbStart, ProbTrans and ProbEmit entries;
// MIN_FLOAT is used where a table has no entry. Only paths ending in 'E' or
// 'S' are considered for the final answer. Ties are resolved in favour of the
// larger state byte, as defined by Viterbis.Less.
//
// An empty obs yields (0, nil) instead of panicking.
func viterbi(obs []rune, states []byte) (float64, []byte) {
	if len(obs) == 0 {
		return 0, nil
	}

	V := make([]map[byte]float64, len(obs))
	path := make(map[byte][]byte, len(states))

	V[0] = make(map[byte]float64, len(states))
	for _, y := range states {
		emit, ok := ProbEmit[y][obs[0]]
		if !ok {
			emit = MIN_FLOAT
		}
		V[0][y] = emit + ProbStart[y]
		path[y] = []byte{y}
	}

	for t := 1; t < len(obs); t++ {
		V[t] = make(map[byte]float64, len(states))
		newPath := make(map[byte][]byte, len(states))
		for _, y := range states {
			emit, ok := ProbEmit[y][obs[t]]
			if !ok {
				emit = MIN_FLOAT
			}

			candidates := make(Viterbis, 0, len(PrevStatus[y]))
			for _, y0 := range PrevStatus[y] {
				trans, ok := ProbTrans[y0][y]
				if !ok {
					trans = MIN_FLOAT
				}
				candidates = append(candidates, &Viterbi{prob: V[t-1][y0] + trans + emit, state: y0})
			}
			sort.Sort(sort.Reverse(candidates))
			best := candidates[0]
			V[t][y] = best.prob

			// Copy the predecessor's path before extending it, so paths that
			// share a prefix never share a backing array.
			prev := path[best.state]
			extended := make([]byte, len(prev), len(prev)+1)
			copy(extended, prev)
			newPath[y] = append(extended, y)
		}
		path = newPath
	}

	last := V[len(obs)-1]
	finals := make(Viterbis, 0, 2)
	for _, y := range []byte{'E', 'S'} {
		finals = append(finals, &Viterbi{prob: last[y], state: y})
	}
	sort.Sort(sort.Reverse(finals))
	return finals[0].prob, path[finals[0].state]
}

1
foobar.txt Normal file
View File

@@ -0,0 +1 @@
好人 12 n

344
jieba.go Normal file
View File

@@ -0,0 +1,344 @@
package jiebago
import (
"fmt"
"github.com/wangbin/jiebago/finalseg"
"regexp"
"sort"
)
// Package-level state shared by the segmentation functions.
var (
// Dictionary is a dictionary file name, "dict.txt" by default.
// NOTE(review): nothing in the code visible here reads it — confirm how it is consumed.
Dictionary = "dict.txt"
// TT is the trie and frequency table read by GetDAG, Calc, the cut functions
// and CutForSearch. SetDictionary assigns it; it is declared without a value here.
TT *TopTrie
// UserWordTagTab is a string-to-string table, created empty.
// NOTE(review): presumably maps user-supplied words to their tags — confirm against its users.
UserWordTagTab = make(map[string]string)
)
// Route is one entry of the table built by Calc: the accumulated score of the
// chosen segmentation from a position onwards, and the index of the last rune
// of the word chosen at that position.
type Route struct {
	Freq  float64
	Index int
}

// String renders the route as "(Freq, Index)".
func (route Route) String() string {
	return fmt.Sprintf("(%f, %d)", route.Freq, route.Index)
}

// Routes is a list of candidate routes that implements sort.Interface:
// ascending by Freq, with equal Freq ordered by Index.
type Routes []*Route

// Len reports the number of candidates.
func (routes Routes) Len() int {
	return len(routes)
}

// Less reports whether candidate i sorts before candidate j.
func (routes Routes) Less(i, j int) bool {
	a, b := routes[i], routes[j]
	if a.Freq != b.Freq {
		return a.Freq < b.Freq
	}
	return a.Index < b.Index
}

// Swap exchanges candidates i and j.
func (routes Routes) Swap(i, j int) {
	tmp := routes[i]
	routes[i] = routes[j]
	routes[j] = tmp
}
// RegexpSplit cuts sentence at every match of r and returns all pieces in
// order, matches included: each non-empty stretch between two matches is one
// piece and each match is one piece. When r does not match at all, the result
// is a single-element slice holding sentence itself (even when it is empty).
func RegexpSplit(r *regexp.Regexp, sentence string) []string {
	matches := r.FindAllStringIndex(sentence, -1)
	if len(matches) == 0 {
		return []string{sentence}
	}

	pieces := make([]string, 0)
	cursor := 0
	for _, m := range matches {
		start, end := m[0], m[1]
		// Text skipped since the previous match, if any, comes first.
		if start != cursor {
			pieces = append(pieces, sentence[cursor:start])
		}
		pieces = append(pieces, sentence[start:end])
		cursor = end
	}
	if cursor < len(sentence) {
		pieces = append(pieces, sentence[cursor:])
	}
	return pieces
}
// GetDAG builds, for every rune position of sentence, the list of end
// positions (inclusive, ascending) at which a walk through the trie TT.T that
// starts at that position reaches a node flagged IsLeaf. Positions for which
// no such end exists map to a list containing only the position itself, so
// every index in [0, number of runes) is present in the result.
func GetDAG(sentence string) map[int][]int {
	dag := make(map[int][]int)
	runes := []rune(sentence)
	n := len(runes)
	root := TT.T

	for start := 0; start < n; start++ {
		level := root
		for end := start; end < n; end++ {
			node, ok := level[string(runes[end])]
			if !ok {
				// No trie entry continues with this rune; try the next start.
				break
			}
			level = node.SubNodes
			if node.IsLeaf {
				dag[start] = append(dag[start], end)
			}
		}
	}

	for pos := 0; pos < n; pos++ {
		if _, ok := dag[pos]; !ok {
			dag[pos] = []int{pos}
		}
	}
	return dag
}
// Calc computes, from the last rune of sentence back to the first, the best
// route starting at each position. A candidate's score is the TT.Freq value of
// the word spanning the position up to a DAG end (TT.MinFreq when the word has
// no entry) plus the score of the route that starts right after that end. The
// highest score wins; equal scores are resolved in favour of the larger end
// index. The returned map also holds a zero entry at index len(runes).
//
// dag is expected to come from GetDAG for the same sentence. A position with
// no DAG entry is treated as a single-rune word instead of panicking.
//
// idx is not used; it is kept only so existing callers keep compiling.
func Calc(sentence string, dag map[int][]int, idx int) map[int]*Route {
	runes := []rune(sentence)
	number := len(runes)
	routes := make(map[int]*Route, number+1)
	routes[number] = &Route{0.0, 0}

	for start := number - 1; start >= 0; start-- {
		ends := dag[start]
		if len(ends) == 0 {
			ends = []int{start}
		}

		bestFreq, bestEnd, found := 0.0, 0, false
		for _, end := range ends {
			var word string
			if end <= start-1 {
				// NOTE(review): GetDAG never produces an end before its start,
				// so this branch is not expected to run; kept as it was.
				word = string(runes[end+1 : start])
			} else {
				word = string(runes[start : end+1])
			}

			freq, ok := TT.Freq[word]
			if !ok {
				freq = TT.MinFreq
			}
			score := freq + routes[end+1].Freq

			// Same ordering as Routes.Less, applied as a running maximum so no
			// per-position sort is needed.
			if !found || score > bestFreq || (score == bestFreq && end > bestEnd) {
				bestFreq, bestEnd, found = score, end, true
			}
		}
		routes[start] = &Route{Freq: bestFreq, Index: bestEnd}
	}
	return routes
}
// cutAction is the signature shared by the block segmenters (cut_DAG,
// cut_DAG_NO_HMM and cut_All) so that Cut can select one of them at run time.
type cutAction func(sentence string) []string
func cut_DAG(sentence string) []string {
dag := GetDAG(sentence)
routes := Calc(sentence, dag, 0)
x := 0
var y int
runes := []rune(sentence)
length := len(runes)
result := make([]string, 0)
buf := make([]rune, 0)
for {
if x >= length {
break
}
y = routes[x].Index + 1
l_word := runes[x:y]
if y-x == 1 {
buf = append(buf, l_word...)
} else {
if len(buf) > 0 {
if len(buf) == 1 {
result = append(result, string(buf))
buf = make([]rune, 0)
} else {
bufString := string(buf)
if _, ok := TT.Freq[bufString]; !ok {
recognized := finalseg.Cut(bufString)
for _, t := range recognized {
result = append(result, t)
}
} else {
for _, elem := range buf {
result = append(result, string(elem)) // TODO: I don't get this?
}
}
buf = make([]rune, 0)
}
}
result = append(result, string(l_word))
}
x = y
}
if len(buf) > 0 {
if len(buf) == 1 {
result = append(result, string(buf))
} else {
bufString := string(buf)
if _, ok := TT.Freq[bufString]; !ok {
recognized := finalseg.Cut(bufString)
for _, t := range recognized {
result = append(result, t)
}
} else {
for _, elem := range buf {
result = append(result, string(elem)) // TODO: I don't get this?
}
}
}
}
return result
}
// reSingleAlnum matches one ASCII letter or digit. It is compiled once here
// instead of on every cut_DAG_NO_HMM call.
var reSingleAlnum = regexp.MustCompile(`[[:alnum:]]`)

// cut_DAG_NO_HMM segments sentence along the best route computed by Calc over
// the DAG from GetDAG, without falling back to finalseg. Consecutive
// single-rune words that are ASCII letters or digits are glued together into
// one piece; every other word is emitted as chosen by the route.
func cut_DAG_NO_HMM(sentence string) []string {
	dag := GetDAG(sentence)
	routes := Calc(sentence, dag, 0)
	runes := []rune(sentence)

	result := make([]string, 0)
	buf := make([]rune, 0)
	for x := 0; x < len(runes); {
		y := routes[x].Index + 1
		word := runes[x:y]
		// Check the length first: it is cheaper than running the regexp.
		if len(word) == 1 && reSingleAlnum.MatchString(string(word)) {
			buf = append(buf, word...)
		} else {
			if len(buf) > 0 {
				result = append(result, string(buf))
				buf = buf[:0]
			}
			result = append(result, string(word))
		}
		x = y
	}
	if len(buf) > 0 {
		result = append(result, string(buf))
	}
	return result
}
// cut_All returns every word the DAG from GetDAG offers, visiting start
// positions in ascending order. A position with a single end is emitted only
// when it lies beyond the end of the last emitted word; otherwise every end
// strictly after the position yields one word.
func cut_All(sentence string) []string {
	result := make([]string, 0)
	runes := []rune(sentence)
	dag := GetDAG(sentence)

	starts := make([]int, 0, len(dag))
	for k := range dag {
		starts = append(starts, k)
	}
	sort.Ints(starts)

	oldJ := -1
	// Iterate over the sorted keys themselves. Ranging with a single variable
	// would yield slice indices, which only coincide with the keys while the
	// DAG happens to contain every position from 0 upwards.
	for _, k := range starts {
		ends := dag[k]
		if len(ends) == 1 && k > oldJ {
			result = append(result, string(runes[k:ends[0]+1]))
			oldJ = ends[0]
			continue
		}
		for _, j := range ends {
			if j > k {
				result = append(result, string(runes[k:j+1]))
				oldJ = j
			}
		}
	}
	return result
}
func Cut(sentence string, cut_all bool, HMM bool) []string {
result := make([]string, 0)
var re_han, re_skip *regexp.Regexp
if cut_all {
re_han = regexp.MustCompile(`\p{Han}+`)
re_skip = regexp.MustCompile(`[^[:alnum:]+#\n]`)
} else {
re_han = regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
re_skip = regexp.MustCompile(`(\r\n|\s)`)
}
blocks := RegexpSplit(re_han, sentence)
var cut_block cutAction
if HMM {
cut_block = cut_DAG
} else {
cut_block = cut_DAG_NO_HMM
}
if cut_all {
cut_block = cut_All
}
for _, blk := range blocks {
if len(blk) == 0 {
continue
}
if re_han.MatchString(blk) {
for _, word := range cut_block(blk) {
result = append(result, word)
}
} else {
type skipSplitFunc func(sentence string) []string
var ssf skipSplitFunc
if cut_all {
ssf = func(sentence string) []string {
return re_skip.Split(sentence, -1)
}
} else {
ssf = func(sentence string) []string {
return RegexpSplit(re_skip, sentence)
}
}
for _, x := range ssf(blk) {
if re_skip.MatchString(x) {
result = append(result, x)
} else if !cut_all {
for _, xx := range x {
result = append(result, string(xx))
}
} else {
result = append(result, x)
}
}
}
}
return result
}
// CutForSearch segments sentence with Cut(sentence, false, hmm) and, for each
// resulting word, additionally emits its two-rune and then its three-rune
// substrings that have an entry in TT.Freq. The substrings come before the
// word itself; only words longer than the substring size are scanned.
func CutForSearch(sentence string, hmm bool) []string {
	result := make([]string, 0)
	for _, word := range Cut(sentence, false, hmm) {
		runes := []rune(word)
		for size := 2; size <= 3; size++ {
			if len(runes) <= size {
				continue
			}
			for from := 0; from+size <= len(runes); from++ {
				gram := string(runes[from : from+size])
				if _, known := TT.Freq[gram]; known {
					result = append(result, gram)
				}
			}
		}
		result = append(result, word)
	}
	return result
}
// SetDictionary builds a trie from the file at dict_path with newTopTrie and
// installs it as TT. When loading fails the error is returned and TT keeps
// its previous value, so a bad path cannot replace a working dictionary.
func SetDictionary(dict_path string) error {
	trie, err := newTopTrie(dict_path)
	if err != nil {
		return err
	}
	TT = trie
	return nil
}

747
jieba_test.go Normal file
View File

@@ -0,0 +1,747 @@
package jiebago
import (
"regexp"
"testing"
)
var (
test_contents = []string{
"这是一个伸手不见五指的黑夜。我叫孙悟空我爱北京我爱Python和C++。",
"我不喜欢日本和服。",
"雷猴回归人间。",
"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
"我需要廉租房",
"永和服装饰品有限公司",
"我爱北京天安门",
"abc",
"隐马尔可夫",
"雷猴是个好网站",
"“Microsoft”一词由“MICROcomputer微型计算机”和“SOFTware软件”两部分组成",
"草泥马和欺实马是今年的流行词汇",
"伊藤洋华堂总府店",
"中国科学院计算技术研究所",
"罗密欧与朱丽叶",
"我购买了道具和服装",
"PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍",
"湖北省石首市",
"湖北省十堰市",
"总经理完成了这件事情",
"电脑修好了",
"做好了这件事情就一了百了了",
"人们审美的观点是不同的",
"我们买了一个美的空调",
"线程初始化时我们要注意",
"一个分子是由好多原子组织成的",
"祝你马到功成",
"他掉进了无底洞里",
"中国的首都是北京",
"孙君意",
"外交部发言人马朝旭",
"领导人会议和第四届东亚峰会",
"在过去的这五年",
"还需要很长的路要走",
"60周年首都阅兵",
"你好人们审美的观点是不同的",
"买水果然后来世博园",
"买水果然后去世博园",
"但是后来我才知道你是对的",
"存在即合理",
"的的的的的在的的的的就以和和和",
"I love你不以为耻反以为rong",
"因",
"",
"hello你好人们审美的观点是不同的",
"很好但主要是基于网页形式",
"hello你好人们审美的观点是不同的",
"为什么我不能拥有想要的生活",
"后来我才",
"此次来中国是为了",
"使用了它就可以解决一些问题",
",使用了它就可以解决一些问题",
"其实使用了它就可以解决一些问题",
"好人使用了它就可以解决一些问题",
"是因为和国家",
"老年搜索还支持",
"干脆就把那部蒙人的闲法给废了拉倒RT @laoshipukong : 27日全国人大常委会第三次审议侵权责任法草案删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 ",
"大",
"",
"他说的确实在理",
"长春市长春节讲话",
"结婚的和尚未结婚的",
"结合成分子时",
"旅游和服务是最好的",
"这件事情的确是我的错",
"供大家参考指正",
"哈尔滨政府公布塌桥原因",
"我在机场入口处",
"邢永臣摄影报道",
"BP神经网络如何训练才能在分类时增加区分度",
"南京市长江大桥",
"应一些使用者的建议也为了便于利用NiuTrans用于SMT研究",
"长春市长春药店",
"邓颖超生前最喜欢的衣服",
"胡锦涛是热爱世界和平的政治局常委",
"程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪",
"一次性交多少钱",
"两块五一套,三块八一斤,四块七一本,五块六一条",
"小和尚留了一个像大和尚一样的和尚头",
"我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站",
"张晓梅去人民医院做了个B超然后去买了件T恤",
"AT&T是一件不错的公司给你发offer了吗",
"C++和c#是什么关系11+122=133是吗PI=3.14159",
"你认识那个和主席握手的的哥吗?他开一辆黑色的士。",
"枪杆子中出政权"}
defaultCutResult = [][]string{
[]string{"\u8fd9\u662f", "\u4e00\u4e2a", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
[]string{"\u5de5\u4fe1\u5904", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df\u623f"},
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650\u516c\u53f8"},
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89\u95e8"},
[]string{"abc"},
[]string{"\u9690", "\u9a6c\u5c14\u53ef\u592b"},
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
[]string{"\u4f0a\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
[]string{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e\u5e1a", "\u81ea\u73cd"},
[]string{"\u6e56\u5317\u7701", "\u77f3\u9996\u5e02"},
[]string{"\u6e56\u5317\u7701", "\u5341\u5830\u5e02"},
[]string{"\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
[]string{"\u7ebf\u7a0b", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95\u6d1e", "\u91cc"},
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
[]string{"\u5b59\u541b\u610f"},
[]string{"\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
[]string{"\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
[]string{"\u8fd8", "\u9700\u8981", "\u5f88\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a\u56ed"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a\u56ed"},
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
[]string{"\u56e0"},
[]string{},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
[]string{"\u540e\u6765", "\u6211", "\u624d"},
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
[]string{"\u5927"},
[]string{},
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728\u7406"},
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
[]string{"\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c\u6865", "\u539f\u56e0"},
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3\u5904"},
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
[]string{"BP", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206\u5ea6", "\uff1f"},
[]string{"\u5357\u4eac\u5e02", "\u957f\u6c5f\u5927\u6865"},
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
[]string{"\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
[]string{"\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
[]string{"\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731\u4f1a\u9707", "\u662f", "\u5728", "\u5b59\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e\u677e\u6d2a"},
[]string{"\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a\u5934"},
[]string{"\u6211", "\u662f", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73\u95e8", "\u7ad9"},
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3.14159"},
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
[]string{"\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
}
cutAllResult = [][]string{
[]string{"\u8fd9", "\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4f38\u624b\u4e0d\u89c1", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u4e0d\u89c1", "\u4e94\u6307", "\u7684", "\u9ed1\u591c", "", "", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\u609f\u7a7a", "", "", "\u6211", "\u7231", "\u5317\u4eac", "", "", "\u6211", "\u7231", "Python", "\u548c", "C++", ""},
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "", ""},
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "", ""},
[]string{"\u5de5\u4fe1\u5904", "\u5904\u5973", "\u5973\u5e72\u4e8b", "\u5e72\u4e8b", "\u6bcf\u6708", "\u6708\u7ecf", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u53e3\u4ea4", "\u4ea4\u4ee3", "24", "\u53e3\u4ea4", "\u4ea4\u6362", "\u4ea4\u6362\u673a", "\u6362\u673a", "\u7b49", "\u6280\u672f", "\u6280\u672f\u6027", "\u6027\u5668", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5b89\u88c5\u5de5", "\u88c5\u5de5", "\u5de5\u4f5c"},
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df", "\u5ec9\u79df\u623f", "\u79df\u623f"},
[]string{"\u6c38\u548c", "\u548c\u670d", "\u670d\u88c5", "\u88c5\u9970", "\u88c5\u9970\u54c1", "\u9970\u54c1", "\u6709\u9650", "\u6709\u9650\u516c\u53f8", "\u516c\u53f8"},
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89", "\u5929\u5b89\u95e8"},
[]string{"abc"},
[]string{"\u9690", "\u9a6c\u5c14\u53ef", "\u9a6c\u5c14\u53ef\u592b", "\u53ef\u592b"},
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
[]string{"", "Microsoft", "", "\u4e00", "\u8bcd", "\u7531", "", "MICROcomputer", "", "\u5fae\u578b", "\u8ba1\u7b97", "\u8ba1\u7b97\u673a", "\u7b97\u673a", "", "", "", "\u548c", "", "SOFTware", "", "\u8f6f\u4ef6", "", "", "", "\u4e24\u90e8", "\u90e8\u5206", "\u5206\u7ec4", "\u7ec4\u6210"},
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a", "\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
[]string{"\u4f0a", "\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
[]string{"\u4e2d\u56fd", "\u4e2d\u56fd\u79d1\u5b66\u9662", "\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", "\u79d1\u5b66", "\u79d1\u5b66\u9662", "\u5b66\u9662", "\u8ba1\u7b97", "\u8ba1\u7b97\u6280\u672f", "\u6280\u672f", "\u7814\u7a76", "\u7814\u7a76\u6240"},
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c\u670d", "\u670d\u88c5"},
[]string{"PS", "", "", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "", "", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u4e0d\u65ad\u6539\u8fdb", "\u6539\u8fdb", "", "", "\u907f\u514d", "\u655e", "\u5e1a", "\u81ea\u73cd"},
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u77f3\u9996", "\u77f3\u9996\u5e02"},
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u5341\u5830", "\u5341\u5830\u5e02"},
[]string{"\u603b\u7ecf\u7406", "\u7ecf\u7406", "\u7406\u5b8c", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86\u4e86"},
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u521d\u59cb\u5316", "\u5316\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u7ec7\u6210", "\u7684"},
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u65e0\u5e95\u6d1e", "\u91cc"},
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
[]string{"\u5b59", "\u541b", "\u610f"},
[]string{"\u5916\u4ea4", "\u5916\u4ea4\u90e8", "\u90e8\u53d1", "\u53d1\u8a00", "\u53d1\u8a00\u4eba", "\u4eba\u9a6c", "\u9a6c\u671d\u65ed"},
[]string{"\u9886\u5bfc", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u8bae\u548c", "\u7b2c\u56db", "\u7b2c\u56db\u5c4a", "\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
[]string{"\u8fd8", "\u9700\u8981", "\u5f88", "\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
[]string{"\u4f60\u597d", "\u597d\u4eba", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e70", "\u6c34\u679c", "\u679c\u7136", "\u7136\u540e", "\u540e\u6765", "\u6765\u4e16", "\u4e16\u535a", "\u4e16\u535a\u56ed", "\u535a\u56ed"},
[]string{"\u4e70", "\u6c34\u679c", "\u679c\u7136", "\u7136\u540e", "\u540e\u53bb", "\u53bb\u4e16", "\u4e16\u535a", "\u4e16\u535a\u56ed", "\u535a\u56ed"},
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
[]string{"I", "love", "\u4f60", "", "", "\u4e0d\u4ee5", "\u4e0d\u4ee5\u4e3a\u803b", "\u4ee5\u4e3a", "\u803b", "", "", "\u53cd", "\u4ee5\u4e3a", "rong"},
[]string{"\u56e0"},
[]string{},
[]string{"hello", "\u4f60\u597d", "\u597d\u4eba", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u8981\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
[]string{"hello", "\u4f60\u597d", "\u597d\u4eba", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e3a\u4ec0\u4e48", "\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
[]string{"\u540e\u6765", "\u6211", "\u624d"},
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u56fd\u662f", "\u4e3a\u4e86"},
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"", "", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u662f\u56e0\u4e3a", "\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u7d22\u8fd8", "\u652f\u6301"},
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2", "\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "", "RT", "", "laoshipukong", "", "", "27", "\u65e5", "", "", "\u5168\u56fd", "\u5168\u56fd\u4eba\u5927", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u56fd\u4eba", "\u4eba\u5927", "\u4eba\u5927\u5e38\u59d4\u4f1a", "\u5e38\u59d4", "\u5e38\u59d4\u4f1a", "\u59d4\u4f1a", "\u7b2c\u4e09", "\u7b2c\u4e09\u6b21", "\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u6743\u8d23", "\u8d23\u4efb", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "", "", "\u5220\u9664", "\u9664\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "", "", "\u4e3e\u8bc1", "\u5012\u7f6e", "", "", "\u7684", "\u89c4\u5b9a", "", "", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d", "\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab\u4e0d\u590d", "\u4e0d\u590d", "\u7684", "\u5883\u5730", "", "", ""},
[]string{"\u5927"},
[]string{},
[]string{"\u4ed6", "\u8bf4", "\u7684\u786e", "\u786e\u5b9e", "\u5b9e\u5728", "\u7406"},
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u5e02\u957f", "\u957f\u6625", "\u6625\u8282", "\u8bb2\u8bdd"},
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c\u5c1a", "\u5c1a\u672a", "\u672a\u7ed3", "\u7ed3\u5a5a", "\u7684"},
[]string{"\u7ed3\u5408", "\u5408\u6210", "\u6210\u5206", "\u5206\u5b50", "\u65f6"},
[]string{"\u65c5\u6e38", "\u548c\u670d", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
[]string{"\u54c8\u5c14", "\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c", "\u6865", "\u539f\u56e0"},
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5165\u53e3\u5904"},
[]string{"\u90a2", "\u6c38", "\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
[]string{"BP", "\u795e\u7ecf", "\u795e\u7ecf\u7f51", "\u795e\u7ecf\u7f51\u7edc", "\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u52a0\u533a", "\u533a\u5206", "\u533a\u5206\u5ea6", "\u5206\u5ea6", "", ""},
[]string{"\u5357\u4eac", "\u5357\u4eac\u5e02", "\u4eac\u5e02", "\u5e02\u957f", "\u957f\u6c5f", "\u957f\u6c5f\u5927\u6865", "\u5927\u6865"},
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528", "\u4f7f\u7528\u8005", "\u7528\u8005", "\u7684", "\u5efa\u8bae", "", "", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u5e02\u957f", "\u957f\u6625", "\u6625\u836f", "\u836f\u5e97"},
[]string{"\u9093\u9896\u8d85", "\u8d85\u751f", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
[]string{"\u80e1\u9526\u6d9b", "\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
[]string{"\u7a0b\u5e8f", "\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731", "\u4f1a", "\u9707", "\u662f", "\u5728", "\u5b59", "\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", "", "", "", "\u8303", "\u51ef", "\u5728", "\u6700", "\u53f3\u9762", "", "", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e", "\u677e", "\u6d2a"},
[]string{"\u4e00\u6b21", "\u4e00\u6b21\u6027", "\u6027\u4ea4", "\u591a\u5c11", "\u591a\u5c11\u94b1"},
[]string{"\u4e24\u5757", "\u4e94\u4e00", "\u4e00\u5957", "", "", "\u4e09\u5757", "\u516b\u4e00", "\u4e00\u65a4", "", "", "\u56db\u5757", "\u4e03\u4e00", "\u4e00\u672c", "", "", "\u4e94\u5757", "\u516d\u4e00", "\u4e00\u6761"},
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a", "\u548c\u5c1a\u5934"},
[]string{"\u6211", "\u662f", "\u4e2d\u534e", "\u4e2d\u534e\u4eba\u6c11", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u534e\u4eba", "\u4eba\u6c11", "\u4eba\u6c11\u5171\u548c\u56fd", "\u5171\u548c", "\u5171\u548c\u56fd", "\u56fd\u516c", "\u516c\u6c11", "", "", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c", "\u5171\u548c\u515a", "\u515a\u5458", "", "", "", "\u5730\u94c1", "\u548c\u5e73", "\u548c\u5e73\u95e8", "\u7ad9"},
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u6c11\u533b\u9662", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B", "\u8d85\u7136", "\u7136\u540e", "\u540e\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T", "\u6064"},
[]string{"AT", "T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "", "", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "", ""},
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "", "11+122", "133", "", "\u662f", "\u5417", "", "PI", "3", "14159"},
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "", "", "\u4ed6", "\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "", ""},
[]string{"\u67aa\u6746", "\u67aa\u6746\u5b50", "\u6746\u5b50", "\u4e2d\u51fa", "\u653f\u6743"},
}
defaultCutNoHMMResult = [][]string{
[]string{"\u8fd9", "\u662f", "\u4e00\u4e2a", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
[]string{"\u5de5\u4fe1\u5904", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df\u623f"},
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650\u516c\u53f8"},
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89\u95e8"},
[]string{"abc"},
[]string{"\u9690", "\u9a6c\u5c14\u53ef\u592b"},
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00", "\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a", "\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
[]string{"\u4f0a", "\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
[]string{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e", "\u5e1a", "\u81ea\u73cd"},
[]string{"\u6e56\u5317\u7701", "\u77f3\u9996\u5e02"},
[]string{"\u6e56\u5317\u7701", "\u5341\u5830\u5e02"},
[]string{"\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
[]string{"\u7ebf\u7a0b", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95\u6d1e", "\u91cc"},
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
[]string{"\u5b59", "\u541b", "\u610f"},
[]string{"\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
[]string{"\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
[]string{"\u8fd8", "\u9700\u8981", "\u5f88", "\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a\u56ed"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a\u56ed"},
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
[]string{"\u56e0"},
[]string{},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
[]string{"\u540e\u6765", "\u6211", "\u624d"},
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3", "\u90e8", "\u8499", "\u4eba", "\u7684", "\u95f2", "\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d", "\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
[]string{"\u5927"},
[]string{},
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728", "\u7406"},
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
[]string{"\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c", "\u6865", "\u539f\u56e0"},
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3\u5904"},
[]string{"\u90a2", "\u6c38", "\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
[]string{"BP", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206\u5ea6", "\uff1f"},
[]string{"\u5357\u4eac\u5e02", "\u957f\u6c5f\u5927\u6865"},
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
[]string{"\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
[]string{"\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
[]string{"\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731", "\u4f1a", "\u9707", "\u662f", "\u5728", "\u5b59", "\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303", "\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d", "\u5f80", "\u5de6", "\u662f", "\u674e", "\u677e", "\u6d2a"},
[]string{"\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a\u5934"},
[]string{"\u6211", "\u662f", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73\u95e8", "\u7ad9"},
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3", ".", "14159"},
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6", "\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
[]string{"\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
}
cutForSearchResult = [][]string{
[]string{"\u8fd9\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4e0d\u89c1", "\u4e94\u6307", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u609f\u7a7a", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
[]string{"\u5de5\u4fe1\u5904", "\u5e72\u4e8b", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362", "\u6362\u673a", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df", "\u79df\u623f", "\u5ec9\u79df\u623f"},
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650", "\u516c\u53f8", "\u6709\u9650\u516c\u53f8"},
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89", "\u5929\u5b89\u95e8"},
[]string{"abc"},
[]string{"\u9690", "\u53ef\u592b", "\u9a6c\u5c14\u53ef", "\u9a6c\u5c14\u53ef\u592b"},
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97", "\u7b97\u673a", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
[]string{"\u4f0a\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
[]string{"\u4e2d\u56fd", "\u79d1\u5b66", "\u5b66\u9662", "\u8ba1\u7b97", "\u6280\u672f", "\u7814\u7a76", "\u79d1\u5b66\u9662", "\u7814\u7a76\u6240", "\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u6539\u8fdb", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e\u5e1a", "\u81ea\u73cd"},
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u77f3\u9996", "\u77f3\u9996\u5e02"},
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u5341\u5830", "\u5341\u5830\u5e02"},
[]string{"\u7ecf\u7406", "\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u65e0\u5e95\u6d1e", "\u91cc"},
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
[]string{"\u5b59\u541b\u610f"},
[]string{"\u5916\u4ea4", "\u5916\u4ea4\u90e8", "\u53d1\u8a00", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
[]string{"\u9886\u5bfc", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db", "\u56db\u5c4a", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
[]string{"\u8fd8", "\u9700\u8981", "\u5f88\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5", "\u4ee5\u4e3a", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
[]string{"\u56e0"},
[]string{},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4ec0\u4e48", "\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
[]string{"\u540e\u6765", "\u6211", "\u624d"},
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u56e0\u4e3a", "\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd", "\u56fd\u4eba", "\u4eba\u5927", "\u5e38\u59d4", "\u59d4\u4f1a", "\u5e38\u59d4\u4f1a", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09", "\u4e09\u6b21", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e0d\u590d", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
[]string{"\u5927"},
[]string{},
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728\u7406"},
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
[]string{"\u54c8\u5c14", "\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c\u6865", "\u539f\u56e0"},
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5165\u53e3\u5904"},
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
[]string{"BP", "\u795e\u7ecf", "\u7f51\u7edc", "\u795e\u7ecf\u7f51", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206", "\u5206\u5ea6", "\u533a\u5206\u5ea6", "\uff1f"},
[]string{"\u5357\u4eac", "\u4eac\u5e02", "\u5357\u4eac\u5e02", "\u957f\u6c5f", "\u5927\u6865", "\u957f\u6c5f\u5927\u6865"},
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528", "\u7528\u8005", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
[]string{"\u9526\u6d9b", "\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
[]string{"\u7a0b\u5e8f", "\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731\u4f1a\u9707", "\u662f", "\u5728", "\u5b59\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e\u677e\u6d2a"},
[]string{"\u4e00\u6b21", "\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a", "\u548c\u5c1a\u5934"},
[]string{"\u6211", "\u662f", "\u4e2d\u534e", "\u534e\u4eba", "\u4eba\u6c11", "\u5171\u548c", "\u5171\u548c\u56fd", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73", "\u548c\u5e73\u95e8", "\u7ad9"},
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3.14159"},
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
[]string{"\u67aa\u6746", "\u6746\u5b50", "\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
}
cutForSearchNoHMMResult = [][]string{
[]string{"\u8fd9", "\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4e0d\u89c1", "\u4e94\u6307", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u609f\u7a7a", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
[]string{"\u5de5\u4fe1\u5904", "\u5e72\u4e8b", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362", "\u6362\u673a", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df", "\u79df\u623f", "\u5ec9\u79df\u623f"},
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650", "\u516c\u53f8", "\u6709\u9650\u516c\u53f8"},
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89", "\u5929\u5b89\u95e8"},
[]string{"abc"},
[]string{"\u9690", "\u53ef\u592b", "\u9a6c\u5c14\u53ef", "\u9a6c\u5c14\u53ef\u592b"},
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00", "\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97", "\u7b97\u673a", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a", "\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
[]string{"\u4f0a", "\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
[]string{"\u4e2d\u56fd", "\u79d1\u5b66", "\u5b66\u9662", "\u8ba1\u7b97", "\u6280\u672f", "\u7814\u7a76", "\u79d1\u5b66\u9662", "\u7814\u7a76\u6240", "\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u6539\u8fdb", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e", "\u5e1a", "\u81ea\u73cd"},
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u77f3\u9996", "\u77f3\u9996\u5e02"},
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u5341\u5830", "\u5341\u5830\u5e02"},
[]string{"\u7ecf\u7406", "\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u65e0\u5e95\u6d1e", "\u91cc"},
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
[]string{"\u5b59", "\u541b", "\u610f"},
[]string{"\u5916\u4ea4", "\u5916\u4ea4\u90e8", "\u53d1\u8a00", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
[]string{"\u9886\u5bfc", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db", "\u56db\u5c4a", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
[]string{"\u8fd8", "\u9700\u8981", "\u5f88", "\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5", "\u4ee5\u4e3a", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
[]string{"\u56e0"},
[]string{},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4ec0\u4e48", "\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
[]string{"\u540e\u6765", "\u6211", "\u624d"},
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u56e0\u4e3a", "\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3", "\u90e8", "\u8499", "\u4eba", "\u7684", "\u95f2", "\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd", "\u56fd\u4eba", "\u4eba\u5927", "\u5e38\u59d4", "\u59d4\u4f1a", "\u5e38\u59d4\u4f1a", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09", "\u4e09\u6b21", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d", "\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e0d\u590d", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
[]string{"\u5927"},
[]string{},
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728", "\u7406"},
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
[]string{"\u54c8\u5c14", "\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c", "\u6865", "\u539f\u56e0"},
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5165\u53e3\u5904"},
[]string{"\u90a2", "\u6c38", "\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
[]string{"BP", "\u795e\u7ecf", "\u7f51\u7edc", "\u795e\u7ecf\u7f51", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206", "\u5206\u5ea6", "\u533a\u5206\u5ea6", "\uff1f"},
[]string{"\u5357\u4eac", "\u4eac\u5e02", "\u5357\u4eac\u5e02", "\u957f\u6c5f", "\u5927\u6865", "\u957f\u6c5f\u5927\u6865"},
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528", "\u7528\u8005", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
[]string{"\u9526\u6d9b", "\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
[]string{"\u7a0b\u5e8f", "\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731", "\u4f1a", "\u9707", "\u662f", "\u5728", "\u5b59", "\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303", "\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d", "\u5f80", "\u5de6", "\u662f", "\u674e", "\u677e", "\u6d2a"},
[]string{"\u4e00\u6b21", "\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a", "\u548c\u5c1a\u5934"},
[]string{"\u6211", "\u662f", "\u4e2d\u534e", "\u534e\u4eba", "\u4eba\u6c11", "\u5171\u548c", "\u5171\u548c\u56fd", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73", "\u548c\u5e73\u95e8", "\u7ad9"},
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3", ".", "14159"},
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6", "\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
[]string{"\u67aa\u6746", "\u6746\u5b50", "\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
}
userDictCutResult = [][]string{
[]string{"\u8fd9\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4e0d\u89c1", "\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C", "++", "\u3002"},
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c", "\u670d", "\u3002"},
[]string{"\u96f7\u7334", "\u56de\u5f52\u4eba\u95f4", "\u3002"},
[]string{"\u5de5\u4fe1", "\u5904\u5973", "\u5e72\u4e8b", "\u6bcf", "\u6708", "\u7ecf\u8fc7", "\u4e0b", "\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df\u623f"},
[]string{"\u6c38\u548c\u670d", "\u88c5\u9970\u54c1", "\u6709", "\u9650\u516c\u53f8"},
[]string{"\u6211", "\u7231\u5317\u4eac", "\u5929\u5b89\u95e8"},
[]string{"abc"},
[]string{"\u9690\u9a6c\u5c14", "\u53ef\u592b"},
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24\u90e8\u5206", "\u7ec4\u6210"},
[]string{"\u8349\u6ce5", "\u9a6c", "\u548c", "\u6b3a\u5b9e", "\u9a6c", "\u662f", "\u4eca", "\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
[]string{"\u4f0a\u85e4\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
[]string{"\u4e2d\u56fd", "\u79d1\u5b66\u9662", "\u8ba1\u7b97", "\u6280\u672f", "\u7814\u7a76", "\u6240"},
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
[]string{"\u6211\u8d2d", "\u4e70", "\u4e86", "\u9053", "\u5177", "\u548c", "\u670d\u88c5"},
[]string{"PS", ":", " ", "\u6211\u89c9", "\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d", "\u5904", "\uff0c", "\u5c31", "\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e\u5e1a", "\u81ea\u73cd"},
[]string{"\u6e56\u5317\u7701", "\u77f3\u9996\u5e02"},
[]string{"\u6e56\u5317\u7701", "\u5341\u5830\u5e02"},
[]string{"\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
[]string{"\u505a", "\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00", "\u4e86", "\u767e", "\u4e86", "\u4e86"},
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e", "\u7684", "\u7a7a\u8c03"},
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u5316\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d", "\u591a", "\u539f\u5b50", "\u7ec4\u7ec7\u6210", "\u7684"},
[]string{"\u795d", "\u4f60", "\u9a6c\u5230", "\u529f\u6210"},
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u6d1e\u91cc"},
[]string{"\u4e2d\u56fd", "\u7684", "\u9996", "\u90fd", "\u662f", "\u5317\u4eac"},
[]string{"\u5b59\u541b\u610f"},
[]string{"\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
[]string{"\u9886\u5bfc", "\u4eba\u4f1a\u8bae", "\u548c", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a\u5cf0", "\u4f1a"},
[]string{"\u5728", "\u8fc7", "\u53bb", "\u7684", "\u8fd9\u4e94\u5e74"},
[]string{"\u8fd8", "\u9700\u8981", "\u5f88\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
[]string{"60", "\u5468\u5e74\u9996", "\u90fd", "\u9605\u5175"},
[]string{"\u4f60", "\u597d\u4eba", "\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e70\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a\u56ed"},
[]string{"\u4e70\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a\u56ed"},
[]string{"\u4f46", "\u662f", "\u540e", "\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5", "\u4e3a\u803b", "\uff0c", "\u53cd\u4ee5", "\u4e3a", "rong"},
[]string{"\u56e0"},
[]string{},
[]string{"hello", "\u4f60", "\u597d\u4eba", "\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
[]string{"hello", "\u4f60", "\u597d\u4eba", "\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
[]string{"\u4e3a", "\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3", "\u8981", "\u7684", "\u751f\u6d3b"},
[]string{"\u540e\u6765", "\u6211", "\u624d"},
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a", "\u4e86"},
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u5176\u5b9e", "\u4f7f", "\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
[]string{"\u662f", "\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9", "\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd\u4eba", "\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743\u8d23", "\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709", "\u5173\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012", "\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39\u8005", "\u7531", "\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab", "\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
[]string{"\u5927"},
[]string{},
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728", "\u7406"},
[]string{"\u957f\u6625\u5e02", "\u957f\u6625\u8282", "\u8bb2\u8bdd"},
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
[]string{"\u7ed3\u5408\u6210", "\u5206\u5b50", "\u65f6"},
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700", "\u597d", "\u7684"},
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684", "\u786e\u662f", "\u6211", "\u7684", "\u9519"},
[]string{"\u4f9b\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
[]string{"\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03\u584c\u6865", "\u539f\u56e0"},
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5904"},
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
[]string{"BP", "\u795e\u7ecf", "\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d", "\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0\u533a", "\u5206\u5ea6", "\uff1f"},
[]string{"\u5357\u4eac\u5e02", "\u957f\u6c5f\u5927\u6865"},
[]string{"\u5e94\u4e00\u4e9b", "\u4f7f", "\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a", "\u4e86", "\u4fbf", "\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
[]string{"\u957f\u6625\u5e02", "\u957f\u6625\u836f\u5e97"},
[]string{"\u9093\u9896", "\u8d85\u751f", "\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
[]string{"\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231\u4e16\u754c", "\u548c", "\u5e73", "\u7684", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
[]string{"\u7a0b\u5e8f\u5458", "\u795d\u6d77\u6797", "\u548c", "\u6731\u4f1a\u9707", "\u662f", "\u5728", "\u5b59\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303\u51ef", "\u5728", "\u6700\u53f3\u9762", ".", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e\u677e\u6d2a"},
[]string{"\u4e00\u6b21\u6027", "\u4ea4\u591a\u5c11", "\u94b1"},
[]string{"\u4e24\u5757", "\u4e94\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
[]string{"\u5c0f", "\u548c", "\u5c1a\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c", "\u5c1a", "\u4e00\u6837", "\u7684", "\u548c", "\u5c1a\u5934"},
[]string{"\u6211", "\u662f", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171", "\u548c", "\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c", "\u5e73\u95e8", "\u7ad9"},
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B", "\u8d85\u7136", "\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T", "\u6064"},
[]string{"AT", "&", "T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
[]string{"C", "++", "\u548c", "c", "#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3.14159"},
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d\u63e1", "\u624b", "\u7684", "\u7684", "\u54e5", "\u5417", "\uff1f", "\u4ed6\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684", "\u58eb", "\u3002"},
[]string{"\u67aa\u6746\u5b50", "\u4e2d", "\u51fa\u653f\u6743"},
}
)
// init selects dict.txt as the dictionary via SetDictionary. Being an init
// function, it runs before any test in this package.
func init() {
	const dictFile = "dict.txt"
	SetDictionary(dictFile)
}
// TestCutDAG checks that cut_DAG yields 11 segments for the sample sentence.
func TestCutDAG(t *testing.T) {
	const sentence = "BP神经网络如何训练才能在分类时增加区分度"
	segments := cut_DAG(sentence)
	if count := len(segments); count != 11 {
		t.Error(segments)
	}
}
// TestCutDAGNoHmm checks that cut_DAG_NO_HMM yields 11 segments for the
// sample sentence.
func TestCutDAGNoHmm(t *testing.T) {
	const sentence = "BP神经网络如何训练才能在分类时增加区分度"
	segments := cut_DAG_NO_HMM(sentence)
	if count := len(segments); count != 11 {
		t.Error(segments)
	}
}
// TestRegexpSplit checks that RegexpSplit returns three pieces for each of the
// two pattern/sentence pairs below.
func TestRegexpSplit(t *testing.T) {
	cases := []struct {
		pattern  string
		sentence string
	}{
		{`\p{Han}+`, "BP神经网络如何训练才能在分类时增加区分度"},
		{`([\p{Han}#]+)`, ",BP神经网络如何训练才能在分类时#增加区分度?"},
	}
	for _, c := range cases {
		pieces := RegexpSplit(regexp.MustCompile(c.pattern), c.sentence)
		if len(pieces) != 3 {
			t.Error(pieces)
		}
	}
}
// TestDefaultCut runs Cut(content, false, true) over every entry of
// test_contents and compares the segments with the matching entry of
// defaultCutResult.
func TestDefaultCut(t *testing.T) {
	for index, content := range test_contents {
		want := defaultCutResult[index]
		got := Cut(content, false, true)
		if len(got) != len(want) {
			t.Errorf("default cut for %s length should be %d not %d\n",
				content, len(want), len(got))
		}
		// Only the common prefix is compared: indexing want with every index
		// of got would run past its end whenever got is the longer slice.
		for i := 0; i < len(got) && i < len(want); i++ {
			if got[i] != want[i] {
				t.Errorf("default cut for %s: segment %d should be %q not %q",
					content, i, want[i], got[i])
			}
		}
	}
}
// TestCutAll runs Cut(content, true, true) over every entry of test_contents
// and compares the segments with the matching entry of cutAllResult.
func TestCutAll(t *testing.T) {
	for index, content := range test_contents {
		want := cutAllResult[index]
		got := Cut(content, true, true)
		if len(got) != len(want) {
			t.Errorf("cut all for %s length should be %d not %d\n",
				content, len(want), len(got))
		}
		// Only the common prefix is compared: indexing want with every index
		// of got would run past its end whenever got is the longer slice.
		for i := 0; i < len(got) && i < len(want); i++ {
			if got[i] != want[i] {
				t.Errorf("cut all for %s: segment %d should be %q not %q",
					content, i, want[i], got[i])
			}
		}
	}
}
// TestDefaultCutNoHMM runs Cut(content, false, false) over every entry of
// test_contents and compares the segments with the matching entry of
// defaultCutNoHMMResult.
func TestDefaultCutNoHMM(t *testing.T) {
	for index, content := range test_contents {
		want := defaultCutNoHMMResult[index]
		got := Cut(content, false, false)
		if len(got) != len(want) {
			t.Errorf("default cut no hmm for %s length should be %d not %d\n",
				content, len(want), len(got))
		}
		// Only the common prefix is compared: indexing want with every index
		// of got would run past its end whenever got is the longer slice.
		for i := 0; i < len(got) && i < len(want); i++ {
			if got[i] != want[i] {
				t.Errorf("default cut no hmm for %s: segment %d should be %q not %q",
					content, i, want[i], got[i])
			}
		}
	}
}
func TestCutForSearch(t *testing.T) {
var result []string
for index, content := range test_contents {
result = CutForSearch(content, true)
if len(result) != len(cutForSearchResult[index]) {
t.Errorf("cut for search for %s length should be %d not %d\n",
content, len(cutForSearchResult[index]), len(result))
}
for i, c := range result {
if c != cutForSearchResult[index][i] {
t.Error(c)
}
}
}
for index, content := range test_contents {
result = CutForSearch(content, false)
if len(result) != len(cutForSearchNoHMMResult[index]) {
t.Errorf("cut for search no hmm for %s length should be %d not %d\n",
content, len(cutForSearchNoHMMResult[index]), len(result))
}
for i, c := range result {
if c != cutForSearchNoHMMResult[index][i] {
t.Error(c)
}
}
}
}
// TestSetdictionary switches the dictionary to foobar.txt with SetDictionary,
// then runs Cut(content, false, true) over every entry of test_contents and
// compares the segments with the matching entry of userDictCutResult.
func TestSetdictionary(t *testing.T) {
	SetDictionary("foobar.txt")
	for index, content := range test_contents {
		want := userDictCutResult[index]
		got := Cut(content, false, true)
		if len(got) != len(want) {
			t.Errorf("default cut with user dictionary for %s length should be %d not %d\n",
				content, len(want), len(got))
		}
		// Only the common prefix is compared: indexing want with every index
		// of got would run past its end whenever got is the longer slice.
		for i := 0; i < len(got) && i < len(want); i++ {
			if got[i] != want[i] {
				t.Errorf("default cut with user dictionary for %s: segment %d should be %q not %q",
					content, i, want[i], got[i])
			}
		}
	}
}

79475
posseg/char_state_tab.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,16 @@
package posseg
import (
"testing"
)
// TestGet checks that CharStateTab.Get returns 17 entries for U+8000 and some
// other number of entries for U+AAAA.
func TestGet(t *testing.T) {
	if states := CharStateTab.Get('\u8000'); len(states) != 17 {
		t.FailNow()
	}
	if states := CharStateTab.Get('\uaaaa'); len(states) == 17 {
		t.FailNow()
	}
}

292
posseg/posseg.go Normal file
View File

@@ -0,0 +1,292 @@
package posseg
import (
"bufio"
"fmt"
"github.com/wangbin/jiebago"
"os"
"path/filepath"
"regexp"
"runtime"
"strings"
)
var (
	// WordTagTab maps a word to its tag. It is filled by load_model and, on
	// the first call to Cut, extended with jiebago.UserWordTagTab.
	WordTagTab = map[string]string{}
	// isUserDictLoaded records whether Cut has already copied
	// jiebago.UserWordTagTab into WordTagTab.
	isUserDictLoaded bool
)
// WordTag pairs a segmented word with the tag assigned to it.
type WordTag struct {
	Word string
	Tag  string
}

// String renders the pair as "Word/Tag".
func (w WordTag) String() string {
	return fmt.Sprintf("%s/%s", w.Word, w.Tag)
}
// init fills WordTagTab from the dictionary file named by jiebago.Dictionary,
// looked up in the parent directory of the directory holding this source file.
func init() {
	// Skip 0 identifies the function calling runtime.Caller, i.e. this init
	// and therefore this source file. Skip 1 would name whatever invoked
	// init, which is not a file of this package.
	_, filename, _, _ := runtime.Caller(0)
	dictDir := filepath.Dir(filepath.Dir(filename))
	dictPath := filepath.Join(dictDir, jiebago.Dictionary)
	// Best effort, as before: a dictionary that cannot be read must not abort
	// program start-up, so the error is deliberately dropped here.
	_ = load_model(dictPath)
}
// load_model reads the dictionary file f_name line by line and records, for
// each line, its first space-separated field as a word and its third field as
// that word's tag in WordTagTab.
//
// Lines with fewer than three fields (blank lines included) are skipped
// instead of causing an index-out-of-range panic. It returns the error from
// opening the file, or any error hit while reading it; reaching the end of the
// file is not an error.
func load_model(f_name string) error {
	file, err := os.Open(f_name)
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		fields := strings.Split(strings.TrimSpace(scanner.Text()), " ")
		if len(fields) < 3 {
			continue
		}
		WordTagTab[fields[0]] = fields[2]
	}
	// Err is nil when scanning stopped at end of file.
	return scanner.Err()
}
// __cut segments sentence using the per-rune states returned by Viterbi.
// State 'B' marks the start of a word, 'E' closes the word opened by the last
// 'B', and 'S' emits the rune as a word of its own; every other state only
// advances. Runes left after the last emitted word are returned as one final
// word, tagged with the tag of the first leftover position.
func __cut(sentence string) []WordTag {
	runes := []rune(sentence)
	_, posList := Viterbi(runes)
	words := []WordTag{}
	wordStart, consumed := 0, 0
	for i := range runes {
		pos := posList[i]
		if pos.State == 'B' {
			wordStart = i
			continue
		}
		if pos.State == 'E' {
			words = append(words, WordTag{string(runes[wordStart : i+1]), pos.Tag})
			consumed = i + 1
			continue
		}
		if pos.State == 'S' {
			words = append(words, WordTag{string(runes[i]), pos.Tag})
			consumed = i + 1
		}
	}
	if consumed < len(runes) {
		words = append(words, WordTag{string(runes[consumed:]), posList[consumed].Tag})
	}
	return words
}
// Patterns used by cutDetail, compiled once instead of on every call.
var (
	cutDetailHanRE = regexp.MustCompile(`\p{Han}+`)
	// NOTE(review): the brackets in this pattern look misplaced; it was
	// presumably meant to be `[\.[:digit:]]+|[[:alnum:]]+`. It is kept
	// byte-for-byte so that the split points do not change - confirm before
	// correcting.
	cutDetailSkipRE = regexp.MustCompile(`[[\.[:digit:]]+|[:alnum:]]+`)
	cutDetailEngRE  = regexp.MustCompile(`[[:alnum:]]`)
	cutDetailNumRE  = regexp.MustCompile(`[\.[:digit:]]+`)
)

// cutDetail tags a sentence piece by piece. The sentence is first split with
// jiebago.RegexpSplit on runs of Han characters: pieces containing Han
// characters are segmented by __cut, all other pieces are split again with
// the skip pattern. Of those sub-pieces, empty ones are dropped, ones
// containing a digit or '.' are tagged "m", ones containing an alphanumeric
// character are tagged "eng", and the rest are tagged "x".
func cutDetail(sentence string) []WordTag {
	result := make([]WordTag, 0)
	for _, blk := range jiebago.RegexpSplit(cutDetailHanRE, sentence) {
		if cutDetailHanRE.MatchString(blk) {
			result = append(result, __cut(blk)...)
			continue
		}
		for _, x := range jiebago.RegexpSplit(cutDetailSkipRE, blk) {
			if len(x) == 0 {
				continue
			}
			switch {
			case cutDetailNumRE.MatchString(x):
				result = append(result, WordTag{x, "m"})
			case cutDetailEngRE.MatchString(x):
				result = append(result, WordTag{x, "eng"})
			default:
				result = append(result, WordTag{x, "x"})
			}
		}
	}
	return result
}
// cutAction is the signature shared by the block segmenters (cut_DAG and
// cut_DAG_NO_HMM) that cut chooses between: it turns a sentence into tagged
// words.
type cutAction func(string) []WordTag
// lookupWordTag pairs word with its WordTagTab entry, or with "x" when the
// word has no entry.
func lookupWordTag(word string) WordTag {
	if tag, ok := WordTagTab[word]; ok {
		return WordTag{word, tag}
	}
	return WordTag{word, "x"}
}

// appendBufferTags tags the pending run of single runes in buf and appends the
// outcome to result. An empty buf adds nothing and a single rune is looked up
// directly. A longer run that is not a key of jiebago.TT.Freq is handed to
// cutDetail; one that is a key is looked up rune by rune.
func appendBufferTags(result []WordTag, buf []rune) []WordTag {
	switch len(buf) {
	case 0:
		return result
	case 1:
		return append(result, lookupWordTag(string(buf)))
	}
	bufString := string(buf)
	if _, ok := jiebago.TT.Freq[bufString]; !ok {
		return append(result, cutDetail(bufString)...)
	}
	for _, elem := range buf {
		result = append(result, lookupWordTag(string(elem)))
	}
	return result
}

// cut_DAG segments and tags sentence along the route computed by
// jiebago.GetDAG and jiebago.Calc. Consecutive single-rune words on the route
// are collected in a buffer; when a longer word is reached, and again at the
// end of the sentence, the buffer is resolved by appendBufferTags. Longer
// words are tagged from WordTagTab, with "x" for words it does not contain.
func cut_DAG(sentence string) []WordTag {
	dag := jiebago.GetDAG(sentence)
	routes := jiebago.Calc(sentence, dag, 0)
	runes := []rune(sentence)
	result := make([]WordTag, 0)
	var buf []rune
	for x := 0; x < len(runes); {
		y := routes[x].Index + 1
		l_word := runes[x:y]
		if y-x == 1 {
			buf = append(buf, l_word...)
		} else {
			result = appendBufferTags(result, buf)
			// The buffered runes were copied into strings above, so the
			// backing array can be reused for the next run.
			buf = buf[:0]
			result = append(result, lookupWordTag(string(l_word)))
		}
		x = y
	}
	return appendBufferTags(result, buf)
}
// cutDAGNoHMMEngRE matches a single alphanumeric character. It is compiled
// once instead of on every cut_DAG_NO_HMM call.
var cutDAGNoHMMEngRE = regexp.MustCompile(`[[:alnum:]]`)

// cut_DAG_NO_HMM segments and tags sentence along the route computed by
// jiebago.GetDAG and jiebago.Calc. Consecutive single-rune alphanumeric words
// are joined and emitted as one word tagged "eng". Every other word is tagged
// from WordTagTab, with "x" for words it does not contain.
func cut_DAG_NO_HMM(sentence string) []WordTag {
	dag := jiebago.GetDAG(sentence)
	routes := jiebago.Calc(sentence, dag, 0)
	runes := []rune(sentence)
	result := make([]WordTag, 0)
	var buf []rune
	for x := 0; x < len(runes); {
		y := routes[x].Index + 1
		l_word := runes[x:y]
		x = y
		// The cheap length test goes first so the regexp only ever sees
		// single-rune words.
		if len(l_word) == 1 && cutDAGNoHMMEngRE.MatchString(string(l_word)) {
			buf = append(buf, l_word...)
			continue
		}
		if len(buf) > 0 {
			result = append(result, WordTag{string(buf), "eng"})
			buf = buf[:0]
		}
		sl_word := string(l_word)
		tag, ok := WordTagTab[sl_word]
		if !ok {
			tag = "x"
		}
		result = append(result, WordTag{sl_word, tag})
	}
	if len(buf) > 0 {
		result = append(result, WordTag{string(buf), "eng"})
	}
	return result
}
// cut splits sentence into tagged tokens.
//
// The sentence is divided with jiebago.RegexpSplit using a pattern that
// matches runs of Han characters, [[:alnum:]] characters and the symbols
// '+', '#', '&', '.' and '_'. Blocks matching that pattern are segmented by
// cut_DAG when HMM is true and by cut_DAG_NO_HMM otherwise. Every other block
// is divided again on the whitespace pattern: pieces that match it are tagged
// "x", and the remaining pieces are processed rune by rune.
func cut(sentence string, HMM bool) []WordTag {
	hanRe := regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
	skipRe := regexp.MustCompile(`(\r\n|\s)`)
	engRe := regexp.MustCompile(`[[:alnum:]]`)
	numRe := regexp.MustCompile(`[\.[:digit:]]+`)

	var segment cutAction = cut_DAG_NO_HMM
	if HMM {
		segment = cut_DAG
	}

	tagged := []WordTag{}
	for _, block := range jiebago.RegexpSplit(hanRe, sentence) {
		if hanRe.MatchString(block) {
			tagged = append(tagged, segment(block)...)
			continue
		}
		for _, piece := range jiebago.RegexpSplit(skipRe, block) {
			if skipRe.MatchString(piece) {
				tagged = append(tagged, WordTag{piece, "x"})
				continue
			}
			for _, r := range piece {
				single := string(r)
				if numRe.MatchString(single) {
					tagged = append(tagged, WordTag{single, "m"})
				} else if engRe.MatchString(piece) {
					// NOTE(review): this tests and emits the whole piece,
					// not the current rune, so the piece is appended once
					// per rune that reaches this branch — confirm whether
					// the single rune was intended here.
					tagged = append(tagged, WordTag{piece, "eng"})
				} else {
					tagged = append(tagged, WordTag{single, "x"})
				}
			}
		}
	}
	return tagged
}
// Cut returns the tagged tokens of sentence, delegating to cut with the given
// HMM flag.
//
// On the first call, while isUserDictLoaded is still false, every entry of
// jiebago.UserWordTagTab is copied into WordTagTab (overwriting entries with
// the same key) and the flag is set so the copy is not repeated.
func Cut(sentence string, HMM bool) []WordTag {
	if isUserDictLoaded {
		return cut(sentence, HMM)
	}
	for word, tag := range jiebago.UserWordTagTab {
		WordTagTab[word] = tag
	}
	isUserDictLoaded = true
	return cut(sentence, HMM)
}

294
posseg/posseg_test.go Normal file
View File

@@ -0,0 +1,294 @@
package posseg
import (
"github.com/wangbin/jiebago"
"testing"
)
var (
test_contents = []string{
"这是一个伸手不见五指的黑夜。我叫孙悟空我爱北京我爱Python和C++。",
"我不喜欢日本和服。",
"雷猴回归人间。",
"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
"我需要廉租房",
"永和服装饰品有限公司",
"我爱北京天安门",
"abc",
"隐马尔可夫",
"雷猴是个好网站",
"“Microsoft”一词由“MICROcomputer微型计算机”和“SOFTware软件”两部分组成",
"草泥马和欺实马是今年的流行词汇",
"伊藤洋华堂总府店",
"中国科学院计算技术研究所",
"罗密欧与朱丽叶",
"我购买了道具和服装",
"PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍",
"湖北省石首市",
"湖北省十堰市",
"总经理完成了这件事情",
"电脑修好了",
"做好了这件事情就一了百了了",
"人们审美的观点是不同的",
"我们买了一个美的空调",
"线程初始化时我们要注意",
"一个分子是由好多原子组织成的",
"祝你马到功成",
"他掉进了无底洞里",
"中国的首都是北京",
"孙君意",
"外交部发言人马朝旭",
"领导人会议和第四届东亚峰会",
"在过去的这五年",
"还需要很长的路要走",
"60周年首都阅兵",
"你好人们审美的观点是不同的",
"买水果然后来世博园",
"买水果然后去世博园",
"但是后来我才知道你是对的",
"存在即合理",
"的的的的的在的的的的就以和和和",
"I love你不以为耻反以为rong",
"因",
"",
"hello你好人们审美的观点是不同的",
"很好但主要是基于网页形式",
"hello你好人们审美的观点是不同的",
"为什么我不能拥有想要的生活",
"后来我才",
"此次来中国是为了",
"使用了它就可以解决一些问题",
",使用了它就可以解决一些问题",
"其实使用了它就可以解决一些问题",
"好人使用了它就可以解决一些问题",
"是因为和国家",
"老年搜索还支持",
"干脆就把那部蒙人的闲法给废了拉倒RT @laoshipukong : 27日全国人大常委会第三次审议侵权责任法草案删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 ",
"大",
"",
"他说的确实在理",
"长春市长春节讲话",
"结婚的和尚未结婚的",
"结合成分子时",
"旅游和服务是最好的",
"这件事情的确是我的错",
"供大家参考指正",
"哈尔滨政府公布塌桥原因",
"我在机场入口处",
"邢永臣摄影报道",
"BP神经网络如何训练才能在分类时增加区分度",
"南京市长江大桥",
"应一些使用者的建议也为了便于利用NiuTrans用于SMT研究",
"长春市长春药店",
"邓颖超生前最喜欢的衣服",
"胡锦涛是热爱世界和平的政治局常委",
"程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪",
"一次性交多少钱",
"两块五一套,三块八一斤,四块七一本,五块六一条",
"小和尚留了一个像大和尚一样的和尚头",
"我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站",
"张晓梅去人民医院做了个B超然后去买了件T恤",
"AT&T是一件不错的公司给你发offer了吗",
"C++和c#是什么关系11+122=133是吗PI=3.14159",
"你认识那个和主席握手的的哥吗?他开一辆黑色的士。",
"枪杆子中出政权"}
defaultCutResult = [][]WordTag{
[]WordTag{WordTag{"\u8fd9", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u9ed1\u591c", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u53eb", "v"}, WordTag{"\u5b59\u609f\u7a7a", "nr"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"Python", "eng"}, WordTag{"\u548c", "c"}, WordTag{"C++", "nz"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u4e0d", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u65e5\u672c", "ns"}, WordTag{"\u548c\u670d", "nz"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u56de\u5f52", "v"}, WordTag{"\u4eba\u95f4", "n"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u5de5\u4fe1\u5904", "n"}, WordTag{"\u5973\u5e72\u4e8b", "n"}, WordTag{"\u6bcf\u6708", "r"}, WordTag{"\u7ecf\u8fc7", "p"}, WordTag{"\u4e0b\u5c5e", "v"}, WordTag{"\u79d1\u5ba4", "n"}, WordTag{"\u90fd", "d"}, WordTag{"\u8981", "v"}, WordTag{"\u4eb2\u53e3", "n"}, WordTag{"\u4ea4\u4ee3", "n"}, WordTag{"24", "m"}, WordTag{"\u53e3", "n"}, WordTag{"\u4ea4\u6362\u673a", "n"}, WordTag{"\u7b49", "u"}, WordTag{"\u6280\u672f\u6027", "n"}, WordTag{"\u5668\u4ef6", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5b89\u88c5", "v"}, WordTag{"\u5de5\u4f5c", "vn"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5ec9\u79df\u623f", "n"}},
[]WordTag{WordTag{"\u6c38\u548c", "nz"}, WordTag{"\u670d\u88c5", "vn"}, WordTag{"\u9970\u54c1", "n"}, WordTag{"\u6709\u9650\u516c\u53f8", "n"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\u5929\u5b89\u95e8", "ns"}},
[]WordTag{WordTag{"abc", "eng"}},
[]WordTag{WordTag{"\u9690", "n"}, WordTag{"\u9a6c\u5c14\u53ef\u592b", "nr"}},
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2a", "q"}, WordTag{"\u597d", "a"}, WordTag{"\u7f51\u7ad9", "n"}},
[]WordTag{WordTag{"\u201c", "x"}, WordTag{"Microsoft", "eng"}, WordTag{"\u201d", "x"}, WordTag{"\u4e00", "m"}, WordTag{"\u8bcd", "n"}, WordTag{"\u7531", "p"}, WordTag{"\u201c", "x"}, WordTag{"MICROcomputer", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u5fae\u578b", "b"}, WordTag{"\u8ba1\u7b97\u673a", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u548c", "c"}, WordTag{"\u201c", "x"}, WordTag{"SOFTware", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u8f6f\u4ef6", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u4e24", "m"}, WordTag{"\u90e8\u5206", "n"}, WordTag{"\u7ec4\u6210", "v"}},
[]WordTag{WordTag{"\u8349\u6ce5\u9a6c", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u6b3a\u5b9e", "v"}, WordTag{"\u9a6c", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4eca\u5e74", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d41\u884c", "v"}, WordTag{"\u8bcd\u6c47", "n"}},
[]WordTag{WordTag{"\u4f0a\u85e4", "nr"}, WordTag{"\u6d0b\u534e\u5802", "n"}, WordTag{"\u603b\u5e9c", "n"}, WordTag{"\u5e97", "n"}},
[]WordTag{WordTag{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", "nt"}},
[]WordTag{WordTag{"\u7f57\u5bc6\u6b27", "nr"}, WordTag{"\u4e0e", "p"}, WordTag{"\u6731\u4e3d\u53f6", "nr"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u8d2d\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u9053\u5177", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u88c5", "vn"}},
[]WordTag{WordTag{"PS", "eng"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u89c9\u5f97", "v"}, WordTag{"\u5f00\u6e90", "n"}, WordTag{"\u6709", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u597d\u5904", "d"}, WordTag{"\uff0c", "x"}, WordTag{"\u5c31\u662f", "d"}, WordTag{"\u80fd\u591f", "v"}, WordTag{"\u6566\u4fc3", "v"}, WordTag{"\u81ea\u5df1", "r"}, WordTag{"\u4e0d\u65ad\u6539\u8fdb", "l"}, WordTag{"\uff0c", "x"}, WordTag{"\u907f\u514d", "v"}, WordTag{"\u655e", "v"}, WordTag{"\u5e1a", "ng"}, WordTag{"\u81ea\u73cd", "b"}},
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u77f3\u9996\u5e02", "ns"}},
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u5341\u5830\u5e02", "ns"}},
[]WordTag{WordTag{"\u603b\u7ecf\u7406", "n"}, WordTag{"\u5b8c\u6210", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}},
[]WordTag{WordTag{"\u7535\u8111", "n"}, WordTag{"\u4fee\u597d", "v"}, WordTag{"\u4e86", "ul"}},
[]WordTag{WordTag{"\u505a\u597d", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u5c31", "d"}, WordTag{"\u4e00\u4e86\u767e\u4e86", "l"}, WordTag{"\u4e86", "ul"}},
[]WordTag{WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u6211\u4eec", "r"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u7f8e\u7684", "nr"}, WordTag{"\u7a7a\u8c03", "n"}},
[]WordTag{WordTag{"\u7ebf\u7a0b", "n"}, WordTag{"\u521d\u59cb\u5316", "l"}, WordTag{"\u65f6", "n"}, WordTag{"\u6211\u4eec", "r"}, WordTag{"\u8981", "v"}, WordTag{"\u6ce8\u610f", "v"}},
[]WordTag{WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u7531", "p"}, WordTag{"\u597d\u591a", "m"}, WordTag{"\u539f\u5b50", "n"}, WordTag{"\u7ec4\u7ec7", "v"}, WordTag{"\u6210", "v"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u795d", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u9a6c\u5230\u529f\u6210", "i"}},
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u6389", "v"}, WordTag{"\u8fdb", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u65e0\u5e95\u6d1e", "ns"}, WordTag{"\u91cc", "f"}},
[]WordTag{WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u7684", "uj"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u5317\u4eac", "ns"}},
[]WordTag{WordTag{"\u5b59\u541b\u610f", "nr"}},
[]WordTag{WordTag{"\u5916\u4ea4\u90e8", "nt"}, WordTag{"\u53d1\u8a00\u4eba", "l"}, WordTag{"\u9a6c\u671d\u65ed", "nr"}},
[]WordTag{WordTag{"\u9886\u5bfc\u4eba", "n"}, WordTag{"\u4f1a\u8bae", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u7b2c\u56db\u5c4a", "m"}, WordTag{"\u4e1c\u4e9a", "ns"}, WordTag{"\u5cf0\u4f1a", "n"}},
[]WordTag{WordTag{"\u5728", "p"}, WordTag{"\u8fc7\u53bb", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u8fd9", "r"}, WordTag{"\u4e94\u5e74", "t"}},
[]WordTag{WordTag{"\u8fd8", "d"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5f88", "d"}, WordTag{"\u957f", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u8def", "n"}, WordTag{"\u8981", "v"}, WordTag{"\u8d70", "v"}},
[]WordTag{WordTag{"60", "m"}, WordTag{"\u5468\u5e74", "t"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u9605\u5175", "v"}},
[]WordTag{WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u6765", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
[]WordTag{WordTag{"\u4f46\u662f", "c"}, WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}, WordTag{"\u77e5\u9053", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u5bf9", "p"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u5b58\u5728", "v"}, WordTag{"\u5373", "v"}, WordTag{"\u5408\u7406", "vn"}},
[]WordTag{WordTag{"\u7684\u7684", "u"}, WordTag{"\u7684\u7684", "u"}, WordTag{"\u7684", "uj"}, WordTag{"\u5728\u7684", "u"}, WordTag{"\u7684\u7684", "u"}, WordTag{"\u7684", "uj"}, WordTag{"\u5c31", "d"}, WordTag{"\u4ee5", "p"}, WordTag{"\u548c\u548c", "nz"}, WordTag{"\u548c", "c"}},
[]WordTag{WordTag{"I", "x"}, WordTag{" ", "x"}, WordTag{"love", "eng"}, WordTag{"\u4f60", "r"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e0d\u4ee5\u4e3a\u803b", "i"}, WordTag{"\uff0c", "x"}, WordTag{"\u53cd", "zg"}, WordTag{"\u4ee5\u4e3a", "c"}, WordTag{"rong", "eng"}},
[]WordTag{WordTag{"\u56e0", "p"}},
[]WordTag{},
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u5f88\u597d", "a"}, WordTag{"\u4f46", "c"}, WordTag{"\u4e3b\u8981", "b"}, WordTag{"\u662f", "v"}, WordTag{"\u57fa\u4e8e", "p"}, WordTag{"\u7f51\u9875", "n"}, WordTag{"\u5f62\u5f0f", "n"}},
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u4e3a\u4ec0\u4e48", "r"}, WordTag{"\u6211", "r"}, WordTag{"\u4e0d\u80fd", "v"}, WordTag{"\u62e5\u6709", "v"}, WordTag{"\u60f3\u8981", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u751f\u6d3b", "vn"}},
[]WordTag{WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}},
[]WordTag{WordTag{"\u6b64\u6b21", "r"}, WordTag{"\u6765", "v"}, WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u662f", "v"}, WordTag{"\u4e3a\u4e86", "p"}},
[]WordTag{WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{",", "x"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{"\u5176\u5b9e", "d"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{"\u597d\u4eba", "n"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{"\u662f\u56e0\u4e3a", "c"}, WordTag{"\u548c", "c"}, WordTag{"\u56fd\u5bb6", "n"}},
[]WordTag{WordTag{"\u8001\u5e74", "t"}, WordTag{"\u641c\u7d22", "v"}, WordTag{"\u8fd8", "d"}, WordTag{"\u652f\u6301", "v"}},
[]WordTag{WordTag{"\u5e72\u8106", "d"}, WordTag{"\u5c31", "d"}, WordTag{"\u628a", "p"}, WordTag{"\u90a3\u90e8", "r"}, WordTag{"\u8499\u4eba", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u95f2\u6cd5", "n"}, WordTag{"\u7ed9", "p"}, WordTag{"\u5e9f", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u62c9\u5012", "v"}, WordTag{"\uff01", "x"}, WordTag{"RT", "eng"}, WordTag{" ", "x"}, WordTag{"@", "x"}, WordTag{"laoshipukong", "eng"}, WordTag{" ", "x"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"27", "m"}, WordTag{"\u65e5", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "nt"}, WordTag{"\u7b2c\u4e09\u6b21", "m"}, WordTag{"\u5ba1\u8bae", "v"}, WordTag{"\u4fb5\u6743", "v"}, WordTag{"\u8d23\u4efb\u6cd5", "n"}, WordTag{"\u8349\u6848", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u5220\u9664", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u6709\u5173", "vn"}, WordTag{"\u533b\u7597", "n"}, WordTag{"\u635f\u5bb3", "v"}, WordTag{"\u8d23\u4efb", "n"}, WordTag{"\u201c", "x"}, WordTag{"\u4e3e\u8bc1", "v"}, WordTag{"\u5012\u7f6e", "v"}, WordTag{"\u201d", "x"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c4\u5b9a", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u5728", "p"}, WordTag{"\u533b\u60a3", "n"}, WordTag{"\u7ea0\u7eb7", "n"}, WordTag{"\u4e2d\u672c", "ns"}, WordTag{"\u5df2", "d"}, WordTag{"\u5904\u4e8e", "v"}, WordTag{"\u5f31\u52bf", "n"}, WordTag{"\u5730\u4f4d", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d88\u8d39\u8005", "n"}, WordTag{"\u7531\u6b64", "c"}, WordTag{"\u5c06", "d"}, WordTag{"\u9677\u5165", "v"}, WordTag{"\u4e07\u52ab\u4e0d\u590d", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u5883\u5730", "s"}, WordTag{"\u3002", "x"}, WordTag{" ", "x"}},
[]WordTag{WordTag{"\u5927", "a"}},
[]WordTag{},
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u8bf4", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u786e\u5b9e", "ad"}, WordTag{"\u5728", "p"}, WordTag{"\u7406", "n"}},
[]WordTag{WordTag{"\u957f\u6625", "ns"}, WordTag{"\u5e02\u957f", "n"}, WordTag{"\u6625\u8282", "t"}, WordTag{"\u8bb2\u8bdd", "n"}},
[]WordTag{WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c", "c"}, WordTag{"\u5c1a\u672a", "d"}, WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u7ed3\u5408", "v"}, WordTag{"\u6210", "n"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u65f6", "n"}},
[]WordTag{WordTag{"\u65c5\u6e38", "vn"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u52a1", "vn"}, WordTag{"\u662f", "v"}, WordTag{"\u6700\u597d", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u7684\u786e", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u6211", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u9519", "n"}},
[]WordTag{WordTag{"\u4f9b", "v"}, WordTag{"\u5927\u5bb6", "n"}, WordTag{"\u53c2\u8003", "v"}, WordTag{"\u6307\u6b63", "v"}},
[]WordTag{WordTag{"\u54c8\u5c14\u6ee8", "ns"}, WordTag{"\u653f\u5e9c", "n"}, WordTag{"\u516c\u5e03", "v"}, WordTag{"\u584c", "v"}, WordTag{"\u6865", "n"}, WordTag{"\u539f\u56e0", "n"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u5728", "p"}, WordTag{"\u673a\u573a", "n"}, WordTag{"\u5165\u53e3\u5904", "i"}},
[]WordTag{WordTag{"\u90a2\u6c38\u81e3", "nr"}, WordTag{"\u6444\u5f71", "n"}, WordTag{"\u62a5\u9053", "v"}},
[]WordTag{WordTag{"BP", "eng"}, WordTag{"\u795e\u7ecf\u7f51\u7edc", "n"}, WordTag{"\u5982\u4f55", "r"}, WordTag{"\u8bad\u7ec3", "vn"}, WordTag{"\u624d\u80fd", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5206\u7c7b", "n"}, WordTag{"\u65f6", "n"}, WordTag{"\u589e\u52a0", "v"}, WordTag{"\u533a\u5206\u5ea6", "n"}, WordTag{"\uff1f", "x"}},
[]WordTag{WordTag{"\u5357\u4eac\u5e02", "ns"}, WordTag{"\u957f\u6c5f\u5927\u6865", "ns"}},
[]WordTag{WordTag{"\u5e94", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u4f7f\u7528\u8005", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5efa\u8bae", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e5f", "d"}, WordTag{"\u4e3a\u4e86", "p"}, WordTag{"\u4fbf\u4e8e", "v"}, WordTag{"\u5229\u7528", "n"}, WordTag{"NiuTrans", "eng"}, WordTag{"\u7528\u4e8e", "v"}, WordTag{"SMT", "eng"}, WordTag{"\u7814\u7a76", "vn"}},
[]WordTag{WordTag{"\u957f\u6625\u5e02", "ns"}, WordTag{"\u957f\u6625", "ns"}, WordTag{"\u836f\u5e97", "n"}},
[]WordTag{WordTag{"\u9093\u9896\u8d85", "nr"}, WordTag{"\u751f\u524d", "t"}, WordTag{"\u6700", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u8863\u670d", "n"}},
[]WordTag{WordTag{"\u80e1\u9526\u6d9b", "nr"}, WordTag{"\u662f", "v"}, WordTag{"\u70ed\u7231", "a"}, WordTag{"\u4e16\u754c", "n"}, WordTag{"\u548c\u5e73", "nz"}, WordTag{"\u7684", "uj"}, WordTag{"\u653f\u6cbb\u5c40", "n"}, WordTag{"\u5e38\u59d4", "j"}},
[]WordTag{WordTag{"\u7a0b\u5e8f\u5458", "n"}, WordTag{"\u795d", "v"}, WordTag{"\u6d77\u6797", "nz"}, WordTag{"\u548c", "c"}, WordTag{"\u6731\u4f1a\u9707", "nr"}, WordTag{"\u662f", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5b59\u5065", "nr"}, WordTag{"\u7684", "uj"}, WordTag{"\u5de6\u9762", "f"}, WordTag{"\u548c", "c"}, WordTag{"\u53f3\u9762", "f"}, WordTag{",", "x"}, WordTag{" ", "x"}, WordTag{"\u8303\u51ef", "nr"}, WordTag{"\u5728", "p"}, WordTag{"\u6700", "a"}, WordTag{"\u53f3\u9762", "f"}, WordTag{".", "m"}, WordTag{"\u518d\u5f80", "d"}, WordTag{"\u5de6", "f"}, WordTag{"\u662f", "v"}, WordTag{"\u674e\u677e\u6d2a", "nr"}},
[]WordTag{WordTag{"\u4e00\u6b21\u6027", "d"}, WordTag{"\u4ea4", "v"}, WordTag{"\u591a\u5c11", "m"}, WordTag{"\u94b1", "n"}},
[]WordTag{WordTag{"\u4e24\u5757", "m"}, WordTag{"\u4e94", "m"}, WordTag{"\u4e00\u5957", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e09\u5757", "m"}, WordTag{"\u516b", "m"}, WordTag{"\u4e00\u65a4", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u56db\u5757", "m"}, WordTag{"\u4e03", "m"}, WordTag{"\u4e00\u672c", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e94\u5757", "m"}, WordTag{"\u516d", "m"}, WordTag{"\u4e00\u6761", "m"}},
[]WordTag{WordTag{"\u5c0f", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u7559", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u50cf", "v"}, WordTag{"\u5927", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u4e00\u6837", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c\u5c1a\u5934", "nr"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "ns"}, WordTag{"\u516c\u6c11", "n"}, WordTag{";", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7238\u7238", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u5171\u548c\u515a", "nt"}, WordTag{"\u515a\u5458", "n"}, WordTag{";", "x"}, WordTag{" ", "x"}, WordTag{"\u5730\u94c1", "n"}, WordTag{"\u548c\u5e73\u95e8", "ns"}, WordTag{"\u7ad9", "v"}},
[]WordTag{WordTag{"\u5f20\u6653\u6885", "nr"}, WordTag{"\u53bb", "v"}, WordTag{"\u4eba\u6c11", "n"}, WordTag{"\u533b\u9662", "n"}, WordTag{"\u505a", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e2a", "q"}, WordTag{"B\u8d85", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4ef6", "q"}, WordTag{"T\u6064", "n"}},
[]WordTag{WordTag{"AT&T", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4ef6", "m"}, WordTag{"\u4e0d\u9519", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u516c\u53f8", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u7ed9", "p"}, WordTag{"\u4f60", "r"}, WordTag{"\u53d1", "v"}, WordTag{"offer", "eng"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}},
[]WordTag{WordTag{"C++", "nz"}, WordTag{"\u548c", "c"}, WordTag{"c#", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4ec0\u4e48", "r"}, WordTag{"\u5173\u7cfb", "n"}, WordTag{"\uff1f", "x"}, WordTag{"11", "m"}, WordTag{"+", "x"}, WordTag{"122", "m"}, WordTag{"=", "x"}, WordTag{"133", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u662f", "v"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"PI", "eng"}, WordTag{"=", "x"}, WordTag{"3.14159", "m"}},
[]WordTag{WordTag{"\u4f60", "r"}, WordTag{"\u8ba4\u8bc6", "v"}, WordTag{"\u90a3\u4e2a", "r"}, WordTag{"\u548c", "c"}, WordTag{"\u4e3b\u5e2d", "n"}, WordTag{"\u63e1\u624b", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684\u54e5", "n"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"\u4ed6", "r"}, WordTag{"\u5f00", "v"}, WordTag{"\u4e00\u8f86", "m"}, WordTag{"\u9ed1\u8272", "n"}, WordTag{"\u7684\u58eb", "n"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u67aa\u6746\u5b50", "n"}, WordTag{"\u4e2d", "f"}, WordTag{"\u51fa", "v"}, WordTag{"\u653f\u6743", "n"}},
}
noHMMCutResult = [][]WordTag{
[]WordTag{WordTag{"\u8fd9", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u9ed1\u591c", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u53eb", "v"}, WordTag{"\u5b59\u609f\u7a7a", "nr"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"Python", "eng"}, WordTag{"\u548c", "c"}, WordTag{"C++", "nz"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u4e0d", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u65e5\u672c", "ns"}, WordTag{"\u548c\u670d", "nz"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u56de\u5f52", "v"}, WordTag{"\u4eba\u95f4", "n"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u5de5\u4fe1\u5904", "n"}, WordTag{"\u5973\u5e72\u4e8b", "n"}, WordTag{"\u6bcf\u6708", "r"}, WordTag{"\u7ecf\u8fc7", "p"}, WordTag{"\u4e0b\u5c5e", "v"}, WordTag{"\u79d1\u5ba4", "n"}, WordTag{"\u90fd", "d"}, WordTag{"\u8981", "v"}, WordTag{"\u4eb2\u53e3", "n"}, WordTag{"\u4ea4\u4ee3", "n"}, WordTag{"24", "eng"}, WordTag{"\u53e3", "q"}, WordTag{"\u4ea4\u6362\u673a", "n"}, WordTag{"\u7b49", "u"}, WordTag{"\u6280\u672f\u6027", "n"}, WordTag{"\u5668\u4ef6", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5b89\u88c5", "v"}, WordTag{"\u5de5\u4f5c", "vn"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5ec9\u79df\u623f", "n"}},
[]WordTag{WordTag{"\u6c38\u548c", "nz"}, WordTag{"\u670d\u88c5", "vn"}, WordTag{"\u9970\u54c1", "n"}, WordTag{"\u6709\u9650\u516c\u53f8", "n"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\u5929\u5b89\u95e8", "ns"}},
[]WordTag{WordTag{"abc", "eng"}},
[]WordTag{WordTag{"\u9690", "n"}, WordTag{"\u9a6c\u5c14\u53ef\u592b", "nr"}},
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2a", "q"}, WordTag{"\u597d", "a"}, WordTag{"\u7f51\u7ad9", "n"}},
[]WordTag{WordTag{"\u201c", "x"}, WordTag{"Microsoft", "eng"}, WordTag{"\u201d", "x"}, WordTag{"\u4e00", "m"}, WordTag{"\u8bcd", "n"}, WordTag{"\u7531", "p"}, WordTag{"\u201c", "x"}, WordTag{"MICROcomputer", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u5fae\u578b", "b"}, WordTag{"\u8ba1\u7b97\u673a", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u548c", "c"}, WordTag{"\u201c", "x"}, WordTag{"SOFTware", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u8f6f\u4ef6", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u4e24", "m"}, WordTag{"\u90e8\u5206", "n"}, WordTag{"\u7ec4\u6210", "v"}},
[]WordTag{WordTag{"\u8349\u6ce5\u9a6c", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u6b3a", "vn"}, WordTag{"\u5b9e", "n"}, WordTag{"\u9a6c", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4eca\u5e74", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d41\u884c", "v"}, WordTag{"\u8bcd\u6c47", "n"}},
[]WordTag{WordTag{"\u4f0a", "ns"}, WordTag{"\u85e4", "nr"}, WordTag{"\u6d0b\u534e\u5802", "n"}, WordTag{"\u603b\u5e9c", "n"}, WordTag{"\u5e97", "n"}},
[]WordTag{WordTag{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", "nt"}},
[]WordTag{WordTag{"\u7f57\u5bc6\u6b27", "nr"}, WordTag{"\u4e0e", "p"}, WordTag{"\u6731\u4e3d\u53f6", "nr"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u8d2d\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u9053\u5177", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u88c5", "vn"}},
[]WordTag{WordTag{"PS", "eng"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u89c9\u5f97", "v"}, WordTag{"\u5f00\u6e90", "n"}, WordTag{"\u6709", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u597d\u5904", "d"}, WordTag{"\uff0c", "x"}, WordTag{"\u5c31\u662f", "d"}, WordTag{"\u80fd\u591f", "v"}, WordTag{"\u6566\u4fc3", "v"}, WordTag{"\u81ea\u5df1", "r"}, WordTag{"\u4e0d\u65ad\u6539\u8fdb", "l"}, WordTag{"\uff0c", "x"}, WordTag{"\u907f\u514d", "v"}, WordTag{"\u655e", "v"}, WordTag{"\u5e1a", "ng"}, WordTag{"\u81ea\u73cd", "b"}},
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u77f3\u9996\u5e02", "ns"}},
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u5341\u5830\u5e02", "ns"}},
[]WordTag{WordTag{"\u603b\u7ecf\u7406", "n"}, WordTag{"\u5b8c\u6210", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}},
[]WordTag{WordTag{"\u7535\u8111", "n"}, WordTag{"\u4fee\u597d", "v"}, WordTag{"\u4e86", "ul"}},
[]WordTag{WordTag{"\u505a\u597d", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u5c31", "d"}, WordTag{"\u4e00\u4e86\u767e\u4e86", "l"}, WordTag{"\u4e86", "ul"}},
[]WordTag{WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u6211\u4eec", "r"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u7f8e\u7684", "nr"}, WordTag{"\u7a7a\u8c03", "n"}},
[]WordTag{WordTag{"\u7ebf\u7a0b", "n"}, WordTag{"\u521d\u59cb\u5316", "l"}, WordTag{"\u65f6", "n"}, WordTag{"\u6211\u4eec", "r"}, WordTag{"\u8981", "v"}, WordTag{"\u6ce8\u610f", "v"}},
[]WordTag{WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u7531", "p"}, WordTag{"\u597d\u591a", "m"}, WordTag{"\u539f\u5b50", "n"}, WordTag{"\u7ec4\u7ec7", "v"}, WordTag{"\u6210", "n"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u795d", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u9a6c\u5230\u529f\u6210", "i"}},
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u6389", "zg"}, WordTag{"\u8fdb", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u65e0\u5e95\u6d1e", "ns"}, WordTag{"\u91cc", "f"}},
[]WordTag{WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u7684", "uj"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u5317\u4eac", "ns"}},
[]WordTag{WordTag{"\u5b59", "zg"}, WordTag{"\u541b", "nz"}, WordTag{"\u610f", "n"}},
[]WordTag{WordTag{"\u5916\u4ea4\u90e8", "nt"}, WordTag{"\u53d1\u8a00\u4eba", "l"}, WordTag{"\u9a6c\u671d\u65ed", "nr"}},
[]WordTag{WordTag{"\u9886\u5bfc\u4eba", "n"}, WordTag{"\u4f1a\u8bae", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u7b2c\u56db\u5c4a", "m"}, WordTag{"\u4e1c\u4e9a", "ns"}, WordTag{"\u5cf0\u4f1a", "n"}},
[]WordTag{WordTag{"\u5728", "p"}, WordTag{"\u8fc7\u53bb", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u8fd9", "r"}, WordTag{"\u4e94\u5e74", "t"}},
[]WordTag{WordTag{"\u8fd8", "d"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5f88", "zg"}, WordTag{"\u957f", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u8def", "n"}, WordTag{"\u8981", "v"}, WordTag{"\u8d70", "v"}},
[]WordTag{WordTag{"60", "eng"}, WordTag{"\u5468\u5e74", "t"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u9605\u5175", "v"}},
[]WordTag{WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u6765", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
[]WordTag{WordTag{"\u4f46\u662f", "c"}, WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}, WordTag{"\u77e5\u9053", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u5bf9", "p"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u5b58\u5728", "v"}, WordTag{"\u5373", "v"}, WordTag{"\u5408\u7406", "vn"}},
[]WordTag{WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u5728", "p"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u5c31", "d"}, WordTag{"\u4ee5", "p"}, WordTag{"\u548c", "c"}, WordTag{"\u548c", "c"}, WordTag{"\u548c", "c"}},
[]WordTag{WordTag{"I", "eng"}, WordTag{" ", "x"}, WordTag{"love", "eng"}, WordTag{"\u4f60", "r"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e0d\u4ee5\u4e3a\u803b", "i"}, WordTag{"\uff0c", "x"}, WordTag{"\u53cd", "zg"}, WordTag{"\u4ee5\u4e3a", "c"}, WordTag{"rong", "eng"}},
[]WordTag{WordTag{"\u56e0", "p"}},
[]WordTag{},
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u5f88", "zg"}, WordTag{"\u597d", "a"}, WordTag{"\u4f46", "c"}, WordTag{"\u4e3b\u8981", "b"}, WordTag{"\u662f", "v"}, WordTag{"\u57fa\u4e8e", "p"}, WordTag{"\u7f51\u9875", "n"}, WordTag{"\u5f62\u5f0f", "n"}},
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u4e3a\u4ec0\u4e48", "r"}, WordTag{"\u6211", "r"}, WordTag{"\u4e0d\u80fd", "v"}, WordTag{"\u62e5\u6709", "v"}, WordTag{"\u60f3\u8981", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u751f\u6d3b", "vn"}},
[]WordTag{WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}},
[]WordTag{WordTag{"\u6b64\u6b21", "r"}, WordTag{"\u6765", "v"}, WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u662f", "v"}, WordTag{"\u4e3a\u4e86", "p"}},
[]WordTag{WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{",", "x"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{"\u5176\u5b9e", "d"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{"\u597d\u4eba", "n"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
[]WordTag{WordTag{"\u662f\u56e0\u4e3a", "c"}, WordTag{"\u548c", "c"}, WordTag{"\u56fd\u5bb6", "n"}},
[]WordTag{WordTag{"\u8001\u5e74", "t"}, WordTag{"\u641c\u7d22", "v"}, WordTag{"\u8fd8", "d"}, WordTag{"\u652f\u6301", "v"}},
[]WordTag{WordTag{"\u5e72\u8106", "d"}, WordTag{"\u5c31", "d"}, WordTag{"\u628a", "p"}, WordTag{"\u90a3", "r"}, WordTag{"\u90e8", "n"}, WordTag{"\u8499", "v"}, WordTag{"\u4eba", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u95f2", "n"}, WordTag{"\u6cd5", "j"}, WordTag{"\u7ed9", "p"}, WordTag{"\u5e9f", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u62c9\u5012", "v"}, WordTag{"\uff01", "x"}, WordTag{"RT", "eng"}, WordTag{" ", "x"}, WordTag{"@", "x"}, WordTag{"laoshipukong", "eng"}, WordTag{" ", "x"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"27", "eng"}, WordTag{"\u65e5", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "nt"}, WordTag{"\u7b2c\u4e09\u6b21", "m"}, WordTag{"\u5ba1\u8bae", "v"}, WordTag{"\u4fb5\u6743", "v"}, WordTag{"\u8d23\u4efb\u6cd5", "n"}, WordTag{"\u8349\u6848", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u5220\u9664", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u6709\u5173", "vn"}, WordTag{"\u533b\u7597", "n"}, WordTag{"\u635f\u5bb3", "v"}, WordTag{"\u8d23\u4efb", "n"}, WordTag{"\u201c", "x"}, WordTag{"\u4e3e\u8bc1", "v"}, WordTag{"\u5012\u7f6e", "v"}, WordTag{"\u201d", "x"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c4\u5b9a", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u5728", "p"}, WordTag{"\u533b\u60a3", "n"}, WordTag{"\u7ea0\u7eb7", "n"}, WordTag{"\u4e2d", "f"}, WordTag{"\u672c", "r"}, WordTag{"\u5df2", "d"}, WordTag{"\u5904\u4e8e", "v"}, WordTag{"\u5f31\u52bf", "n"}, WordTag{"\u5730\u4f4d", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d88\u8d39\u8005", "n"}, WordTag{"\u7531\u6b64", "c"}, WordTag{"\u5c06", "d"}, WordTag{"\u9677\u5165", "v"}, WordTag{"\u4e07\u52ab\u4e0d\u590d", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u5883\u5730", "s"}, WordTag{"\u3002", "x"}, WordTag{" ", "x"}},
[]WordTag{WordTag{"\u5927", "a"}},
[]WordTag{},
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u8bf4", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u786e\u5b9e", "ad"}, WordTag{"\u5728", "p"}, WordTag{"\u7406", "n"}},
[]WordTag{WordTag{"\u957f\u6625", "ns"}, WordTag{"\u5e02\u957f", "n"}, WordTag{"\u6625\u8282", "t"}, WordTag{"\u8bb2\u8bdd", "n"}},
[]WordTag{WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c", "c"}, WordTag{"\u5c1a\u672a", "d"}, WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u7ed3\u5408", "v"}, WordTag{"\u6210", "n"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u65f6", "n"}},
[]WordTag{WordTag{"\u65c5\u6e38", "vn"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u52a1", "vn"}, WordTag{"\u662f", "v"}, WordTag{"\u6700\u597d", "a"}, WordTag{"\u7684", "uj"}},
[]WordTag{WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u7684\u786e", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u6211", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u9519", "v"}},
[]WordTag{WordTag{"\u4f9b", "v"}, WordTag{"\u5927\u5bb6", "n"}, WordTag{"\u53c2\u8003", "v"}, WordTag{"\u6307\u6b63", "v"}},
[]WordTag{WordTag{"\u54c8\u5c14\u6ee8", "ns"}, WordTag{"\u653f\u5e9c", "n"}, WordTag{"\u516c\u5e03", "v"}, WordTag{"\u584c", "v"}, WordTag{"\u6865", "n"}, WordTag{"\u539f\u56e0", "n"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u5728", "p"}, WordTag{"\u673a\u573a", "n"}, WordTag{"\u5165\u53e3\u5904", "i"}},
[]WordTag{WordTag{"\u90a2", "nr"}, WordTag{"\u6c38", "ns"}, WordTag{"\u81e3", "n"}, WordTag{"\u6444\u5f71", "n"}, WordTag{"\u62a5\u9053", "v"}},
[]WordTag{WordTag{"BP", "eng"}, WordTag{"\u795e\u7ecf\u7f51\u7edc", "n"}, WordTag{"\u5982\u4f55", "r"}, WordTag{"\u8bad\u7ec3", "vn"}, WordTag{"\u624d\u80fd", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5206\u7c7b", "n"}, WordTag{"\u65f6", "n"}, WordTag{"\u589e\u52a0", "v"}, WordTag{"\u533a\u5206\u5ea6", "n"}, WordTag{"\uff1f", "x"}},
[]WordTag{WordTag{"\u5357\u4eac\u5e02", "ns"}, WordTag{"\u957f\u6c5f\u5927\u6865", "ns"}},
[]WordTag{WordTag{"\u5e94", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u4f7f\u7528\u8005", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5efa\u8bae", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e5f", "d"}, WordTag{"\u4e3a\u4e86", "p"}, WordTag{"\u4fbf\u4e8e", "v"}, WordTag{"\u5229\u7528", "n"}, WordTag{"NiuTrans", "eng"}, WordTag{"\u7528\u4e8e", "v"}, WordTag{"SMT", "eng"}, WordTag{"\u7814\u7a76", "vn"}},
[]WordTag{WordTag{"\u957f\u6625\u5e02", "ns"}, WordTag{"\u957f\u6625", "ns"}, WordTag{"\u836f\u5e97", "n"}},
[]WordTag{WordTag{"\u9093\u9896\u8d85", "nr"}, WordTag{"\u751f\u524d", "t"}, WordTag{"\u6700", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u8863\u670d", "n"}},
[]WordTag{WordTag{"\u80e1\u9526\u6d9b", "nr"}, WordTag{"\u662f", "v"}, WordTag{"\u70ed\u7231", "a"}, WordTag{"\u4e16\u754c", "n"}, WordTag{"\u548c\u5e73", "nz"}, WordTag{"\u7684", "uj"}, WordTag{"\u653f\u6cbb\u5c40", "n"}, WordTag{"\u5e38\u59d4", "j"}},
[]WordTag{WordTag{"\u7a0b\u5e8f\u5458", "n"}, WordTag{"\u795d", "v"}, WordTag{"\u6d77\u6797", "nz"}, WordTag{"\u548c", "c"}, WordTag{"\u6731", "nr"}, WordTag{"\u4f1a", "v"}, WordTag{"\u9707", "v"}, WordTag{"\u662f", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5b59", "zg"}, WordTag{"\u5065", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u5de6\u9762", "f"}, WordTag{"\u548c", "c"}, WordTag{"\u53f3\u9762", "f"}, WordTag{",", "x"}, WordTag{" ", "x"}, WordTag{"\u8303", "nr"}, WordTag{"\u51ef", "nr"}, WordTag{"\u5728", "p"}, WordTag{"\u6700", "d"}, WordTag{"\u53f3\u9762", "f"}, WordTag{".", "x"}, WordTag{"\u518d", "d"}, WordTag{"\u5f80", "zg"}, WordTag{"\u5de6", "m"}, WordTag{"\u662f", "v"}, WordTag{"\u674e", "nr"}, WordTag{"\u677e", "v"}, WordTag{"\u6d2a", "nr"}},
[]WordTag{WordTag{"\u4e00\u6b21\u6027", "d"}, WordTag{"\u4ea4", "v"}, WordTag{"\u591a\u5c11", "m"}, WordTag{"\u94b1", "n"}},
[]WordTag{WordTag{"\u4e24\u5757", "m"}, WordTag{"\u4e94", "m"}, WordTag{"\u4e00\u5957", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e09\u5757", "m"}, WordTag{"\u516b", "m"}, WordTag{"\u4e00\u65a4", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u56db\u5757", "m"}, WordTag{"\u4e03", "m"}, WordTag{"\u4e00\u672c", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e94\u5757", "m"}, WordTag{"\u516d", "m"}, WordTag{"\u4e00\u6761", "m"}},
[]WordTag{WordTag{"\u5c0f", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u7559", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u50cf", "v"}, WordTag{"\u5927", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u4e00\u6837", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c\u5c1a\u5934", "nr"}},
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "ns"}, WordTag{"\u516c\u6c11", "n"}, WordTag{";", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7238\u7238", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u5171\u548c\u515a", "nt"}, WordTag{"\u515a\u5458", "n"}, WordTag{";", "x"}, WordTag{" ", "x"}, WordTag{"\u5730\u94c1", "n"}, WordTag{"\u548c\u5e73\u95e8", "ns"}, WordTag{"\u7ad9", "v"}},
[]WordTag{WordTag{"\u5f20\u6653\u6885", "nr"}, WordTag{"\u53bb", "v"}, WordTag{"\u4eba\u6c11", "n"}, WordTag{"\u533b\u9662", "n"}, WordTag{"\u505a", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e2a", "q"}, WordTag{"B\u8d85", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4ef6", "zg"}, WordTag{"T\u6064", "n"}},
[]WordTag{WordTag{"AT&T", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4ef6", "m"}, WordTag{"\u4e0d\u9519", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u516c\u53f8", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u7ed9", "p"}, WordTag{"\u4f60", "r"}, WordTag{"\u53d1", "v"}, WordTag{"offer", "eng"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}},
[]WordTag{WordTag{"C++", "nz"}, WordTag{"\u548c", "c"}, WordTag{"c#", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4ec0\u4e48", "r"}, WordTag{"\u5173\u7cfb", "n"}, WordTag{"\uff1f", "x"}, WordTag{"11", "eng"}, WordTag{"+", "x"}, WordTag{"122", "eng"}, WordTag{"=", "x"}, WordTag{"133", "eng"}, WordTag{"\uff0c", "x"}, WordTag{"\u662f", "v"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"PI", "eng"}, WordTag{"=", "x"}, WordTag{"3", "eng"}, WordTag{".", "x"}, WordTag{"14159", "eng"}},
[]WordTag{WordTag{"\u4f60", "r"}, WordTag{"\u8ba4\u8bc6", "v"}, WordTag{"\u90a3\u4e2a", "r"}, WordTag{"\u548c", "c"}, WordTag{"\u4e3b\u5e2d", "n"}, WordTag{"\u63e1\u624b", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684\u54e5", "n"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"\u4ed6", "r"}, WordTag{"\u5f00", "v"}, WordTag{"\u4e00\u8f86", "m"}, WordTag{"\u9ed1\u8272", "n"}, WordTag{"\u7684\u58eb", "n"}, WordTag{"\u3002", "x"}},
[]WordTag{WordTag{"\u67aa\u6746\u5b50", "n"}, WordTag{"\u4e2d", "f"}, WordTag{"\u51fa", "v"}, WordTag{"\u653f\u6743", "n"}},
}
)
// TestCut runs Cut over every sentence in test_contents, once with its second
// argument true and once with it false, and compares the output element by
// element against defaultCutResult and noHMMCutResult respectively.
//
// When the number of tokens differs from the expectation the mismatch is
// reported and the element-wise comparison for that sentence is skipped;
// comparing anyway could index past the end of the expected slice and panic,
// hiding the real failure.
func TestCut(t *testing.T) {
	jiebago.SetDictionary("../dict.txt")
	for index, content := range test_contents {
		result := Cut(content, true)
		if len(result) != len(defaultCutResult[index]) {
			t.Errorf("Cut(%q, true): got %d tokens, want %d",
				content, len(result), len(defaultCutResult[index]))
		} else {
			for i := range result {
				if result[i] != defaultCutResult[index][i] {
					t.Errorf("Cut(%q, true)[%d] = %v, want %v",
						content, i, result[i], defaultCutResult[index][i])
				}
			}
		}

		result = Cut(content, false)
		if len(result) != len(noHMMCutResult[index]) {
			t.Errorf("Cut(%q, false): got %d tokens, want %d",
				content, len(result), len(noHMMCutResult[index]))
		} else {
			for i := range result {
				if result[i] != noHMMCutResult[index][i] {
					t.Errorf("Cut(%q, false)[%d] = %v, want %v",
						content, i, result[i], noHMMCutResult[index][i])
				}
			}
		}
	}
}

89402
posseg/prob_emit.go Normal file

File diff suppressed because it is too large Load Diff

264
posseg/prob_start.go Normal file
View File

@@ -0,0 +1,264 @@
package posseg
// ProbStart maps a (state, tag) pair to a float64 score. It is created empty
// here and filled in by init below. Viterbi adds ProbStart[y] to the emission
// score of the first observed rune.
// NOTE(review): the values look like natural-log start probabilities of the
// HMM; confirm against the script that generated this table.
var (
ProbStart = make(map[StateTag]float64)
)
// init fills ProbStart with one entry for each of the four states 'B', 'E',
// 'M' and 'S' combined with each of 64 tags (256 entries in total).
//
// Every 'E' and 'M' entry, and many 'B' and 'S' entries, hold -3.14e+100,
// which is the same value as the MIN_FLOAT constant declared in viterbi.go.
// NOTE(review): that value presumably stands in for log(0), i.e. "this pair
// never starts a sequence"; confirm before editing the table by hand.
func init() {
ProbStart[StateTag{'B', "a"}] = -4.762305214596967
ProbStart[StateTag{'B', "ad"}] = -6.680066036784177
ProbStart[StateTag{'B', "ag"}] = -3.14e+100
ProbStart[StateTag{'B', "an"}] = -8.697083223018778
ProbStart[StateTag{'B', "b"}] = -5.018374362109218
ProbStart[StateTag{'B', "bg"}] = -3.14e+100
ProbStart[StateTag{'B', "c"}] = -3.423880184954888
ProbStart[StateTag{'B', "d"}] = -3.9750475297585357
ProbStart[StateTag{'B', "df"}] = -8.888974230828882
ProbStart[StateTag{'B', "dg"}] = -3.14e+100
ProbStart[StateTag{'B', "e"}] = -8.563551830394255
ProbStart[StateTag{'B', "en"}] = -3.14e+100
ProbStart[StateTag{'B', "f"}] = -5.491630418482717
ProbStart[StateTag{'B', "g"}] = -3.14e+100
ProbStart[StateTag{'B', "h"}] = -13.533365129970255
ProbStart[StateTag{'B', "i"}] = -6.1157847275557105
ProbStart[StateTag{'B', "in"}] = -3.14e+100
ProbStart[StateTag{'B', "j"}] = -5.0576191284681915
ProbStart[StateTag{'B', "jn"}] = -3.14e+100
ProbStart[StateTag{'B', "k"}] = -3.14e+100
ProbStart[StateTag{'B', "l"}] = -4.905883584659895
ProbStart[StateTag{'B', "ln"}] = -3.14e+100
ProbStart[StateTag{'B', "m"}] = -3.6524299819046386
ProbStart[StateTag{'B', "mg"}] = -3.14e+100
ProbStart[StateTag{'B', "mq"}] = -6.78695300139688
ProbStart[StateTag{'B', "n"}] = -1.6966257797548328
ProbStart[StateTag{'B', "ng"}] = -3.14e+100
ProbStart[StateTag{'B', "nr"}] = -2.2310495913769506
ProbStart[StateTag{'B', "nrfg"}] = -5.873722175405573
ProbStart[StateTag{'B', "nrt"}] = -4.985642733519195
ProbStart[StateTag{'B', "ns"}] = -2.8228438314969213
ProbStart[StateTag{'B', "nt"}] = -4.846091668182416
ProbStart[StateTag{'B', "nz"}] = -3.94698846057672
ProbStart[StateTag{'B', "o"}] = -8.433498702146057
ProbStart[StateTag{'B', "p"}] = -4.200984132085048
ProbStart[StateTag{'B', "q"}] = -6.998123858956596
ProbStart[StateTag{'B', "qe"}] = -3.14e+100
ProbStart[StateTag{'B', "qg"}] = -3.14e+100
ProbStart[StateTag{'B', "r"}] = -3.4098187790818413
ProbStart[StateTag{'B', "rg"}] = -3.14e+100
ProbStart[StateTag{'B', "rr"}] = -12.434752841302146
ProbStart[StateTag{'B', "rz"}] = -7.946116471570005
ProbStart[StateTag{'B', "s"}] = -5.522673590839954
ProbStart[StateTag{'B', "t"}] = -3.3647479094528574
ProbStart[StateTag{'B', "tg"}] = -3.14e+100
ProbStart[StateTag{'B', "u"}] = -9.163917277503234
ProbStart[StateTag{'B', "ud"}] = -3.14e+100
ProbStart[StateTag{'B', "ug"}] = -3.14e+100
ProbStart[StateTag{'B', "uj"}] = -3.14e+100
ProbStart[StateTag{'B', "ul"}] = -3.14e+100
ProbStart[StateTag{'B', "uv"}] = -3.14e+100
ProbStart[StateTag{'B', "uz"}] = -3.14e+100
ProbStart[StateTag{'B', "v"}] = -2.6740584874265685
ProbStart[StateTag{'B', "vd"}] = -9.044728760238115
ProbStart[StateTag{'B', "vg"}] = -3.14e+100
ProbStart[StateTag{'B', "vi"}] = -12.434752841302146
ProbStart[StateTag{'B', "vn"}] = -4.3315610890163585
ProbStart[StateTag{'B', "vq"}] = -12.147070768850364
ProbStart[StateTag{'B', "w"}] = -3.14e+100
ProbStart[StateTag{'B', "x"}] = -3.14e+100
ProbStart[StateTag{'B', "y"}] = -9.844485675856319
ProbStart[StateTag{'B', "yg"}] = -3.14e+100
ProbStart[StateTag{'B', "z"}] = -7.045681111485645
ProbStart[StateTag{'B', "zg"}] = -3.14e+100
ProbStart[StateTag{'E', "a"}] = -3.14e+100
ProbStart[StateTag{'E', "ad"}] = -3.14e+100
ProbStart[StateTag{'E', "ag"}] = -3.14e+100
ProbStart[StateTag{'E', "an"}] = -3.14e+100
ProbStart[StateTag{'E', "b"}] = -3.14e+100
ProbStart[StateTag{'E', "bg"}] = -3.14e+100
ProbStart[StateTag{'E', "c"}] = -3.14e+100
ProbStart[StateTag{'E', "d"}] = -3.14e+100
ProbStart[StateTag{'E', "df"}] = -3.14e+100
ProbStart[StateTag{'E', "dg"}] = -3.14e+100
ProbStart[StateTag{'E', "e"}] = -3.14e+100
ProbStart[StateTag{'E', "en"}] = -3.14e+100
ProbStart[StateTag{'E', "f"}] = -3.14e+100
ProbStart[StateTag{'E', "g"}] = -3.14e+100
ProbStart[StateTag{'E', "h"}] = -3.14e+100
ProbStart[StateTag{'E', "i"}] = -3.14e+100
ProbStart[StateTag{'E', "in"}] = -3.14e+100
ProbStart[StateTag{'E', "j"}] = -3.14e+100
ProbStart[StateTag{'E', "jn"}] = -3.14e+100
ProbStart[StateTag{'E', "k"}] = -3.14e+100
ProbStart[StateTag{'E', "l"}] = -3.14e+100
ProbStart[StateTag{'E', "ln"}] = -3.14e+100
ProbStart[StateTag{'E', "m"}] = -3.14e+100
ProbStart[StateTag{'E', "mg"}] = -3.14e+100
ProbStart[StateTag{'E', "mq"}] = -3.14e+100
ProbStart[StateTag{'E', "n"}] = -3.14e+100
ProbStart[StateTag{'E', "ng"}] = -3.14e+100
ProbStart[StateTag{'E', "nr"}] = -3.14e+100
ProbStart[StateTag{'E', "nrfg"}] = -3.14e+100
ProbStart[StateTag{'E', "nrt"}] = -3.14e+100
ProbStart[StateTag{'E', "ns"}] = -3.14e+100
ProbStart[StateTag{'E', "nt"}] = -3.14e+100
ProbStart[StateTag{'E', "nz"}] = -3.14e+100
ProbStart[StateTag{'E', "o"}] = -3.14e+100
ProbStart[StateTag{'E', "p"}] = -3.14e+100
ProbStart[StateTag{'E', "q"}] = -3.14e+100
ProbStart[StateTag{'E', "qe"}] = -3.14e+100
ProbStart[StateTag{'E', "qg"}] = -3.14e+100
ProbStart[StateTag{'E', "r"}] = -3.14e+100
ProbStart[StateTag{'E', "rg"}] = -3.14e+100
ProbStart[StateTag{'E', "rr"}] = -3.14e+100
ProbStart[StateTag{'E', "rz"}] = -3.14e+100
ProbStart[StateTag{'E', "s"}] = -3.14e+100
ProbStart[StateTag{'E', "t"}] = -3.14e+100
ProbStart[StateTag{'E', "tg"}] = -3.14e+100
ProbStart[StateTag{'E', "u"}] = -3.14e+100
ProbStart[StateTag{'E', "ud"}] = -3.14e+100
ProbStart[StateTag{'E', "ug"}] = -3.14e+100
ProbStart[StateTag{'E', "uj"}] = -3.14e+100
ProbStart[StateTag{'E', "ul"}] = -3.14e+100
ProbStart[StateTag{'E', "uv"}] = -3.14e+100
ProbStart[StateTag{'E', "uz"}] = -3.14e+100
ProbStart[StateTag{'E', "v"}] = -3.14e+100
ProbStart[StateTag{'E', "vd"}] = -3.14e+100
ProbStart[StateTag{'E', "vg"}] = -3.14e+100
ProbStart[StateTag{'E', "vi"}] = -3.14e+100
ProbStart[StateTag{'E', "vn"}] = -3.14e+100
ProbStart[StateTag{'E', "vq"}] = -3.14e+100
ProbStart[StateTag{'E', "w"}] = -3.14e+100
ProbStart[StateTag{'E', "x"}] = -3.14e+100
ProbStart[StateTag{'E', "y"}] = -3.14e+100
ProbStart[StateTag{'E', "yg"}] = -3.14e+100
ProbStart[StateTag{'E', "z"}] = -3.14e+100
ProbStart[StateTag{'E', "zg"}] = -3.14e+100
ProbStart[StateTag{'M', "a"}] = -3.14e+100
ProbStart[StateTag{'M', "ad"}] = -3.14e+100
ProbStart[StateTag{'M', "ag"}] = -3.14e+100
ProbStart[StateTag{'M', "an"}] = -3.14e+100
ProbStart[StateTag{'M', "b"}] = -3.14e+100
ProbStart[StateTag{'M', "bg"}] = -3.14e+100
ProbStart[StateTag{'M', "c"}] = -3.14e+100
ProbStart[StateTag{'M', "d"}] = -3.14e+100
ProbStart[StateTag{'M', "df"}] = -3.14e+100
ProbStart[StateTag{'M', "dg"}] = -3.14e+100
ProbStart[StateTag{'M', "e"}] = -3.14e+100
ProbStart[StateTag{'M', "en"}] = -3.14e+100
ProbStart[StateTag{'M', "f"}] = -3.14e+100
ProbStart[StateTag{'M', "g"}] = -3.14e+100
ProbStart[StateTag{'M', "h"}] = -3.14e+100
ProbStart[StateTag{'M', "i"}] = -3.14e+100
ProbStart[StateTag{'M', "in"}] = -3.14e+100
ProbStart[StateTag{'M', "j"}] = -3.14e+100
ProbStart[StateTag{'M', "jn"}] = -3.14e+100
ProbStart[StateTag{'M', "k"}] = -3.14e+100
ProbStart[StateTag{'M', "l"}] = -3.14e+100
ProbStart[StateTag{'M', "ln"}] = -3.14e+100
ProbStart[StateTag{'M', "m"}] = -3.14e+100
ProbStart[StateTag{'M', "mg"}] = -3.14e+100
ProbStart[StateTag{'M', "mq"}] = -3.14e+100
ProbStart[StateTag{'M', "n"}] = -3.14e+100
ProbStart[StateTag{'M', "ng"}] = -3.14e+100
ProbStart[StateTag{'M', "nr"}] = -3.14e+100
ProbStart[StateTag{'M', "nrfg"}] = -3.14e+100
ProbStart[StateTag{'M', "nrt"}] = -3.14e+100
ProbStart[StateTag{'M', "ns"}] = -3.14e+100
ProbStart[StateTag{'M', "nt"}] = -3.14e+100
ProbStart[StateTag{'M', "nz"}] = -3.14e+100
ProbStart[StateTag{'M', "o"}] = -3.14e+100
ProbStart[StateTag{'M', "p"}] = -3.14e+100
ProbStart[StateTag{'M', "q"}] = -3.14e+100
ProbStart[StateTag{'M', "qe"}] = -3.14e+100
ProbStart[StateTag{'M', "qg"}] = -3.14e+100
ProbStart[StateTag{'M', "r"}] = -3.14e+100
ProbStart[StateTag{'M', "rg"}] = -3.14e+100
ProbStart[StateTag{'M', "rr"}] = -3.14e+100
ProbStart[StateTag{'M', "rz"}] = -3.14e+100
ProbStart[StateTag{'M', "s"}] = -3.14e+100
ProbStart[StateTag{'M', "t"}] = -3.14e+100
ProbStart[StateTag{'M', "tg"}] = -3.14e+100
ProbStart[StateTag{'M', "u"}] = -3.14e+100
ProbStart[StateTag{'M', "ud"}] = -3.14e+100
ProbStart[StateTag{'M', "ug"}] = -3.14e+100
ProbStart[StateTag{'M', "uj"}] = -3.14e+100
ProbStart[StateTag{'M', "ul"}] = -3.14e+100
ProbStart[StateTag{'M', "uv"}] = -3.14e+100
ProbStart[StateTag{'M', "uz"}] = -3.14e+100
ProbStart[StateTag{'M', "v"}] = -3.14e+100
ProbStart[StateTag{'M', "vd"}] = -3.14e+100
ProbStart[StateTag{'M', "vg"}] = -3.14e+100
ProbStart[StateTag{'M', "vi"}] = -3.14e+100
ProbStart[StateTag{'M', "vn"}] = -3.14e+100
ProbStart[StateTag{'M', "vq"}] = -3.14e+100
ProbStart[StateTag{'M', "w"}] = -3.14e+100
ProbStart[StateTag{'M', "x"}] = -3.14e+100
ProbStart[StateTag{'M', "y"}] = -3.14e+100
ProbStart[StateTag{'M', "yg"}] = -3.14e+100
ProbStart[StateTag{'M', "z"}] = -3.14e+100
ProbStart[StateTag{'M', "zg"}] = -3.14e+100
ProbStart[StateTag{'S', "a"}] = -3.9025396831295227
ProbStart[StateTag{'S', "ad"}] = -11.048458480182255
ProbStart[StateTag{'S', "ag"}] = -6.954113917960154
ProbStart[StateTag{'S', "an"}] = -12.84021794941031
ProbStart[StateTag{'S', "b"}] = -6.472888763970454
ProbStart[StateTag{'S', "bg"}] = -3.14e+100
ProbStart[StateTag{'S', "c"}] = -4.786966795861212
ProbStart[StateTag{'S', "d"}] = -3.903919764181873
ProbStart[StateTag{'S', "df"}] = -3.14e+100
ProbStart[StateTag{'S', "dg"}] = -8.948397651299683
ProbStart[StateTag{'S', "e"}] = -5.942513006281674
ProbStart[StateTag{'S', "en"}] = -3.14e+100
ProbStart[StateTag{'S', "f"}] = -5.194820249981676
ProbStart[StateTag{'S', "g"}] = -6.507826815331734
ProbStart[StateTag{'S', "h"}] = -8.650563207383884
ProbStart[StateTag{'S', "i"}] = -3.14e+100
ProbStart[StateTag{'S', "in"}] = -3.14e+100
ProbStart[StateTag{'S', "j"}] = -4.911992119644354
ProbStart[StateTag{'S', "jn"}] = -3.14e+100
ProbStart[StateTag{'S', "k"}] = -6.940320595827818
ProbStart[StateTag{'S', "l"}] = -3.14e+100
ProbStart[StateTag{'S', "ln"}] = -3.14e+100
ProbStart[StateTag{'S', "m"}] = -3.269200652116097
ProbStart[StateTag{'S', "mg"}] = -10.825314928868044
ProbStart[StateTag{'S', "mq"}] = -3.14e+100
ProbStart[StateTag{'S', "n"}] = -3.8551483897645107
ProbStart[StateTag{'S', "ng"}] = -4.913434861102905
ProbStart[StateTag{'S', "nr"}] = -4.483663103956885
ProbStart[StateTag{'S', "nrfg"}] = -3.14e+100
ProbStart[StateTag{'S', "nrt"}] = -3.14e+100
ProbStart[StateTag{'S', "ns"}] = -3.14e+100
ProbStart[StateTag{'S', "nt"}] = -12.147070768850364
ProbStart[StateTag{'S', "nz"}] = -3.14e+100
ProbStart[StateTag{'S', "o"}] = -8.464460927750023
ProbStart[StateTag{'S', "p"}] = -2.9868401813596317
ProbStart[StateTag{'S', "q"}] = -4.888658618255058
ProbStart[StateTag{'S', "qe"}] = -3.14e+100
ProbStart[StateTag{'S', "qg"}] = -3.14e+100
ProbStart[StateTag{'S', "r"}] = -2.7635336784127853
ProbStart[StateTag{'S', "rg"}] = -10.275268591948773
ProbStart[StateTag{'S', "rr"}] = -3.14e+100
ProbStart[StateTag{'S', "rz"}] = -3.14e+100
ProbStart[StateTag{'S', "s"}] = -3.14e+100
ProbStart[StateTag{'S', "t"}] = -3.14e+100
ProbStart[StateTag{'S', "tg"}] = -6.272842531880403
ProbStart[StateTag{'S', "u"}] = -6.940320595827818
ProbStart[StateTag{'S', "ud"}] = -7.728230161053767
ProbStart[StateTag{'S', "ug"}] = -7.5394037026636855
ProbStart[StateTag{'S', "uj"}] = -6.85251045118004
ProbStart[StateTag{'S', "ul"}] = -8.4153713175535
ProbStart[StateTag{'S', "uv"}] = -8.15808672228609
ProbStart[StateTag{'S', "uz"}] = -9.299258625372996
ProbStart[StateTag{'S', "v"}] = -3.053292303412302
ProbStart[StateTag{'S', "vd"}] = -3.14e+100
ProbStart[StateTag{'S', "vg"}] = -5.9430181843676895
ProbStart[StateTag{'S', "vi"}] = -3.14e+100
ProbStart[StateTag{'S', "vn"}] = -11.453923588290419
ProbStart[StateTag{'S', "vq"}] = -3.14e+100
ProbStart[StateTag{'S', "w"}] = -3.14e+100
ProbStart[StateTag{'S', "x"}] = -8.427419656069674
ProbStart[StateTag{'S', "y"}] = -6.1970794699489575
ProbStart[StateTag{'S', "yg"}] = -13.533365129970255
ProbStart[StateTag{'S', "z"}] = -3.14e+100
ProbStart[StateTag{'S', "zg"}] = -3.14e+100
}

5496
posseg/prob_trans.go Normal file

File diff suppressed because it is too large Load Diff

128
posseg/viterbi.go Normal file
View File

@@ -0,0 +1,128 @@
package posseg
import (
"fmt"
"sort"
)
// MIN_FLOAT is a very large negative float64. The same value appears
// throughout the ProbStart table, and TestViterbi expects Viterbi to return it
// for its sample sentence.
// NOTE(review): presumably a stand-in for log(0), i.e. an "impossible" score.
const MIN_FLOAT = -3.14e100
// StateTag pairs a single-byte state with a tag string. It is comparable and
// is used as a map key by the probability tables and by Viterbi.
type StateTag struct {
	State byte   // the tables in this package use 'B', 'M', 'E' and 'S'
	Tag   string // tag such as "n" or "nr"
}

// String renders the pair as (<quoted state>, <tag>), e.g. ('B', nr).
func (st StateTag) String() string {
	return fmt.Sprintf("(%q, %s)", st.State, st.Tag)
}

// emptyStateTag returns the placeholder StateTag{' ', ""} that Viterbi stores
// as the predecessor of every state at the first position.
func emptyStateTag() StateTag {
	return StateTag{State: ' ', Tag: ""}
}

// ProbState is a score together with the StateTag it belongs to.
type ProbState struct {
	Prob float64
	ST   StateTag
}

// String renders the value as (<state tag>: <prob>), using %f for the score.
func (ps ProbState) String() string {
	return fmt.Sprintf("(%v: %f)", ps.ST, ps.Prob)
}

// ProbStates is a slice of ProbState that implements sort.Interface, ordering
// ascending by Prob, then by Tag, then by State.
type ProbStates []ProbState

// Len returns the number of elements.
func (pss ProbStates) Len() int {
	return len(pss)
}

// Less reports whether element i sorts before element j.
//
// The order is lexicographic over (Prob, Tag, State). State is consulted only
// when both Prob and Tag are equal. Consulting it whenever Tag was merely
// "not smaller" would let Less(i, j) and Less(j, i) both be true for some
// pairs, which violates the sort.Interface contract and makes the outcome of
// a sort depend on the input order.
func (pss ProbStates) Less(i, j int) bool {
	a, b := pss[i], pss[j]
	switch {
	case a.Prob != b.Prob:
		return a.Prob < b.Prob
	case a.ST.Tag != b.ST.Tag:
		return a.ST.Tag < b.ST.Tag
	default:
		return a.ST.State < b.ST.State
	}
}

// Swap exchanges elements i and j.
func (pss ProbStates) Swap(i, j int) {
	pss[i], pss[j] = pss[j], pss[i]
}
// Viterbi runs a dynamic-programming pass over obs and returns the best final
// score together with one StateTag per rune in obs.
//
// For the first rune, every state y returned by CharStateTab.Get is scored as
// ProbEmit[y].Get(rune) + ProbStart[y]. For each later position t:
//   - the previous states are those reached at t-1 that have at least one
//     outgoing entry in ProbTrans;
//   - the candidate states are those CharStateTab lists for obs[t] that are
//     also a transition target of some previous state, falling back to
//     ProbTransKeys when that intersection is empty;
//   - each candidate keeps the predecessor maximising
//     V[t-1][prev] + ProbTrans[prev].Get(y) + ProbEmit[y].Get(obs[t]),
//     with ties resolved by the ProbStates sort order.
//
// The route is rebuilt by following the stored predecessors backwards from
// the best-scoring final state.
//
// An empty obs returns (MIN_FLOAT, empty route); previously it panicked while
// indexing position 0.
// NOTE(review): the function still panics if no state survives at some
// position (pss or last empty); confirm the tables rule that out.
func Viterbi(obs []rune) (float64, []StateTag) {
	obsLength := len(obs)
	if obsLength == 0 {
		return MIN_FLOAT, []StateTag{}
	}

	V := make([]map[StateTag]float64, obsLength)
	memPath := make([]map[StateTag]StateTag, obsLength)

	V[0] = make(map[StateTag]float64)
	memPath[0] = make(map[StateTag]StateTag)
	for _, y := range CharStateTab.Get(obs[0]) {
		V[0][y] = ProbEmit[y].Get(obs[0]) + ProbStart[y]
		memPath[0][y] = emptyStateTag()
	}

	for t := 1; t < obsLength; t++ {
		// Previous states that can transition anywhere at all.
		prevStates := make([]StateTag, 0, len(memPath[t-1]))
		for x := range memPath[t-1] {
			if len(ProbTrans[x]) > 0 {
				prevStates = append(prevStates, x)
			}
		}

		// Set of every state reachable from some previous state.
		reachable := make(map[StateTag]struct{})
		for _, x := range prevStates {
			for y := range ProbTrans[x] {
				reachable[y] = struct{}{}
			}
		}

		// Keep only the states allowed for this rune that are also reachable.
		obsStates := make([]StateTag, 0)
		for _, y := range CharStateTab.Get(obs[t]) {
			if _, ok := reachable[y]; ok {
				obsStates = append(obsStates, y)
			}
		}
		if len(obsStates) == 0 {
			obsStates = ProbTransKeys
		}

		memPath[t] = make(map[StateTag]StateTag)
		V[t] = make(map[StateTag]float64)
		for _, y := range obsStates {
			emit := ProbEmit[y].Get(obs[t])
			pss := make(ProbStates, 0, len(prevStates))
			for _, y0 := range prevStates {
				pss = append(pss, ProbState{
					Prob: V[t-1][y0] + ProbTrans[y0].Get(y) + emit,
					ST:   y0,
				})
			}
			// Descending sort puts the best predecessor first.
			sort.Sort(sort.Reverse(pss))
			V[t][y] = pss[0].Prob
			memPath[t][y] = pss[0].ST
		}
	}

	final := obsLength - 1
	last := make(ProbStates, 0, len(memPath[final]))
	for y := range memPath[final] {
		last = append(last, ProbState{Prob: V[final][y], ST: y})
	}
	sort.Sort(sort.Reverse(last))
	prob, state := last[0].Prob, last[0].ST

	// Walk the predecessor links from the end back to the start.
	route := make([]StateTag, obsLength)
	for i := final; i >= 0; i-- {
		route[i] = state
		state = memPath[i][state]
	}
	return prob, route
}

46
posseg/viterbi_test.go Normal file
View File

@@ -0,0 +1,46 @@
package posseg
import (
"testing"
)
// route1 is the state/tag sequence TestViterbi expects for its sample
// sentence: one entry per rune.
var route1 = []StateTag{
	{'B', "nr"},
	{'M', "nr"},
	{'E', "nr"},
	{'S', "v"},
	{'B', "v"},
	{'E', "v"},
	{'B', "n"},
	{'M', "n"},
	{'E', "n"},
	{'S', "d"},
	{'S', "v"},
	{'S', "n"},
	{'B', "v"},
	{'E', "v"},
	{'B', "nr"},
	{'M', "nr"},
	{'M', "nr"},
	{'M', "nr"},
	{'E', "nr"},
	{'S', "zg"},
}
// TestViterbi decodes a fixed sentence and checks both the returned score
// (expected to equal MIN_FLOAT) and the per-rune route against route1.
//
// A length mismatch aborts the test immediately: comparing element by element
// afterwards could index past the end of route1 and panic instead of
// reporting the real problem.
func TestViterbi(t *testing.T) {
	ss := "李小福是创新办主任也是云计算方面的专家;"
	prob, route := Viterbi([]rune(ss))
	if prob != MIN_FLOAT {
		t.Errorf("Viterbi(%q) prob = %v, want %v", ss, prob, MIN_FLOAT)
	}
	if len(route) != len(route1) {
		t.Fatalf("Viterbi(%q) returned %d states, want %d", ss, len(route), len(route1))
	}
	for index := range route {
		if route[index] != route1[index] {
			t.Errorf("route[%d] = %v, want %v", index, route[index], route1[index])
		}
	}
}

174
trie_node.go Normal file
View File

@@ -0,0 +1,174 @@
package jiebago
import (
"bufio"
"crypto/sha1"
"encoding/gob"
"fmt"
"math"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"unicode/utf8"
)
// File names used by newTopTrie for the gob-encoded trie cache it keeps under
// os.TempDir().
const (
// CACHE_NAME is the cache file name used when the dictionary being loaded is
// the default one.
CACHE_NAME = "jieba.gob"
// USER_CACHE_PREFIX and USER_CACHE_SUFFIX are placed around a SHA-1 hex digest
// (see getUserCacheName) to name the cache of any other dictionary.
USER_CACHE_PREFIX = "jieba.user."
USER_CACHE_SUFFIX = ".gob"
)
// Node is one element of the dictionary trie. Fields are exported because the
// trie is written to and read from the cache with encoding/gob.
type Node struct {
// Name is the single character (as a string) this node was stored under.
Name string
// SubNodes holds the children, keyed by the next character of the word.
SubNodes Trie
// IsLeaf is true when some added word ends at this node.
IsLeaf bool
}
// Trie is one level of the dictionary trie: a map from a one-character string
// to the node for that character.
type Trie map[string]*Node
// TopTrie is the root of the dictionary trie plus the frequency statistics
// gathered while loading it.
type TopTrie struct {
// T is the first level of the trie.
T Trie
// MinFreq is the smallest log-frequency found by newTopTrie (it starts at 100.0).
MinFreq float64
// Total is the sum of the raw frequencies read from the dictionary file.
Total float64
// Freq maps each word to its frequency. newTopTrie replaces the raw counts
// with log(count / Total) once the whole file has been read.
Freq map[string]float64
}
// hash returns the SHA-1 digest of s as a 40-character lowercase hex string.
func hash(s string) string {
	digest := sha1.Sum([]byte(s))
	return fmt.Sprintf("%x", digest[:])
}
// getUserCacheName builds a cache file name by placing the hash of path
// between prefix and suffix.
func getUserCacheName(prefix string, path string, suffix string) string {
	digest := hash(path)
	// All operands are strings, so Sprint concatenates them without spaces.
	return fmt.Sprint(prefix, digest, suffix)
}
// newTopTrie loads the dictionary in filename and returns the trie built from
// it. A relative filename is resolved against the current working directory.
//
// A gob-encoded cache under os.TempDir() is used instead of parsing when it
// exists, is newer than the reference dictionary path, and decodes cleanly.
// Otherwise the file is parsed line by line as whitespace-separated
// "word freq [tag]" records (blank lines are skipped), every frequency is
// converted to log(freq / Total), MinFreq is set to the smallest such value,
// and the cache is rewritten on a best-effort basis.
//
// It returns an error when the working directory or dictionary cannot be
// read, or when a line has no frequency or an unparsable one. Such lines
// previously caused an index-out-of-range panic or were silently loaded with
// frequency 0.
//
// NOTE(review): both the cache name for non-default dictionaries and the
// freshness check use the path derived from the package-level Dictionary
// value and the caller's source directory, not the path of the file actually
// loaded. That is kept as-is here; confirm it is intended.
func newTopTrie(filename string) (*TopTrie, error) {
	dictPath := filename
	if !filepath.IsAbs(dictPath) {
		pwd, err := os.Getwd()
		if err != nil {
			return nil, err
		}
		dictPath = filepath.Clean(filepath.Join(pwd, filename))
	}

	// Caller(1) must be evaluated in this function so that it keeps referring
	// to the source file of whoever called newTopTrie.
	_, callerFile, _, _ := runtime.Caller(1)
	defaultPath := filepath.Join(filepath.Dir(callerFile), Dictionary)

	cacheName := CACHE_NAME
	if dictPath != defaultPath {
		cacheName = getUserCacheName(USER_CACHE_PREFIX, defaultPath, USER_CACHE_SUFFIX)
	}
	cachePath := filepath.Join(os.TempDir(), cacheName)

	if cached := loadTrieCache(cachePath, defaultPath); cached != nil {
		return cached, nil
	}

	file, err := os.Open(dictPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	topTrie := &TopTrie{T: make(Trie), MinFreq: 100.0, Total: 0.0, Freq: make(map[string]float64)}
	scanner := bufio.NewScanner(file)
	for lineNo := 1; scanner.Scan(); lineNo++ {
		fields := strings.Fields(scanner.Text())
		if len(fields) == 0 {
			continue
		}
		if len(fields) < 2 {
			return nil, fmt.Errorf("%s:%d: expected \"word freq [tag]\", got %q",
				dictPath, lineNo, scanner.Text())
		}
		freq, err := strconv.ParseFloat(fields[1], 64)
		if err != nil {
			return nil, fmt.Errorf("%s:%d: invalid frequency %q: %v",
				dictPath, lineNo, fields[1], err)
		}
		topTrie.Total += freq
		topTrie.addWord(fields[0], freq)
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// Replace raw counts with log relative frequencies and track the minimum.
	for key, count := range topTrie.Freq {
		val := math.Log(count / topTrie.Total)
		if val < topTrie.MinFreq {
			topTrie.MinFreq = val
		}
		topTrie.Freq[key] = val
	}

	saveTrieCache(cachePath, topTrie)
	return topTrie, nil
}

// loadTrieCache returns the trie stored at cachePath, or nil when the cache is
// missing, not newer than dictPath, unreadable, or fails to decode.
func loadTrieCache(cachePath, dictPath string) *TopTrie {
	cacheStat, err := os.Stat(cachePath)
	if err != nil {
		return nil
	}
	dictStat, err := os.Stat(dictPath)
	if err != nil {
		// Without a modification time to compare against, treat the cache as stale.
		return nil
	}
	if !cacheStat.ModTime().After(dictStat.ModTime()) {
		return nil
	}
	cacheFile, err := os.Open(cachePath)
	if err != nil {
		return nil
	}
	defer cacheFile.Close()

	var cached *TopTrie
	if err := gob.NewDecoder(cacheFile).Decode(&cached); err != nil {
		return nil
	}
	return cached
}

// saveTrieCache writes tt to cachePath, replacing any previous content. The
// cache is only an optimisation, so failures are not reported; a file that
// could not be written completely is removed.
func saveTrieCache(cachePath string, tt *TopTrie) {
	out, err := os.OpenFile(cachePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return
	}
	encodeErr := gob.NewEncoder(out).Encode(tt)
	closeErr := out.Close()
	if encodeErr != nil || closeErr != nil {
		os.Remove(cachePath)
	}
}
// addWord records freq for word in tt.Freq and inserts word into the trie one
// character at a time, creating missing nodes on the way down. The node of the
// final character is flagged as a leaf. An empty word only updates tt.Freq.
func (tt *TopTrie) addWord(word string, freq float64) {
	tt.Freq[word] = freq

	level := tt.T
	var last *Node
	for rest := word; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		rest = rest[size:]

		key := string(r)
		child, found := level[key]
		if !found {
			child = &Node{Name: key, IsLeaf: false}
			child.SubNodes = make(Trie)
			level[key] = child
		}
		last = child
		level = child.SubNodes
	}
	if last != nil {
		last.IsLeaf = true
	}
}
// addWord inserts word with freq into the package-level trie TT. When tag is
// non-empty, its whitespace-trimmed form is first recorded for word in
// UserWordTagTab.
func addWord(word string, freq float64, tag string) {
	if tag != "" {
		trimmed := strings.TrimSpace(tag)
		UserWordTagTab[word] = trimmed
	}
	TT.addWord(word, freq)
}
// LoadUserDict reads a user dictionary and adds every entry to the
// package-level trie TT.
//
// Each non-blank line holds whitespace-separated fields "word freq [tag]".
// Splitting on whitespace rather than on a single space matters for two-field
// lines such as "好用 300": the trailing newline used to stay attached to the
// frequency, the parse failed, and the word was silently stored with
// frequency 0. Blank lines are skipped instead of panicking.
//
// It returns an error if the file cannot be opened or read, or if a line has
// no frequency or an unparsable one. Entries on earlier lines have already
// been added when an error is returned.
//
// NOTE(review): the optional tag column is still ignored here although the
// package-level addWord(word, freq, tag) can record it. Frequencies are also
// stored raw, whereas newTopTrie converts the main dictionary's to logs.
// Confirm whether both are intended.
func LoadUserDict(file_path string) error {
	file, err := os.Open(file_path)
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for lineNo := 1; scanner.Scan(); lineNo++ {
		fields := strings.Fields(scanner.Text())
		if len(fields) == 0 {
			continue
		}
		if len(fields) < 2 {
			return fmt.Errorf("%s:%d: expected \"word freq [tag]\", got %q",
				file_path, lineNo, scanner.Text())
		}
		freq, err := strconv.ParseFloat(fields[1], 64)
		if err != nil {
			return fmt.Errorf("%s:%d: invalid frequency %q: %v",
				file_path, lineNo, fields[1], err)
		}
		TT.addWord(fields[0], freq)
	}
	return scanner.Err()
}

6
userdict.txt Normal file
View File

@@ -0,0 +1,6 @@
云计算 5
李小福 2 nr
创新办 3 i
easy_install 3 eng
好用 300
韩玉赏鉴 3 nz