mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-10 11:40:26 +08:00
initial commit
This commit is contained in:
80
analyse/analyse.go
Normal file
80
analyse/analyse.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package analyse
|
||||
|
||||
import (
|
||||
"github.com/wangbin/jiebago"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TfIdf pairs a word with a weight. ExtractTags fills freq with the word's
// normalized term frequency multiplied by its IDF value.
type TfIdf struct {
	word string
	freq float64
}

// TfIdfs is a slice of TfIdf implementing sort.Interface. Elements are
// ordered by ascending freq; equal weights are ordered by ascending word.
type TfIdfs []TfIdf

// Len returns the number of entries.
func (tis TfIdfs) Len() int {
	return len(tis)
}

// Less reports whether entry i sorts before entry j: lower freq first, and
// for identical freq the lexically smaller word first.
func (tis TfIdfs) Less(i, j int) bool {
	if tis[i].freq != tis[j].freq {
		return tis[i].freq < tis[j].freq
	}
	return tis[i].word < tis[j].word
}

// Swap exchanges entries i and j.
func (tis TfIdfs) Swap(i, j int) {
	tis[i], tis[j] = tis[j], tis[i]
}
|
||||
|
||||
func ExtractTags(sentence string, topK int) []string {
|
||||
words := jiebago.Cut(sentence, false, true)
|
||||
freq := make(map[string]float64)
|
||||
|
||||
for _, w := range words {
|
||||
w = strings.TrimSpace(w)
|
||||
if utf8.RuneCountInString(w) < 2 {
|
||||
continue
|
||||
}
|
||||
index := stopWords.Search(w)
|
||||
if index < len(stopWords) && stopWords[index] == w {
|
||||
continue
|
||||
}
|
||||
if f, ok := freq[w]; ok {
|
||||
freq[w] = f + 1.0
|
||||
} else {
|
||||
freq[w] = 1.0
|
||||
}
|
||||
}
|
||||
total := 0.0
|
||||
for _, f := range freq {
|
||||
total += f
|
||||
}
|
||||
for k, v := range freq {
|
||||
freq[k] = v / total
|
||||
}
|
||||
tis := make(TfIdfs, 0)
|
||||
for k, v := range freq {
|
||||
var ti TfIdf
|
||||
if freq_, ok := idfFreq[k]; ok {
|
||||
ti = TfIdf{word: k, freq: freq_ * v}
|
||||
} else {
|
||||
ti = TfIdf{word: k, freq: medianIdf * v}
|
||||
}
|
||||
tis = append(tis, ti)
|
||||
}
|
||||
sort.Sort(sort.Reverse(tis))
|
||||
var topTfIdfs TfIdfs
|
||||
if len(tis) > topK {
|
||||
topTfIdfs = tis[:topK]
|
||||
} else {
|
||||
topTfIdfs = tis
|
||||
}
|
||||
tags := make([]string, len(topTfIdfs))
|
||||
for index, ti := range topTfIdfs {
|
||||
tags[index] = ti.word
|
||||
}
|
||||
return tags
|
||||
}
|
||||
197
analyse/analyse_test.go
Normal file
197
analyse/analyse_test.go
Normal file
@@ -0,0 +1,197 @@
|
||||
package analyse
|
||||
|
||||
import (
|
||||
"github.com/wangbin/jiebago"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var (
|
||||
test_contents = []string{
|
||||
"这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。",
|
||||
"我不喜欢日本和服。",
|
||||
"雷猴回归人间。",
|
||||
"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
|
||||
"我需要廉租房",
|
||||
"永和服装饰品有限公司",
|
||||
"我爱北京天安门",
|
||||
"abc",
|
||||
"隐马尔可夫",
|
||||
"雷猴是个好网站",
|
||||
"“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成",
|
||||
"草泥马和欺实马是今年的流行词汇",
|
||||
"伊藤洋华堂总府店",
|
||||
"中国科学院计算技术研究所",
|
||||
"罗密欧与朱丽叶",
|
||||
"我购买了道具和服装",
|
||||
"PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍",
|
||||
"湖北省石首市",
|
||||
"湖北省十堰市",
|
||||
"总经理完成了这件事情",
|
||||
"电脑修好了",
|
||||
"做好了这件事情就一了百了了",
|
||||
"人们审美的观点是不同的",
|
||||
"我们买了一个美的空调",
|
||||
"线程初始化时我们要注意",
|
||||
"一个分子是由好多原子组织成的",
|
||||
"祝你马到功成",
|
||||
"他掉进了无底洞里",
|
||||
"中国的首都是北京",
|
||||
"孙君意",
|
||||
"外交部发言人马朝旭",
|
||||
"领导人会议和第四届东亚峰会",
|
||||
"在过去的这五年",
|
||||
"还需要很长的路要走",
|
||||
"60周年首都阅兵",
|
||||
"你好人们审美的观点是不同的",
|
||||
"买水果然后来世博园",
|
||||
"买水果然后去世博园",
|
||||
"但是后来我才知道你是对的",
|
||||
"存在即合理",
|
||||
"的的的的的在的的的的就以和和和",
|
||||
"I love你,不以为耻,反以为rong",
|
||||
"因",
|
||||
"",
|
||||
"hello你好人们审美的观点是不同的",
|
||||
"很好但主要是基于网页形式",
|
||||
"hello你好人们审美的观点是不同的",
|
||||
"为什么我不能拥有想要的生活",
|
||||
"后来我才",
|
||||
"此次来中国是为了",
|
||||
"使用了它就可以解决一些问题",
|
||||
",使用了它就可以解决一些问题",
|
||||
"其实使用了它就可以解决一些问题",
|
||||
"好人使用了它就可以解决一些问题",
|
||||
"是因为和国家",
|
||||
"老年搜索还支持",
|
||||
"干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 ",
|
||||
"大",
|
||||
"",
|
||||
"他说的确实在理",
|
||||
"长春市长春节讲话",
|
||||
"结婚的和尚未结婚的",
|
||||
"结合成分子时",
|
||||
"旅游和服务是最好的",
|
||||
"这件事情的确是我的错",
|
||||
"供大家参考指正",
|
||||
"哈尔滨政府公布塌桥原因",
|
||||
"我在机场入口处",
|
||||
"邢永臣摄影报道",
|
||||
"BP神经网络如何训练才能在分类时增加区分度?",
|
||||
"南京市长江大桥",
|
||||
"应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究",
|
||||
"长春市长春药店",
|
||||
"邓颖超生前最喜欢的衣服",
|
||||
"胡锦涛是热爱世界和平的政治局常委",
|
||||
"程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪",
|
||||
"一次性交多少钱",
|
||||
"两块五一套,三块八一斤,四块七一本,五块六一条",
|
||||
"小和尚留了一个像大和尚一样的和尚头",
|
||||
"我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站",
|
||||
"张晓梅去人民医院做了个B超然后去买了件T恤",
|
||||
"AT&T是一件不错的公司,给你发offer了吗?",
|
||||
"C++和c#是什么关系?11+122=133,是吗?PI=3.14159",
|
||||
"你认识那个和主席握手的的哥吗?他开一辆黑色的士。",
|
||||
"枪杆子中出政权"}
|
||||
Tags = [][]string{
|
||||
[]string{"Python", "C++", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u5b59\u609f\u7a7a", "\u9ed1\u591c", "\u5317\u4eac", "\u8fd9\u662f", "\u4e00\u4e2a"},
|
||||
[]string{"\u548c\u670d", "\u559c\u6b22", "\u65e5\u672c"},
|
||||
[]string{"\u96f7\u7334", "\u4eba\u95f4", "\u56de\u5f52"},
|
||||
[]string{"\u5de5\u4fe1\u5904", "\u5973\u5e72\u4e8b", "24", "\u4ea4\u6362\u673a", "\u79d1\u5ba4", "\u4eb2\u53e3", "\u5668\u4ef6", "\u6280\u672f\u6027", "\u4e0b\u5c5e", "\u4ea4\u4ee3", "\u6bcf\u6708", "\u5b89\u88c5", "\u7ecf\u8fc7", "\u5de5\u4f5c"},
|
||||
[]string{"\u5ec9\u79df\u623f", "\u9700\u8981"},
|
||||
[]string{"\u9970\u54c1", "\u6c38\u548c", "\u670d\u88c5", "\u6709\u9650\u516c\u53f8"},
|
||||
[]string{"\u5929\u5b89\u95e8", "\u5317\u4eac"},
|
||||
[]string{"abc"},
|
||||
[]string{"\u9a6c\u5c14\u53ef\u592b"},
|
||||
[]string{"\u96f7\u7334", "\u7f51\u7ad9"},
|
||||
[]string{"SOFTware", "Microsoft", "MICROcomputer", "\u5fae\u578b", "\u4e00\u8bcd", "\u8f6f\u4ef6", "\u8ba1\u7b97\u673a", "\u7ec4\u6210", "\u90e8\u5206"},
|
||||
[]string{"\u8349\u6ce5\u9a6c", "\u6b3a\u5b9e", "\u8bcd\u6c47", "\u6d41\u884c", "\u4eca\u5e74"},
|
||||
[]string{"\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u4f0a\u85e4"},
|
||||
[]string{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
|
||||
[]string{"\u6731\u4e3d\u53f6", "\u7f57\u5bc6\u6b27"},
|
||||
[]string{"\u9053\u5177", "\u670d\u88c5", "\u8d2d\u4e70"},
|
||||
[]string{"\u81ea\u73cd", "\u655e\u5e1a", "PS", "\u5f00\u6e90", "\u4e0d\u65ad\u6539\u8fdb", "\u6566\u4fc3", "\u597d\u5904", "\u907f\u514d", "\u80fd\u591f", "\u89c9\u5f97", "\u5c31\u662f", "\u81ea\u5df1", "\u4e00\u4e2a"},
|
||||
[]string{"\u77f3\u9996\u5e02", "\u6e56\u5317\u7701"},
|
||||
[]string{"\u5341\u5830\u5e02", "\u6e56\u5317\u7701"},
|
||||
[]string{"\u603b\u7ecf\u7406", "\u8fd9\u4ef6", "\u5b8c\u6210", "\u4e8b\u60c5"},
|
||||
[]string{"\u4fee\u597d", "\u7535\u8111"},
|
||||
[]string{"\u4e00\u4e86\u767e\u4e86", "\u505a\u597d", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u5ba1\u7f8e", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
|
||||
[]string{"\u7f8e\u7684", "\u7a7a\u8c03", "\u6211\u4eec", "\u4e00\u4e2a"},
|
||||
[]string{"\u7ebf\u7a0b", "\u521d\u59cb\u5316", "\u6ce8\u610f", "\u6211\u4eec"},
|
||||
[]string{"\u597d\u591a", "\u539f\u5b50", "\u5206\u5b50", "\u7ec4\u7ec7", "\u4e00\u4e2a"},
|
||||
[]string{"\u9a6c\u5230\u529f\u6210"},
|
||||
[]string{"\u65e0\u5e95\u6d1e"},
|
||||
[]string{"\u9996\u90fd", "\u5317\u4eac", "\u4e2d\u56fd"},
|
||||
[]string{"\u5b59\u541b\u610f"},
|
||||
[]string{"\u9a6c\u671d\u65ed", "\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba"},
|
||||
[]string{"\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae"},
|
||||
[]string{"\u4e94\u5e74", "\u8fc7\u53bb"},
|
||||
[]string{"\u5f88\u957f", "\u9700\u8981"},
|
||||
[]string{"60", "\u9605\u5175", "\u5468\u5e74", "\u9996\u90fd"},
|
||||
[]string{"\u5ba1\u7f8e", "\u4f60\u597d", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
|
||||
[]string{"\u4e16\u535a\u56ed", "\u6c34\u679c", "\u7136\u540e"},
|
||||
[]string{"\u4e16\u535a\u56ed", "\u6c34\u679c", "\u7136\u540e"},
|
||||
[]string{"\u540e\u6765", "\u4f46\u662f", "\u77e5\u9053"},
|
||||
[]string{"\u5408\u7406", "\u5b58\u5728"},
|
||||
[]string{},
|
||||
[]string{"rong", "love", "\u4e0d\u4ee5\u4e3a\u803b", "\u4ee5\u4e3a"},
|
||||
[]string{},
|
||||
[]string{},
|
||||
[]string{"hello", "\u5ba1\u7f8e", "\u4f60\u597d", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
|
||||
[]string{"\u7f51\u9875", "\u57fa\u4e8e", "\u5f62\u5f0f", "\u4e3b\u8981"},
|
||||
[]string{"hello", "\u5ba1\u7f8e", "\u4f60\u597d", "\u89c2\u70b9", "\u4eba\u4eec", "\u4e0d\u540c"},
|
||||
[]string{"\u60f3\u8981", "\u62e5\u6709", "\u4e3a\u4ec0\u4e48", "\u751f\u6d3b", "\u4e0d\u80fd"},
|
||||
[]string{"\u540e\u6765"},
|
||||
[]string{"\u6b64\u6b21", "\u4e3a\u4e86", "\u4e2d\u56fd"},
|
||||
[]string{"\u89e3\u51b3", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
|
||||
[]string{"\u89e3\u51b3", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
|
||||
[]string{"\u89e3\u51b3", "\u5176\u5b9e", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
|
||||
[]string{"\u597d\u4eba", "\u89e3\u51b3", "\u4f7f\u7528", "\u4e00\u4e9b", "\u95ee\u9898", "\u53ef\u4ee5"},
|
||||
[]string{"\u662f\u56e0\u4e3a", "\u56fd\u5bb6"},
|
||||
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u652f\u6301"},
|
||||
[]string{"\u95f2\u6cd5", "\u4e2d\u672c", "laoshipukong", "RT", "27", "\u8d23\u4efb\u6cd5", "\u8499\u4eba", "\u4e07\u52ab\u4e0d\u590d", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u533b\u60a3", "\u90a3\u90e8", "\u62c9\u5012", "\u4fb5\u6743", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u8349\u6848", "\u5883\u5730", "\u7ea0\u7eb7", "\u5220\u9664", "\u5f31\u52bf"},
|
||||
[]string{},
|
||||
[]string{},
|
||||
[]string{"\u5728\u7406", "\u786e\u5b9e"},
|
||||
[]string{"\u957f\u6625", "\u6625\u8282", "\u8bb2\u8bdd", "\u5e02\u957f"},
|
||||
[]string{"\u7ed3\u5a5a", "\u5c1a\u672a"},
|
||||
[]string{"\u5206\u5b50", "\u7ed3\u5408"},
|
||||
[]string{"\u65c5\u6e38", "\u6700\u597d", "\u670d\u52a1"},
|
||||
[]string{"\u7684\u786e", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u6307\u6b63", "\u53c2\u8003", "\u5927\u5bb6"},
|
||||
[]string{"\u584c\u6865", "\u54c8\u5c14\u6ee8", "\u516c\u5e03", "\u539f\u56e0", "\u653f\u5e9c"},
|
||||
[]string{"\u5165\u53e3\u5904", "\u673a\u573a"},
|
||||
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
|
||||
[]string{"\u533a\u5206\u5ea6", "BP", "\u795e\u7ecf\u7f51\u7edc", "\u8bad\u7ec3", "\u5206\u7c7b", "\u624d\u80fd", "\u5982\u4f55", "\u589e\u52a0"},
|
||||
[]string{"\u957f\u6c5f\u5927\u6865", "\u5357\u4eac\u5e02"},
|
||||
[]string{"SMT", "NiuTrans", "\u4f7f\u7528\u8005", "\u4fbf\u4e8e", "\u7528\u4e8e", "\u5efa\u8bae", "\u5229\u7528", "\u4e3a\u4e86", "\u7814\u7a76", "\u4e00\u4e9b"},
|
||||
[]string{"\u957f\u6625\u5e02", "\u836f\u5e97", "\u957f\u6625"},
|
||||
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u8863\u670d", "\u559c\u6b22"},
|
||||
[]string{"\u653f\u6cbb\u5c40", "\u70ed\u7231", "\u5e38\u59d4", "\u80e1\u9526\u6d9b", "\u548c\u5e73", "\u4e16\u754c"},
|
||||
[]string{"\u53f3\u9762", "\u5b59\u5065", "\u8303\u51ef", "\u674e\u677e\u6d2a", "\u6731\u4f1a\u9707", "\u6d77\u6797", "\u5de6\u9762", "\u7a0b\u5e8f\u5458", "\u518d\u5f80"},
|
||||
[]string{"\u4e00\u6b21\u6027", "\u591a\u5c11"},
|
||||
[]string{"\u56db\u5757", "\u4e94\u5757", "\u4e09\u5757", "\u4e00\u65a4", "\u4e24\u5757", "\u4e00\u672c", "\u4e00\u5957", "\u4e00\u6761"},
|
||||
[]string{"\u548c\u5c1a", "\u548c\u5c1a\u5934", "\u4e00\u6837", "\u4e00\u4e2a"},
|
||||
[]string{"\u548c\u5e73\u95e8", "\u5171\u548c\u515a", "\u5730\u94c1", "\u515a\u5458", "\u516c\u6c11", "\u7238\u7238", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd"},
|
||||
[]string{"\u5f20\u6653\u6885", "T\u6064", "B\u8d85", "\u533b\u9662", "\u4eba\u6c11", "\u7136\u540e"},
|
||||
[]string{"offer", "AT&T", "\u4e0d\u9519", "\u4e00\u4ef6", "\u516c\u53f8"},
|
||||
[]string{"c#", "PI", "C++", "3.14159", "133", "122", "11", "\u5173\u7cfb", "\u4ec0\u4e48"},
|
||||
[]string{"\u7684\u58eb", "\u7684\u54e5", "\u4ed6\u5f00", "\u63e1\u624b", "\u4e00\u8f86", "\u9ed1\u8272", "\u4e3b\u5e2d", "\u8ba4\u8bc6", "\u90a3\u4e2a"},
|
||||
[]string{"\u67aa\u6746\u5b50", "\u653f\u6743"},
|
||||
}
|
||||
)
|
||||
|
||||
func TestExtractTags(t *testing.T) {
|
||||
jiebago.SetDictionary("../dict.txt")
|
||||
for index, sentence := range test_contents {
|
||||
result := ExtractTags(sentence, 20)
|
||||
if len(result) != len(Tags[index]) {
|
||||
t.Errorf("%s = %v", sentence, result)
|
||||
}
|
||||
for i, tag := range result {
|
||||
if tag != Tags[index][i] {
|
||||
t.Error(tag)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
270181
analyse/idf.go
Normal file
270181
analyse/idf.go
Normal file
File diff suppressed because it is too large
Load Diff
85
finalseg/finalseg.go
Normal file
85
finalseg/finalseg.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package finalseg
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
)
|
||||
|
||||
func cutHan(sentence string) []string {
|
||||
runes := []rune(sentence)
|
||||
result := make([]string, 0)
|
||||
_, pos_list := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
|
||||
begin, next := 0, 0
|
||||
for i, char := range runes {
|
||||
pos := pos_list[i]
|
||||
switch pos {
|
||||
case 'B':
|
||||
begin = i
|
||||
case 'E':
|
||||
result = append(result, string(runes[begin:i+1]))
|
||||
next = i + 1
|
||||
case 'S':
|
||||
result = append(result, string(char))
|
||||
next = i + 1
|
||||
}
|
||||
}
|
||||
if next < len(runes) {
|
||||
result = append(result, string(runes[next:]))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func Cut(sentence string) []string {
|
||||
result := make([]string, 0)
|
||||
re_han := regexp.MustCompile(`\p{Han}+`)
|
||||
re_skip := regexp.MustCompile(`(\d+\.\d+|[a-zA-Z0-9]+)`)
|
||||
s := sentence
|
||||
var hans string
|
||||
var hanLoc []int
|
||||
var nonhanLoc []int
|
||||
for {
|
||||
hanLoc = re_han.FindStringIndex(s)
|
||||
if hanLoc == nil {
|
||||
if len(s) == 0 {
|
||||
break
|
||||
}
|
||||
} else if hanLoc[0] == 0 {
|
||||
hans = s[hanLoc[0]:hanLoc[1]]
|
||||
s = s[hanLoc[1]:]
|
||||
for _, han := range cutHan(hans) {
|
||||
result = append(result, han)
|
||||
}
|
||||
continue
|
||||
}
|
||||
nonhanLoc = re_skip.FindStringIndex(s)
|
||||
if nonhanLoc == nil {
|
||||
if len(s) == 0 {
|
||||
break
|
||||
}
|
||||
} else if nonhanLoc[0] == 0 {
|
||||
nonhans := s[nonhanLoc[0]:nonhanLoc[1]]
|
||||
s = s[nonhanLoc[1]:]
|
||||
if nonhans != "" {
|
||||
result = append(result, nonhans)
|
||||
continue
|
||||
}
|
||||
}
|
||||
var loc []int
|
||||
if hanLoc == nil && nonhanLoc == nil {
|
||||
if len(s) > 0 {
|
||||
result = append(result, s)
|
||||
break
|
||||
}
|
||||
} else if hanLoc == nil {
|
||||
loc = nonhanLoc
|
||||
} else if nonhanLoc == nil {
|
||||
loc = hanLoc
|
||||
} else if hanLoc[0] < nonhanLoc[0] {
|
||||
loc = hanLoc
|
||||
} else {
|
||||
loc = nonhanLoc
|
||||
}
|
||||
result = append(result, s[:loc[0]])
|
||||
s = s[loc[0]:]
|
||||
}
|
||||
return result
|
||||
}
|
||||
63
finalseg/finalseg_test.go
Normal file
63
finalseg/finalseg_test.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package finalseg
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestViterbi(t *testing.T) {
|
||||
obs := "我们是程序员"
|
||||
states := []byte{'B', 'M', 'E', 'S'}
|
||||
prob, path := viterbi([]rune(obs), states)
|
||||
if math.Abs(prob+39.68824128493802) > 1e-10 {
|
||||
t.Error(prob)
|
||||
}
|
||||
for index, state := range []byte{'B', 'E', 'S', 'B', 'M', 'E'} {
|
||||
if path[index] != state {
|
||||
t.Error(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCutHan(t *testing.T) {
|
||||
obs := "我们是程序员"
|
||||
result := cutHan(obs)
|
||||
if len(result) != 3 {
|
||||
t.Error(result)
|
||||
}
|
||||
if result[0] != "我们" {
|
||||
t.Error(result[0])
|
||||
}
|
||||
if result[1] != "是" {
|
||||
t.Error(result[1])
|
||||
}
|
||||
if result[2] != "程序员" {
|
||||
t.Error(result[2])
|
||||
}
|
||||
}
|
||||
|
||||
func TestCut(t *testing.T) {
|
||||
sentence := "我们是程序员"
|
||||
result := Cut(sentence)
|
||||
if len(result) != 3 {
|
||||
t.Error(len(result))
|
||||
}
|
||||
if result[0] != "我们" {
|
||||
t.Error(result[0])
|
||||
}
|
||||
if result[1] != "是" {
|
||||
t.Error(result[1])
|
||||
}
|
||||
if result[2] != "程序员" {
|
||||
t.Error(result[2])
|
||||
}
|
||||
result2 := Cut("I'm a programmer!")
|
||||
if len(result2) != 8 {
|
||||
t.Error(result2)
|
||||
}
|
||||
result3 := Cut("程序员average年龄28.6岁。")
|
||||
if len(result3) != 6 {
|
||||
t.Error(result3)
|
||||
}
|
||||
|
||||
}
|
||||
35231
finalseg/prob_emit.go
Normal file
35231
finalseg/prob_emit.go
Normal file
File diff suppressed because it is too large
Load Diff
10
finalseg/prob_start.go
Normal file
10
finalseg/prob_start.go
Normal file
@@ -0,0 +1,10 @@
|
||||
package finalseg
|
||||
|
||||
// ProbStart maps each state byte ('B', 'E', 'M', 'S') to the value viterbi
// adds to the emission score of the first observation. 'E' and 'M' carry
// -3.14e+100, the same magnitude viterbi uses for missing entries.
var ProbStart = map[byte]float64{
	'B': -0.26268660809250016,
	'E': -3.14e+100,
	'M': -3.14e+100,
	'S': -1.4652633398537678,
}
|
||||
14
finalseg/prob_trans.go
Normal file
14
finalseg/prob_trans.go
Normal file
@@ -0,0 +1,14 @@
|
||||
package finalseg
|
||||
|
||||
// ProbTrans maps a previous state to the states that may follow it and the
// value viterbi adds for that transition. Transitions that are absent here
// are scored by viterbi with its minimum value.
var ProbTrans = map[byte]map[byte]float64{
	'B': {
		'E': -0.510825623765990,
		'M': -0.916290731874155,
	},
	'E': {
		'B': -0.5897149736854513,
		'S': -0.8085250474669937,
	},
	'M': {
		'E': -0.33344856811948514,
		'M': -1.2603623820268226,
	},
	'S': {
		'B': -0.7211965654669841,
		'S': -0.6658631448798212,
	},
}
|
||||
94
finalseg/viterbi.go
Normal file
94
finalseg/viterbi.go
Normal file
@@ -0,0 +1,94 @@
|
||||
package finalseg
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// MIN_FLOAT is the score viterbi substitutes when an emission or transition
// entry is missing from the model tables.
const MIN_FLOAT = -3.14e100

// PrevStatus lists, for every state, the states viterbi tries as its
// predecessor.
var PrevStatus = map[byte][]byte{
	'B': {'E', 'S'},
	'M': {'M', 'B'},
	'S': {'S', 'E'},
	'E': {'B', 'M'},
}
|
||||
|
||||
// Viterbi is one candidate considered by viterbi: a score and the state it
// is associated with.
type Viterbi struct {
	prob  float64
	state byte
}

// String formats the candidate as "(prob, state)". The state is a byte and
// is printed as a character with %c; %s would render it as
// "%!s(uint8=...)".
func (v Viterbi) String() string {
	return fmt.Sprintf("(%f, %c)", v.prob, v.state)
}

// Viterbis is a slice of candidates implementing sort.Interface, ordered by
// ascending prob and, for equal prob, by ascending state byte.
type Viterbis []*Viterbi

// Len returns the number of candidates.
func (vs Viterbis) Len() int {
	return len(vs)
}

// Less reports whether candidate i sorts before candidate j: lower prob
// first, ties broken by the smaller state byte.
func (vs Viterbis) Less(i, j int) bool {
	if vs[i].prob != vs[j].prob {
		return vs[i].prob < vs[j].prob
	}
	return vs[i].state < vs[j].state
}

// Swap exchanges candidates i and j.
func (vs Viterbis) Swap(i, j int) {
	vs[i], vs[j] = vs[j], vs[i]
}
|
||||
|
||||
func viterbi(obs []rune, states []byte) (float64, []byte) {
|
||||
path := make(map[byte][]byte)
|
||||
V := make([]map[byte]float64, len(obs))
|
||||
V[0] = make(map[byte]float64)
|
||||
for _, y := range states {
|
||||
if val, ok := ProbEmit[y][obs[0]]; ok {
|
||||
V[0][y] = val + ProbStart[y]
|
||||
} else {
|
||||
V[0][y] = MIN_FLOAT + ProbStart[y]
|
||||
}
|
||||
path[y] = []byte{y}
|
||||
}
|
||||
|
||||
for t := 1; t < len(obs); t++ {
|
||||
newPath := make(map[byte][]byte)
|
||||
V[t] = make(map[byte]float64)
|
||||
for _, y := range states {
|
||||
vs0 := make(Viterbis, 0)
|
||||
var em_p float64
|
||||
if val, ok := ProbEmit[y][obs[t]]; ok {
|
||||
em_p = val
|
||||
} else {
|
||||
em_p = MIN_FLOAT
|
||||
}
|
||||
for _, y0 := range PrevStatus[y] {
|
||||
var transP float64
|
||||
if tp, ok := ProbTrans[y0][y]; ok {
|
||||
transP = tp
|
||||
} else {
|
||||
transP = MIN_FLOAT
|
||||
}
|
||||
prob0 := V[t-1][y0] + transP + em_p
|
||||
vs0 = append(vs0, &Viterbi{prob: prob0, state: y0})
|
||||
}
|
||||
sort.Sort(sort.Reverse(vs0))
|
||||
V[t][y] = vs0[0].prob
|
||||
pp := make([]byte, len(path[vs0[0].state]))
|
||||
copy(pp, path[vs0[0].state])
|
||||
newPath[y] = append(pp, y)
|
||||
}
|
||||
path = newPath
|
||||
}
|
||||
vs := make(Viterbis, 0)
|
||||
for _, y := range []byte{'E', 'S'} {
|
||||
vs = append(vs, &Viterbi{V[len(obs)-1][y], y})
|
||||
}
|
||||
sort.Sort(sort.Reverse(vs))
|
||||
v := vs[0]
|
||||
return v.prob, path[v.state]
|
||||
}
|
||||
1
foobar.txt
Normal file
1
foobar.txt
Normal file
@@ -0,0 +1 @@
|
||||
好人 12 n
|
||||
344
jieba.go
Normal file
344
jieba.go
Normal file
@@ -0,0 +1,344 @@
|
||||
package jiebago
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/wangbin/jiebago/finalseg"
|
||||
"regexp"
|
||||
"sort"
|
||||
)
|
||||
|
||||
var (
|
||||
Dictionary = "dict.txt"
|
||||
TT *TopTrie
|
||||
UserWordTagTab = make(map[string]string)
|
||||
)
|
||||
|
||||
// Route is one entry of the table built by Calc: the accumulated score of
// the best segmentation starting at a rune position and the index of the
// last rune of the first word on that route.
type Route struct {
	Freq  float64
	Index int
}

// String formats the route as "(Freq, Index)".
func (route Route) String() string {
	return fmt.Sprintf("(%f, %d)", route.Freq, route.Index)
}

// Routes is a slice of routes implementing sort.Interface, ordered by
// ascending Freq and, for equal Freq, by ascending Index.
type Routes []*Route

// Len returns the number of routes.
func (routes Routes) Len() int {
	return len(routes)
}

// Less reports whether route i sorts before route j: lower Freq first, ties
// broken by the smaller Index.
func (routes Routes) Less(i, j int) bool {
	a, b := routes[i], routes[j]
	if a.Freq != b.Freq {
		return a.Freq < b.Freq
	}
	return a.Index < b.Index
}

// Swap exchanges routes i and j.
func (routes Routes) Swap(i, j int) {
	routes[i], routes[j] = routes[j], routes[i]
}
|
||||
|
||||
// RegexpSplit cuts sentence at every match of r and returns both the matches
// and the text between them, in their original order. Text before the first
// match and after the last match is included when it is non-empty. When r
// does not match at all, the result is a single element holding the whole
// sentence (also for an empty sentence).
func RegexpSplit(r *regexp.Regexp, sentence string) []string {
	locs := r.FindAllStringIndex(sentence, -1)
	if len(locs) == 0 {
		return []string{sentence}
	}

	// At most one gap before every match plus one trailing gap.
	result := make([]string, 0, 2*len(locs)+1)
	lastLoc := 0
	for _, loc := range locs {
		if loc[0] > lastLoc {
			result = append(result, sentence[lastLoc:loc[0]])
		}
		result = append(result, sentence[loc[0]:loc[1]])
		lastLoc = loc[1]
	}
	if lastLoc < len(sentence) {
		result = append(result, sentence[lastLoc:])
	}
	return result
}
|
||||
|
||||
func GetDAG(sentence string) map[int][]int {
|
||||
dag := make(map[int][]int)
|
||||
runes := []rune(sentence)
|
||||
n := len(runes)
|
||||
p := TT.T
|
||||
i, j := 0, 0
|
||||
for {
|
||||
if i >= n {
|
||||
break
|
||||
}
|
||||
c := string(runes[j])
|
||||
if node, ok := p[c]; ok {
|
||||
p = node.SubNodes
|
||||
if node.IsLeaf {
|
||||
if _, inDag := dag[i]; !inDag {
|
||||
dag[i] = []int{j}
|
||||
} else {
|
||||
dag[i] = append(dag[i], j)
|
||||
}
|
||||
}
|
||||
j += 1
|
||||
if j >= n {
|
||||
i += 1
|
||||
j = i
|
||||
p = TT.T
|
||||
}
|
||||
} else {
|
||||
p = TT.T
|
||||
i += 1
|
||||
j = i
|
||||
}
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
if _, ok := dag[i]; !ok {
|
||||
dag[i] = []int{i}
|
||||
}
|
||||
}
|
||||
return dag
|
||||
}
|
||||
|
||||
func Calc(sentence string, dag map[int][]int, idx int) map[int]*Route {
|
||||
runes := []rune(sentence)
|
||||
number := len(runes)
|
||||
routes := make(map[int]*Route)
|
||||
routes[number] = &Route{0.0, 0}
|
||||
for idx := number - 1; idx >= 0; idx-- {
|
||||
candidates := make(Routes, 0)
|
||||
for _, i := range dag[idx] {
|
||||
var word string
|
||||
if i <= idx-1 {
|
||||
word = string(runes[i+1 : idx])
|
||||
} else {
|
||||
word = string(runes[idx : i+1])
|
||||
}
|
||||
var route *Route
|
||||
if _, ok := TT.Freq[word]; ok {
|
||||
route = &Route{TT.Freq[word] + routes[i+1].Freq, i}
|
||||
} else {
|
||||
route = &Route{TT.MinFreq + routes[i+1].Freq, i}
|
||||
}
|
||||
candidates = append(candidates, route)
|
||||
}
|
||||
sort.Sort(sort.Reverse(candidates))
|
||||
routes[idx] = candidates[0]
|
||||
}
|
||||
return routes
|
||||
}
|
||||
|
||||
// cutAction is the signature shared by the block segmenters (cut_DAG,
// cut_DAG_NO_HMM and cut_All) that Cut chooses between.
type cutAction func(sentence string) []string
|
||||
|
||||
func cut_DAG(sentence string) []string {
|
||||
dag := GetDAG(sentence)
|
||||
routes := Calc(sentence, dag, 0)
|
||||
x := 0
|
||||
var y int
|
||||
runes := []rune(sentence)
|
||||
length := len(runes)
|
||||
result := make([]string, 0)
|
||||
buf := make([]rune, 0)
|
||||
for {
|
||||
if x >= length {
|
||||
break
|
||||
}
|
||||
y = routes[x].Index + 1
|
||||
l_word := runes[x:y]
|
||||
if y-x == 1 {
|
||||
buf = append(buf, l_word...)
|
||||
} else {
|
||||
if len(buf) > 0 {
|
||||
if len(buf) == 1 {
|
||||
result = append(result, string(buf))
|
||||
buf = make([]rune, 0)
|
||||
} else {
|
||||
bufString := string(buf)
|
||||
if _, ok := TT.Freq[bufString]; !ok {
|
||||
recognized := finalseg.Cut(bufString)
|
||||
for _, t := range recognized {
|
||||
result = append(result, t)
|
||||
}
|
||||
} else {
|
||||
for _, elem := range buf {
|
||||
result = append(result, string(elem)) // TODO: I don't get this?
|
||||
}
|
||||
}
|
||||
buf = make([]rune, 0)
|
||||
}
|
||||
}
|
||||
result = append(result, string(l_word))
|
||||
}
|
||||
x = y
|
||||
}
|
||||
|
||||
if len(buf) > 0 {
|
||||
if len(buf) == 1 {
|
||||
result = append(result, string(buf))
|
||||
} else {
|
||||
bufString := string(buf)
|
||||
if _, ok := TT.Freq[bufString]; !ok {
|
||||
recognized := finalseg.Cut(bufString)
|
||||
for _, t := range recognized {
|
||||
result = append(result, t)
|
||||
}
|
||||
} else {
|
||||
for _, elem := range buf {
|
||||
result = append(result, string(elem)) // TODO: I don't get this?
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func cut_DAG_NO_HMM(sentence string) []string {
|
||||
result := make([]string, 0)
|
||||
re_eng := regexp.MustCompile(`[[:alnum:]]`)
|
||||
dag := GetDAG(sentence)
|
||||
routes := Calc(sentence, dag, 0)
|
||||
x := 0
|
||||
var y int
|
||||
runes := []rune(sentence)
|
||||
length := len(runes)
|
||||
buf := make([]rune, 0)
|
||||
for {
|
||||
if x >= length {
|
||||
break
|
||||
}
|
||||
y = routes[x].Index + 1
|
||||
l_word := runes[x:y]
|
||||
if re_eng.MatchString(string(l_word)) && len(l_word) == 1 {
|
||||
buf = append(buf, l_word...)
|
||||
x = y
|
||||
} else {
|
||||
if len(buf) > 0 {
|
||||
result = append(result, string(buf))
|
||||
buf = make([]rune, 0)
|
||||
}
|
||||
result = append(result, string(l_word))
|
||||
x = y
|
||||
}
|
||||
}
|
||||
if len(buf) > 0 {
|
||||
result = append(result, string(buf))
|
||||
buf = make([]rune, 0)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func cut_All(sentence string) []string {
|
||||
result := make([]string, 0)
|
||||
runes := []rune(sentence)
|
||||
dag := GetDAG(sentence)
|
||||
old_j := -1
|
||||
ks := make([]int, 0)
|
||||
for k := range dag {
|
||||
ks = append(ks, k)
|
||||
}
|
||||
sort.Ints(ks)
|
||||
for k := range ks {
|
||||
l := dag[k]
|
||||
if len(l) == 1 && k > old_j {
|
||||
result = append(result, string(runes[k:l[0]+1]))
|
||||
old_j = l[0]
|
||||
} else {
|
||||
for _, j := range l {
|
||||
if j > k {
|
||||
result = append(result, string(runes[k:j+1]))
|
||||
old_j = j
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func Cut(sentence string, cut_all bool, HMM bool) []string {
|
||||
result := make([]string, 0)
|
||||
var re_han, re_skip *regexp.Regexp
|
||||
if cut_all {
|
||||
re_han = regexp.MustCompile(`\p{Han}+`)
|
||||
re_skip = regexp.MustCompile(`[^[:alnum:]+#\n]`)
|
||||
} else {
|
||||
re_han = regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
|
||||
re_skip = regexp.MustCompile(`(\r\n|\s)`)
|
||||
}
|
||||
blocks := RegexpSplit(re_han, sentence)
|
||||
var cut_block cutAction
|
||||
if HMM {
|
||||
cut_block = cut_DAG
|
||||
} else {
|
||||
cut_block = cut_DAG_NO_HMM
|
||||
}
|
||||
if cut_all {
|
||||
cut_block = cut_All
|
||||
}
|
||||
for _, blk := range blocks {
|
||||
if len(blk) == 0 {
|
||||
continue
|
||||
}
|
||||
if re_han.MatchString(blk) {
|
||||
for _, word := range cut_block(blk) {
|
||||
result = append(result, word)
|
||||
}
|
||||
} else {
|
||||
type skipSplitFunc func(sentence string) []string
|
||||
var ssf skipSplitFunc
|
||||
if cut_all {
|
||||
ssf = func(sentence string) []string {
|
||||
return re_skip.Split(sentence, -1)
|
||||
}
|
||||
} else {
|
||||
ssf = func(sentence string) []string {
|
||||
return RegexpSplit(re_skip, sentence)
|
||||
}
|
||||
}
|
||||
|
||||
for _, x := range ssf(blk) {
|
||||
if re_skip.MatchString(x) {
|
||||
result = append(result, x)
|
||||
} else if !cut_all {
|
||||
for _, xx := range x {
|
||||
result = append(result, string(xx))
|
||||
}
|
||||
} else {
|
||||
result = append(result, x)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func CutForSearch(sentence string, hmm bool) []string {
|
||||
result := make([]string, 0)
|
||||
words := Cut(sentence, false, hmm)
|
||||
for _, word := range words {
|
||||
runes := []rune(word)
|
||||
for _, increment := range []int{2, 3} {
|
||||
if len(runes) > increment {
|
||||
var gram2 string
|
||||
for i := 0; i < len(runes)-increment+1; i++ {
|
||||
gram2 = string(runes[i : i+increment])
|
||||
if _, ok := TT.Freq[gram2]; ok {
|
||||
result = append(result, gram2)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result = append(result, word)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func SetDictionary(dict_path string) (err error) {
|
||||
TT, err = newTopTrie(dict_path)
|
||||
return
|
||||
}
|
||||
747
jieba_test.go
Normal file
747
jieba_test.go
Normal file
@@ -0,0 +1,747 @@
|
||||
package jiebago
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var (
|
||||
test_contents = []string{
|
||||
"这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。",
|
||||
"我不喜欢日本和服。",
|
||||
"雷猴回归人间。",
|
||||
"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
|
||||
"我需要廉租房",
|
||||
"永和服装饰品有限公司",
|
||||
"我爱北京天安门",
|
||||
"abc",
|
||||
"隐马尔可夫",
|
||||
"雷猴是个好网站",
|
||||
"“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成",
|
||||
"草泥马和欺实马是今年的流行词汇",
|
||||
"伊藤洋华堂总府店",
|
||||
"中国科学院计算技术研究所",
|
||||
"罗密欧与朱丽叶",
|
||||
"我购买了道具和服装",
|
||||
"PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍",
|
||||
"湖北省石首市",
|
||||
"湖北省十堰市",
|
||||
"总经理完成了这件事情",
|
||||
"电脑修好了",
|
||||
"做好了这件事情就一了百了了",
|
||||
"人们审美的观点是不同的",
|
||||
"我们买了一个美的空调",
|
||||
"线程初始化时我们要注意",
|
||||
"一个分子是由好多原子组织成的",
|
||||
"祝你马到功成",
|
||||
"他掉进了无底洞里",
|
||||
"中国的首都是北京",
|
||||
"孙君意",
|
||||
"外交部发言人马朝旭",
|
||||
"领导人会议和第四届东亚峰会",
|
||||
"在过去的这五年",
|
||||
"还需要很长的路要走",
|
||||
"60周年首都阅兵",
|
||||
"你好人们审美的观点是不同的",
|
||||
"买水果然后来世博园",
|
||||
"买水果然后去世博园",
|
||||
"但是后来我才知道你是对的",
|
||||
"存在即合理",
|
||||
"的的的的的在的的的的就以和和和",
|
||||
"I love你,不以为耻,反以为rong",
|
||||
"因",
|
||||
"",
|
||||
"hello你好人们审美的观点是不同的",
|
||||
"很好但主要是基于网页形式",
|
||||
"hello你好人们审美的观点是不同的",
|
||||
"为什么我不能拥有想要的生活",
|
||||
"后来我才",
|
||||
"此次来中国是为了",
|
||||
"使用了它就可以解决一些问题",
|
||||
",使用了它就可以解决一些问题",
|
||||
"其实使用了它就可以解决一些问题",
|
||||
"好人使用了它就可以解决一些问题",
|
||||
"是因为和国家",
|
||||
"老年搜索还支持",
|
||||
"干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 ",
|
||||
"大",
|
||||
"",
|
||||
"他说的确实在理",
|
||||
"长春市长春节讲话",
|
||||
"结婚的和尚未结婚的",
|
||||
"结合成分子时",
|
||||
"旅游和服务是最好的",
|
||||
"这件事情的确是我的错",
|
||||
"供大家参考指正",
|
||||
"哈尔滨政府公布塌桥原因",
|
||||
"我在机场入口处",
|
||||
"邢永臣摄影报道",
|
||||
"BP神经网络如何训练才能在分类时增加区分度?",
|
||||
"南京市长江大桥",
|
||||
"应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究",
|
||||
"长春市长春药店",
|
||||
"邓颖超生前最喜欢的衣服",
|
||||
"胡锦涛是热爱世界和平的政治局常委",
|
||||
"程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪",
|
||||
"一次性交多少钱",
|
||||
"两块五一套,三块八一斤,四块七一本,五块六一条",
|
||||
"小和尚留了一个像大和尚一样的和尚头",
|
||||
"我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站",
|
||||
"张晓梅去人民医院做了个B超然后去买了件T恤",
|
||||
"AT&T是一件不错的公司,给你发offer了吗?",
|
||||
"C++和c#是什么关系?11+122=133,是吗?PI=3.14159",
|
||||
"你认识那个和主席握手的的哥吗?他开一辆黑色的士。",
|
||||
"枪杆子中出政权"}
|
||||
|
||||
defaultCutResult = [][]string{
|
||||
[]string{"\u8fd9\u662f", "\u4e00\u4e2a", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
|
||||
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
|
||||
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
|
||||
[]string{"\u5de5\u4fe1\u5904", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
|
||||
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df\u623f"},
|
||||
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650\u516c\u53f8"},
|
||||
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89\u95e8"},
|
||||
[]string{"abc"},
|
||||
[]string{"\u9690", "\u9a6c\u5c14\u53ef\u592b"},
|
||||
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
|
||||
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
|
||||
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
|
||||
[]string{"\u4f0a\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
|
||||
[]string{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
|
||||
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
|
||||
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
|
||||
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e\u5e1a", "\u81ea\u73cd"},
|
||||
[]string{"\u6e56\u5317\u7701", "\u77f3\u9996\u5e02"},
|
||||
[]string{"\u6e56\u5317\u7701", "\u5341\u5830\u5e02"},
|
||||
[]string{"\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
|
||||
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
|
||||
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
|
||||
[]string{"\u7ebf\u7a0b", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
|
||||
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
|
||||
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
|
||||
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95\u6d1e", "\u91cc"},
|
||||
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
|
||||
[]string{"\u5b59\u541b\u610f"},
|
||||
[]string{"\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
|
||||
[]string{"\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
|
||||
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
|
||||
[]string{"\u8fd8", "\u9700\u8981", "\u5f88\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
|
||||
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
|
||||
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
|
||||
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
|
||||
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
|
||||
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
|
||||
[]string{"\u56e0"},
|
||||
[]string{},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
|
||||
[]string{"\u540e\u6765", "\u6211", "\u624d"},
|
||||
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
|
||||
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
|
||||
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
|
||||
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
|
||||
[]string{"\u5927"},
|
||||
[]string{},
|
||||
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728\u7406"},
|
||||
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
|
||||
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
|
||||
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
|
||||
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
|
||||
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
|
||||
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
|
||||
[]string{"\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c\u6865", "\u539f\u56e0"},
|
||||
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3\u5904"},
|
||||
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
|
||||
[]string{"BP", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206\u5ea6", "\uff1f"},
|
||||
[]string{"\u5357\u4eac\u5e02", "\u957f\u6c5f\u5927\u6865"},
|
||||
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
|
||||
[]string{"\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
|
||||
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
|
||||
[]string{"\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
|
||||
[]string{"\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731\u4f1a\u9707", "\u662f", "\u5728", "\u5b59\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e\u677e\u6d2a"},
|
||||
[]string{"\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
|
||||
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
|
||||
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a\u5934"},
|
||||
[]string{"\u6211", "\u662f", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73\u95e8", "\u7ad9"},
|
||||
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
|
||||
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
|
||||
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3.14159"},
|
||||
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
|
||||
[]string{"\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
|
||||
}
|
||||
|
||||
cutAllResult = [][]string{
|
||||
[]string{"\u8fd9", "\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4f38\u624b\u4e0d\u89c1", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u4e0d\u89c1", "\u4e94\u6307", "\u7684", "\u9ed1\u591c", "", "", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\u609f\u7a7a", "", "", "\u6211", "\u7231", "\u5317\u4eac", "", "", "\u6211", "\u7231", "Python", "\u548c", "C++", ""},
|
||||
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "", ""},
|
||||
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "", ""},
|
||||
[]string{"\u5de5\u4fe1\u5904", "\u5904\u5973", "\u5973\u5e72\u4e8b", "\u5e72\u4e8b", "\u6bcf\u6708", "\u6708\u7ecf", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u53e3\u4ea4", "\u4ea4\u4ee3", "24", "\u53e3\u4ea4", "\u4ea4\u6362", "\u4ea4\u6362\u673a", "\u6362\u673a", "\u7b49", "\u6280\u672f", "\u6280\u672f\u6027", "\u6027\u5668", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5b89\u88c5\u5de5", "\u88c5\u5de5", "\u5de5\u4f5c"},
|
||||
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df", "\u5ec9\u79df\u623f", "\u79df\u623f"},
|
||||
[]string{"\u6c38\u548c", "\u548c\u670d", "\u670d\u88c5", "\u88c5\u9970", "\u88c5\u9970\u54c1", "\u9970\u54c1", "\u6709\u9650", "\u6709\u9650\u516c\u53f8", "\u516c\u53f8"},
|
||||
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89", "\u5929\u5b89\u95e8"},
|
||||
[]string{"abc"},
|
||||
[]string{"\u9690", "\u9a6c\u5c14\u53ef", "\u9a6c\u5c14\u53ef\u592b", "\u53ef\u592b"},
|
||||
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
|
||||
[]string{"", "Microsoft", "", "\u4e00", "\u8bcd", "\u7531", "", "MICROcomputer", "", "\u5fae\u578b", "\u8ba1\u7b97", "\u8ba1\u7b97\u673a", "\u7b97\u673a", "", "", "", "\u548c", "", "SOFTware", "", "\u8f6f\u4ef6", "", "", "", "\u4e24\u90e8", "\u90e8\u5206", "\u5206\u7ec4", "\u7ec4\u6210"},
|
||||
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a", "\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
|
||||
[]string{"\u4f0a", "\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
|
||||
[]string{"\u4e2d\u56fd", "\u4e2d\u56fd\u79d1\u5b66\u9662", "\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", "\u79d1\u5b66", "\u79d1\u5b66\u9662", "\u5b66\u9662", "\u8ba1\u7b97", "\u8ba1\u7b97\u6280\u672f", "\u6280\u672f", "\u7814\u7a76", "\u7814\u7a76\u6240"},
|
||||
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
|
||||
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c\u670d", "\u670d\u88c5"},
|
||||
[]string{"PS", "", "", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "", "", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u4e0d\u65ad\u6539\u8fdb", "\u6539\u8fdb", "", "", "\u907f\u514d", "\u655e", "\u5e1a", "\u81ea\u73cd"},
|
||||
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u77f3\u9996", "\u77f3\u9996\u5e02"},
|
||||
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u5341\u5830", "\u5341\u5830\u5e02"},
|
||||
[]string{"\u603b\u7ecf\u7406", "\u7ecf\u7406", "\u7406\u5b8c", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
|
||||
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86\u4e86"},
|
||||
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
|
||||
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u521d\u59cb\u5316", "\u5316\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
|
||||
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u7ec7\u6210", "\u7684"},
|
||||
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
|
||||
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u65e0\u5e95\u6d1e", "\u91cc"},
|
||||
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
|
||||
[]string{"\u5b59", "\u541b", "\u610f"},
|
||||
[]string{"\u5916\u4ea4", "\u5916\u4ea4\u90e8", "\u90e8\u53d1", "\u53d1\u8a00", "\u53d1\u8a00\u4eba", "\u4eba\u9a6c", "\u9a6c\u671d\u65ed"},
|
||||
[]string{"\u9886\u5bfc", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u8bae\u548c", "\u7b2c\u56db", "\u7b2c\u56db\u5c4a", "\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
|
||||
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
|
||||
[]string{"\u8fd8", "\u9700\u8981", "\u5f88", "\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
|
||||
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
|
||||
[]string{"\u4f60\u597d", "\u597d\u4eba", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u679c\u7136", "\u7136\u540e", "\u540e\u6765", "\u6765\u4e16", "\u4e16\u535a", "\u4e16\u535a\u56ed", "\u535a\u56ed"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u679c\u7136", "\u7136\u540e", "\u540e\u53bb", "\u53bb\u4e16", "\u4e16\u535a", "\u4e16\u535a\u56ed", "\u535a\u56ed"},
|
||||
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
|
||||
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
|
||||
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
|
||||
[]string{"I", "love", "\u4f60", "", "", "\u4e0d\u4ee5", "\u4e0d\u4ee5\u4e3a\u803b", "\u4ee5\u4e3a", "\u803b", "", "", "\u53cd", "\u4ee5\u4e3a", "rong"},
|
||||
[]string{"\u56e0"},
|
||||
[]string{},
|
||||
[]string{"hello", "\u4f60\u597d", "\u597d\u4eba", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u8981\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
|
||||
[]string{"hello", "\u4f60\u597d", "\u597d\u4eba", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7f8e\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e3a\u4ec0\u4e48", "\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
|
||||
[]string{"\u540e\u6765", "\u6211", "\u624d"},
|
||||
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u56fd\u662f", "\u4e3a\u4e86"},
|
||||
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"", "", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u662f\u56e0\u4e3a", "\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
|
||||
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u7d22\u8fd8", "\u652f\u6301"},
|
||||
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2", "\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "", "RT", "", "laoshipukong", "", "", "27", "\u65e5", "", "", "\u5168\u56fd", "\u5168\u56fd\u4eba\u5927", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u56fd\u4eba", "\u4eba\u5927", "\u4eba\u5927\u5e38\u59d4\u4f1a", "\u5e38\u59d4", "\u5e38\u59d4\u4f1a", "\u59d4\u4f1a", "\u7b2c\u4e09", "\u7b2c\u4e09\u6b21", "\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u6743\u8d23", "\u8d23\u4efb", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "", "", "\u5220\u9664", "\u9664\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "", "", "\u4e3e\u8bc1", "\u5012\u7f6e", "", "", "\u7684", "\u89c4\u5b9a", "", "", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d", "\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab\u4e0d\u590d", "\u4e0d\u590d", "\u7684", "\u5883\u5730", "", "", ""},
|
||||
[]string{"\u5927"},
|
||||
[]string{},
|
||||
[]string{"\u4ed6", "\u8bf4", "\u7684\u786e", "\u786e\u5b9e", "\u5b9e\u5728", "\u7406"},
|
||||
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u5e02\u957f", "\u957f\u6625", "\u6625\u8282", "\u8bb2\u8bdd"},
|
||||
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c\u5c1a", "\u5c1a\u672a", "\u672a\u7ed3", "\u7ed3\u5a5a", "\u7684"},
|
||||
[]string{"\u7ed3\u5408", "\u5408\u6210", "\u6210\u5206", "\u5206\u5b50", "\u65f6"},
|
||||
[]string{"\u65c5\u6e38", "\u548c\u670d", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
|
||||
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
|
||||
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
|
||||
[]string{"\u54c8\u5c14", "\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c", "\u6865", "\u539f\u56e0"},
|
||||
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5165\u53e3\u5904"},
|
||||
[]string{"\u90a2", "\u6c38", "\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
|
||||
[]string{"BP", "\u795e\u7ecf", "\u795e\u7ecf\u7f51", "\u795e\u7ecf\u7f51\u7edc", "\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u52a0\u533a", "\u533a\u5206", "\u533a\u5206\u5ea6", "\u5206\u5ea6", "", ""},
|
||||
[]string{"\u5357\u4eac", "\u5357\u4eac\u5e02", "\u4eac\u5e02", "\u5e02\u957f", "\u957f\u6c5f", "\u957f\u6c5f\u5927\u6865", "\u5927\u6865"},
|
||||
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528", "\u4f7f\u7528\u8005", "\u7528\u8005", "\u7684", "\u5efa\u8bae", "", "", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
|
||||
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u5e02\u957f", "\u957f\u6625", "\u6625\u836f", "\u836f\u5e97"},
|
||||
[]string{"\u9093\u9896\u8d85", "\u8d85\u751f", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
|
||||
[]string{"\u80e1\u9526\u6d9b", "\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
|
||||
[]string{"\u7a0b\u5e8f", "\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731", "\u4f1a", "\u9707", "\u662f", "\u5728", "\u5b59", "\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", "", "", "", "\u8303", "\u51ef", "\u5728", "\u6700", "\u53f3\u9762", "", "", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e", "\u677e", "\u6d2a"},
|
||||
[]string{"\u4e00\u6b21", "\u4e00\u6b21\u6027", "\u6027\u4ea4", "\u591a\u5c11", "\u591a\u5c11\u94b1"},
|
||||
[]string{"\u4e24\u5757", "\u4e94\u4e00", "\u4e00\u5957", "", "", "\u4e09\u5757", "\u516b\u4e00", "\u4e00\u65a4", "", "", "\u56db\u5757", "\u4e03\u4e00", "\u4e00\u672c", "", "", "\u4e94\u5757", "\u516d\u4e00", "\u4e00\u6761"},
|
||||
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a", "\u548c\u5c1a\u5934"},
|
||||
[]string{"\u6211", "\u662f", "\u4e2d\u534e", "\u4e2d\u534e\u4eba\u6c11", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u534e\u4eba", "\u4eba\u6c11", "\u4eba\u6c11\u5171\u548c\u56fd", "\u5171\u548c", "\u5171\u548c\u56fd", "\u56fd\u516c", "\u516c\u6c11", "", "", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c", "\u5171\u548c\u515a", "\u515a\u5458", "", "", "", "\u5730\u94c1", "\u548c\u5e73", "\u548c\u5e73\u95e8", "\u7ad9"},
|
||||
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u6c11\u533b\u9662", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B", "\u8d85\u7136", "\u7136\u540e", "\u540e\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T", "\u6064"},
|
||||
[]string{"AT", "T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "", "", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "", ""},
|
||||
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "", "11+122", "133", "", "\u662f", "\u5417", "", "PI", "3", "14159"},
|
||||
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "", "", "\u4ed6", "\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "", ""},
|
||||
[]string{"\u67aa\u6746", "\u67aa\u6746\u5b50", "\u6746\u5b50", "\u4e2d\u51fa", "\u653f\u6743"},
|
||||
}
|
||||
|
||||
defaultCutNoHMMResult = [][]string{
|
||||
[]string{"\u8fd9", "\u662f", "\u4e00\u4e2a", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
|
||||
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
|
||||
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
|
||||
[]string{"\u5de5\u4fe1\u5904", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
|
||||
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df\u623f"},
|
||||
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650\u516c\u53f8"},
|
||||
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89\u95e8"},
|
||||
[]string{"abc"},
|
||||
[]string{"\u9690", "\u9a6c\u5c14\u53ef\u592b"},
|
||||
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
|
||||
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00", "\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
|
||||
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a", "\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
|
||||
[]string{"\u4f0a", "\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
|
||||
[]string{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
|
||||
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
|
||||
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
|
||||
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e", "\u5e1a", "\u81ea\u73cd"},
|
||||
[]string{"\u6e56\u5317\u7701", "\u77f3\u9996\u5e02"},
|
||||
[]string{"\u6e56\u5317\u7701", "\u5341\u5830\u5e02"},
|
||||
[]string{"\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
|
||||
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
|
||||
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
|
||||
[]string{"\u7ebf\u7a0b", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
|
||||
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
|
||||
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
|
||||
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95\u6d1e", "\u91cc"},
|
||||
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
|
||||
[]string{"\u5b59", "\u541b", "\u610f"},
|
||||
[]string{"\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
|
||||
[]string{"\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
|
||||
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
|
||||
[]string{"\u8fd8", "\u9700\u8981", "\u5f88", "\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
|
||||
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
|
||||
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
|
||||
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
|
||||
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
|
||||
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
|
||||
[]string{"\u56e0"},
|
||||
[]string{},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
|
||||
[]string{"\u540e\u6765", "\u6211", "\u624d"},
|
||||
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
|
||||
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
|
||||
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
|
||||
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3", "\u90e8", "\u8499", "\u4eba", "\u7684", "\u95f2", "\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d", "\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
|
||||
[]string{"\u5927"},
|
||||
[]string{},
|
||||
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728", "\u7406"},
|
||||
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
|
||||
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
|
||||
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
|
||||
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
|
||||
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
|
||||
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
|
||||
[]string{"\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c", "\u6865", "\u539f\u56e0"},
|
||||
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3\u5904"},
|
||||
[]string{"\u90a2", "\u6c38", "\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
|
||||
[]string{"BP", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206\u5ea6", "\uff1f"},
|
||||
[]string{"\u5357\u4eac\u5e02", "\u957f\u6c5f\u5927\u6865"},
|
||||
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
|
||||
[]string{"\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
|
||||
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
|
||||
[]string{"\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
|
||||
[]string{"\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731", "\u4f1a", "\u9707", "\u662f", "\u5728", "\u5b59", "\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303", "\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d", "\u5f80", "\u5de6", "\u662f", "\u674e", "\u677e", "\u6d2a"},
|
||||
[]string{"\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
|
||||
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
|
||||
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a\u5934"},
|
||||
[]string{"\u6211", "\u662f", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73\u95e8", "\u7ad9"},
|
||||
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
|
||||
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
|
||||
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3", ".", "14159"},
|
||||
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6", "\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
|
||||
[]string{"\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
|
||||
}
|
||||
|
||||
cutForSearchResult = [][]string{
|
||||
[]string{"\u8fd9\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4e0d\u89c1", "\u4e94\u6307", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u609f\u7a7a", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
|
||||
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
|
||||
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
|
||||
[]string{"\u5de5\u4fe1\u5904", "\u5e72\u4e8b", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362", "\u6362\u673a", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
|
||||
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df", "\u79df\u623f", "\u5ec9\u79df\u623f"},
|
||||
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650", "\u516c\u53f8", "\u6709\u9650\u516c\u53f8"},
|
||||
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89", "\u5929\u5b89\u95e8"},
|
||||
[]string{"abc"},
|
||||
[]string{"\u9690", "\u53ef\u592b", "\u9a6c\u5c14\u53ef", "\u9a6c\u5c14\u53ef\u592b"},
|
||||
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
|
||||
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97", "\u7b97\u673a", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
|
||||
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
|
||||
[]string{"\u4f0a\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
|
||||
[]string{"\u4e2d\u56fd", "\u79d1\u5b66", "\u5b66\u9662", "\u8ba1\u7b97", "\u6280\u672f", "\u7814\u7a76", "\u79d1\u5b66\u9662", "\u7814\u7a76\u6240", "\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
|
||||
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
|
||||
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
|
||||
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u6539\u8fdb", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e\u5e1a", "\u81ea\u73cd"},
|
||||
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u77f3\u9996", "\u77f3\u9996\u5e02"},
|
||||
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u5341\u5830", "\u5341\u5830\u5e02"},
|
||||
[]string{"\u7ecf\u7406", "\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
|
||||
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
|
||||
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
|
||||
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
|
||||
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
|
||||
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
|
||||
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u65e0\u5e95\u6d1e", "\u91cc"},
|
||||
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
|
||||
[]string{"\u5b59\u541b\u610f"},
|
||||
[]string{"\u5916\u4ea4", "\u5916\u4ea4\u90e8", "\u53d1\u8a00", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
|
||||
[]string{"\u9886\u5bfc", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db", "\u56db\u5c4a", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
|
||||
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
|
||||
[]string{"\u8fd8", "\u9700\u8981", "\u5f88\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
|
||||
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
|
||||
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
|
||||
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
|
||||
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
|
||||
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5", "\u4ee5\u4e3a", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
|
||||
[]string{"\u56e0"},
|
||||
[]string{},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4ec0\u4e48", "\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
|
||||
[]string{"\u540e\u6765", "\u6211", "\u624d"},
|
||||
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
|
||||
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u56e0\u4e3a", "\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
|
||||
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
|
||||
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd", "\u56fd\u4eba", "\u4eba\u5927", "\u5e38\u59d4", "\u59d4\u4f1a", "\u5e38\u59d4\u4f1a", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09", "\u4e09\u6b21", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e0d\u590d", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
|
||||
[]string{"\u5927"},
|
||||
[]string{},
|
||||
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728\u7406"},
|
||||
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
|
||||
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
|
||||
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
|
||||
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
|
||||
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
|
||||
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
|
||||
[]string{"\u54c8\u5c14", "\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c\u6865", "\u539f\u56e0"},
|
||||
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5165\u53e3\u5904"},
|
||||
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
|
||||
[]string{"BP", "\u795e\u7ecf", "\u7f51\u7edc", "\u795e\u7ecf\u7f51", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206", "\u5206\u5ea6", "\u533a\u5206\u5ea6", "\uff1f"},
|
||||
[]string{"\u5357\u4eac", "\u4eac\u5e02", "\u5357\u4eac\u5e02", "\u957f\u6c5f", "\u5927\u6865", "\u957f\u6c5f\u5927\u6865"},
|
||||
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528", "\u7528\u8005", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
|
||||
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
|
||||
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
|
||||
[]string{"\u9526\u6d9b", "\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
|
||||
[]string{"\u7a0b\u5e8f", "\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731\u4f1a\u9707", "\u662f", "\u5728", "\u5b59\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e\u677e\u6d2a"},
|
||||
[]string{"\u4e00\u6b21", "\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
|
||||
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
|
||||
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a", "\u548c\u5c1a\u5934"},
|
||||
[]string{"\u6211", "\u662f", "\u4e2d\u534e", "\u534e\u4eba", "\u4eba\u6c11", "\u5171\u548c", "\u5171\u548c\u56fd", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73", "\u548c\u5e73\u95e8", "\u7ad9"},
|
||||
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
|
||||
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
|
||||
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3.14159"},
|
||||
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
|
||||
[]string{"\u67aa\u6746", "\u6746\u5b50", "\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
|
||||
}
|
||||
|
||||
cutForSearchNoHMMResult = [][]string{
|
||||
[]string{"\u8fd9", "\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4e0d\u89c1", "\u4e94\u6307", "\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u609f\u7a7a", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231", "\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C++", "\u3002"},
|
||||
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c\u670d", "\u3002"},
|
||||
[]string{"\u96f7\u7334", "\u56de\u5f52", "\u4eba\u95f4", "\u3002"},
|
||||
[]string{"\u5de5\u4fe1\u5904", "\u5e72\u4e8b", "\u5973\u5e72\u4e8b", "\u6bcf\u6708", "\u7ecf\u8fc7", "\u4e0b\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3", "\u4ea4\u6362", "\u6362\u673a", "\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
|
||||
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df", "\u79df\u623f", "\u5ec9\u79df\u623f"},
|
||||
[]string{"\u6c38\u548c", "\u670d\u88c5", "\u9970\u54c1", "\u6709\u9650", "\u516c\u53f8", "\u6709\u9650\u516c\u53f8"},
|
||||
[]string{"\u6211", "\u7231", "\u5317\u4eac", "\u5929\u5b89", "\u5929\u5b89\u95e8"},
|
||||
[]string{"abc"},
|
||||
[]string{"\u9690", "\u53ef\u592b", "\u9a6c\u5c14\u53ef", "\u9a6c\u5c14\u53ef\u592b"},
|
||||
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
|
||||
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00", "\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97", "\u7b97\u673a", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24", "\u90e8\u5206", "\u7ec4\u6210"},
|
||||
[]string{"\u8349\u6ce5\u9a6c", "\u548c", "\u6b3a", "\u5b9e", "\u9a6c", "\u662f", "\u4eca\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
|
||||
[]string{"\u4f0a", "\u85e4", "\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
|
||||
[]string{"\u4e2d\u56fd", "\u79d1\u5b66", "\u5b66\u9662", "\u8ba1\u7b97", "\u6280\u672f", "\u7814\u7a76", "\u79d1\u5b66\u9662", "\u7814\u7a76\u6240", "\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240"},
|
||||
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
|
||||
[]string{"\u6211", "\u8d2d\u4e70", "\u4e86", "\u9053\u5177", "\u548c", "\u670d\u88c5"},
|
||||
[]string{"PS", ":", " ", "\u6211", "\u89c9\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d\u5904", "\uff0c", "\u5c31\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u6539\u8fdb", "\u4e0d\u65ad\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e", "\u5e1a", "\u81ea\u73cd"},
|
||||
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u77f3\u9996", "\u77f3\u9996\u5e02"},
|
||||
[]string{"\u6e56\u5317", "\u6e56\u5317\u7701", "\u5341\u5830", "\u5341\u5830\u5e02"},
|
||||
[]string{"\u7ecf\u7406", "\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
|
||||
[]string{"\u505a\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00\u4e86\u767e\u4e86", "\u4e86"},
|
||||
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e\u7684", "\u7a7a\u8c03"},
|
||||
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u521d\u59cb\u5316", "\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
|
||||
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d\u591a", "\u539f\u5b50", "\u7ec4\u7ec7", "\u6210", "\u7684"},
|
||||
[]string{"\u795d", "\u4f60", "\u9a6c\u5230\u529f\u6210"},
|
||||
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u65e0\u5e95\u6d1e", "\u91cc"},
|
||||
[]string{"\u4e2d\u56fd", "\u7684", "\u9996\u90fd", "\u662f", "\u5317\u4eac"},
|
||||
[]string{"\u5b59", "\u541b", "\u610f"},
|
||||
[]string{"\u5916\u4ea4", "\u5916\u4ea4\u90e8", "\u53d1\u8a00", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
|
||||
[]string{"\u9886\u5bfc", "\u9886\u5bfc\u4eba", "\u4f1a\u8bae", "\u548c", "\u7b2c\u56db", "\u56db\u5c4a", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a", "\u5cf0\u4f1a"},
|
||||
[]string{"\u5728", "\u8fc7\u53bb", "\u7684", "\u8fd9", "\u4e94\u5e74"},
|
||||
[]string{"\u8fd8", "\u9700\u8981", "\u5f88", "\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
|
||||
[]string{"60", "\u5468\u5e74", "\u9996\u90fd", "\u9605\u5175"},
|
||||
[]string{"\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4e70", "\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a", "\u535a\u56ed", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4f46\u662f", "\u540e\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
|
||||
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
|
||||
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
|
||||
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5", "\u4ee5\u4e3a", "\u4e0d\u4ee5\u4e3a\u803b", "\uff0c", "\u53cd", "\u4ee5\u4e3a", "rong"},
|
||||
[]string{"\u56e0"},
|
||||
[]string{},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
|
||||
[]string{"hello", "\u4f60\u597d", "\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4ec0\u4e48", "\u4e3a\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3\u8981", "\u7684", "\u751f\u6d3b"},
|
||||
[]string{"\u540e\u6765", "\u6211", "\u624d"},
|
||||
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a\u4e86"},
|
||||
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u5176\u5b9e", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u56e0\u4e3a", "\u662f\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
|
||||
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
|
||||
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3", "\u90e8", "\u8499", "\u4eba", "\u7684", "\u95f2", "\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd", "\u56fd\u4eba", "\u4eba\u5927", "\u5e38\u59d4", "\u59d4\u4f1a", "\u5e38\u59d4\u4f1a", "\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09", "\u4e09\u6b21", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743", "\u8d23\u4efb", "\u8d23\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709\u5173", "\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d", "\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39", "\u6d88\u8d39\u8005", "\u7531\u6b64", "\u5c06", "\u9677\u5165", "\u4e0d\u590d", "\u4e07\u52ab\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
|
||||
[]string{"\u5927"},
|
||||
[]string{},
|
||||
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728", "\u7406"},
|
||||
[]string{"\u957f\u6625", "\u5e02\u957f", "\u6625\u8282", "\u8bb2\u8bdd"},
|
||||
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
|
||||
[]string{"\u7ed3\u5408", "\u6210", "\u5206\u5b50", "\u65f6"},
|
||||
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700\u597d", "\u7684"},
|
||||
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684\u786e", "\u662f", "\u6211", "\u7684", "\u9519"},
|
||||
[]string{"\u4f9b", "\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
|
||||
[]string{"\u54c8\u5c14", "\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03", "\u584c", "\u6865", "\u539f\u56e0"},
|
||||
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5165\u53e3\u5904"},
|
||||
[]string{"\u90a2", "\u6c38", "\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
|
||||
[]string{"BP", "\u795e\u7ecf", "\u7f51\u7edc", "\u795e\u7ecf\u7f51", "\u795e\u7ecf\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0", "\u533a\u5206", "\u5206\u5ea6", "\u533a\u5206\u5ea6", "\uff1f"},
|
||||
[]string{"\u5357\u4eac", "\u4eac\u5e02", "\u5357\u4eac\u5e02", "\u957f\u6c5f", "\u5927\u6865", "\u957f\u6c5f\u5927\u6865"},
|
||||
[]string{"\u5e94", "\u4e00\u4e9b", "\u4f7f\u7528", "\u7528\u8005", "\u4f7f\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a\u4e86", "\u4fbf\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
|
||||
[]string{"\u957f\u6625", "\u957f\u6625\u5e02", "\u957f\u6625", "\u836f\u5e97"},
|
||||
[]string{"\u9093\u9896\u8d85", "\u751f\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
|
||||
[]string{"\u9526\u6d9b", "\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231", "\u4e16\u754c", "\u548c\u5e73", "\u7684", "\u653f\u6cbb", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
|
||||
[]string{"\u7a0b\u5e8f", "\u7a0b\u5e8f\u5458", "\u795d", "\u6d77\u6797", "\u548c", "\u6731", "\u4f1a", "\u9707", "\u662f", "\u5728", "\u5b59", "\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303", "\u51ef", "\u5728", "\u6700", "\u53f3\u9762", ".", "\u518d", "\u5f80", "\u5de6", "\u662f", "\u674e", "\u677e", "\u6d2a"},
|
||||
[]string{"\u4e00\u6b21", "\u4e00\u6b21\u6027", "\u4ea4", "\u591a\u5c11", "\u94b1"},
|
||||
[]string{"\u4e24\u5757", "\u4e94", "\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b", "\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
|
||||
[]string{"\u5c0f", "\u548c\u5c1a", "\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c\u5c1a", "\u4e00\u6837", "\u7684", "\u548c\u5c1a", "\u548c\u5c1a\u5934"},
|
||||
[]string{"\u6211", "\u662f", "\u4e2d\u534e", "\u534e\u4eba", "\u4eba\u6c11", "\u5171\u548c", "\u5171\u548c\u56fd", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171\u548c", "\u5171\u548c\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c\u5e73", "\u548c\u5e73\u95e8", "\u7ad9"},
|
||||
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11", "\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B\u8d85", "\u7136\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T\u6064"},
|
||||
[]string{"AT&T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
|
||||
[]string{"C++", "\u548c", "c#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3", ".", "14159"},
|
||||
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d", "\u63e1\u624b", "\u7684", "\u7684\u54e5", "\u5417", "\uff1f", "\u4ed6", "\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684\u58eb", "\u3002"},
|
||||
[]string{"\u67aa\u6746", "\u6746\u5b50", "\u67aa\u6746\u5b50", "\u4e2d", "\u51fa", "\u653f\u6743"},
|
||||
}
|
||||
|
||||
userDictCutResult = [][]string{
|
||||
[]string{"\u8fd9\u662f", "\u4e00\u4e2a", "\u4f38\u624b", "\u4e0d\u89c1", "\u4e94\u6307", "\u7684", "\u9ed1\u591c", "\u3002", "\u6211", "\u53eb", "\u5b59\u609f\u7a7a", "\uff0c", "\u6211", "\u7231\u5317\u4eac", "\uff0c", "\u6211", "\u7231", "Python", "\u548c", "C", "++", "\u3002"},
|
||||
[]string{"\u6211", "\u4e0d", "\u559c\u6b22", "\u65e5\u672c", "\u548c", "\u670d", "\u3002"},
|
||||
[]string{"\u96f7\u7334", "\u56de\u5f52\u4eba\u95f4", "\u3002"},
|
||||
[]string{"\u5de5\u4fe1", "\u5904\u5973", "\u5e72\u4e8b", "\u6bcf", "\u6708", "\u7ecf\u8fc7", "\u4e0b", "\u5c5e", "\u79d1\u5ba4", "\u90fd", "\u8981", "\u4eb2\u53e3", "\u4ea4\u4ee3", "24", "\u53e3\u4ea4\u6362\u673a", "\u7b49", "\u6280\u672f\u6027", "\u5668\u4ef6", "\u7684", "\u5b89\u88c5", "\u5de5\u4f5c"},
|
||||
[]string{"\u6211", "\u9700\u8981", "\u5ec9\u79df\u623f"},
|
||||
[]string{"\u6c38\u548c\u670d", "\u88c5\u9970\u54c1", "\u6709", "\u9650\u516c\u53f8"},
|
||||
[]string{"\u6211", "\u7231\u5317\u4eac", "\u5929\u5b89\u95e8"},
|
||||
[]string{"abc"},
|
||||
[]string{"\u9690\u9a6c\u5c14", "\u53ef\u592b"},
|
||||
[]string{"\u96f7\u7334", "\u662f", "\u4e2a", "\u597d", "\u7f51\u7ad9"},
|
||||
[]string{"\u201c", "Microsoft", "\u201d", "\u4e00\u8bcd", "\u7531", "\u201c", "MICROcomputer", "\uff08", "\u5fae\u578b", "\u8ba1\u7b97\u673a", "\uff09", "\u201d", "\u548c", "\u201c", "SOFTware", "\uff08", "\u8f6f\u4ef6", "\uff09", "\u201d", "\u4e24\u90e8\u5206", "\u7ec4\u6210"},
|
||||
[]string{"\u8349\u6ce5", "\u9a6c", "\u548c", "\u6b3a\u5b9e", "\u9a6c", "\u662f", "\u4eca", "\u5e74", "\u7684", "\u6d41\u884c", "\u8bcd\u6c47"},
|
||||
[]string{"\u4f0a\u85e4\u6d0b\u534e\u5802", "\u603b\u5e9c", "\u5e97"},
|
||||
[]string{"\u4e2d\u56fd", "\u79d1\u5b66\u9662", "\u8ba1\u7b97", "\u6280\u672f", "\u7814\u7a76", "\u6240"},
|
||||
[]string{"\u7f57\u5bc6\u6b27", "\u4e0e", "\u6731\u4e3d\u53f6"},
|
||||
[]string{"\u6211\u8d2d", "\u4e70", "\u4e86", "\u9053", "\u5177", "\u548c", "\u670d\u88c5"},
|
||||
[]string{"PS", ":", " ", "\u6211\u89c9", "\u5f97", "\u5f00\u6e90", "\u6709", "\u4e00\u4e2a", "\u597d", "\u5904", "\uff0c", "\u5c31", "\u662f", "\u80fd\u591f", "\u6566\u4fc3", "\u81ea\u5df1", "\u4e0d\u65ad", "\u6539\u8fdb", "\uff0c", "\u907f\u514d", "\u655e\u5e1a", "\u81ea\u73cd"},
|
||||
[]string{"\u6e56\u5317\u7701", "\u77f3\u9996\u5e02"},
|
||||
[]string{"\u6e56\u5317\u7701", "\u5341\u5830\u5e02"},
|
||||
[]string{"\u603b\u7ecf\u7406", "\u5b8c\u6210", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5"},
|
||||
[]string{"\u7535\u8111", "\u4fee\u597d", "\u4e86"},
|
||||
[]string{"\u505a", "\u597d", "\u4e86", "\u8fd9\u4ef6", "\u4e8b\u60c5", "\u5c31", "\u4e00", "\u4e86", "\u767e", "\u4e86", "\u4e86"},
|
||||
[]string{"\u4eba\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u6211\u4eec", "\u4e70", "\u4e86", "\u4e00\u4e2a", "\u7f8e", "\u7684", "\u7a7a\u8c03"},
|
||||
[]string{"\u7ebf\u7a0b", "\u521d\u59cb", "\u5316\u65f6", "\u6211\u4eec", "\u8981", "\u6ce8\u610f"},
|
||||
[]string{"\u4e00\u4e2a", "\u5206\u5b50", "\u662f", "\u7531", "\u597d", "\u591a", "\u539f\u5b50", "\u7ec4\u7ec7\u6210", "\u7684"},
|
||||
[]string{"\u795d", "\u4f60", "\u9a6c\u5230", "\u529f\u6210"},
|
||||
[]string{"\u4ed6", "\u6389", "\u8fdb", "\u4e86", "\u65e0\u5e95", "\u6d1e\u91cc"},
|
||||
[]string{"\u4e2d\u56fd", "\u7684", "\u9996", "\u90fd", "\u662f", "\u5317\u4eac"},
|
||||
[]string{"\u5b59\u541b\u610f"},
|
||||
[]string{"\u5916\u4ea4\u90e8", "\u53d1\u8a00\u4eba", "\u9a6c\u671d\u65ed"},
|
||||
[]string{"\u9886\u5bfc", "\u4eba\u4f1a\u8bae", "\u548c", "\u7b2c\u56db\u5c4a", "\u4e1c\u4e9a\u5cf0", "\u4f1a"},
|
||||
[]string{"\u5728", "\u8fc7", "\u53bb", "\u7684", "\u8fd9\u4e94\u5e74"},
|
||||
[]string{"\u8fd8", "\u9700\u8981", "\u5f88\u957f", "\u7684", "\u8def", "\u8981", "\u8d70"},
|
||||
[]string{"60", "\u5468\u5e74\u9996", "\u90fd", "\u9605\u5175"},
|
||||
[]string{"\u4f60", "\u597d\u4eba", "\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e70\u6c34\u679c", "\u7136\u540e", "\u6765", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4e70\u6c34\u679c", "\u7136\u540e", "\u53bb", "\u4e16\u535a\u56ed"},
|
||||
[]string{"\u4f46", "\u662f", "\u540e", "\u6765", "\u6211", "\u624d", "\u77e5\u9053", "\u4f60", "\u662f", "\u5bf9", "\u7684"},
|
||||
[]string{"\u5b58\u5728", "\u5373", "\u5408\u7406"},
|
||||
[]string{"\u7684", "\u7684", "\u7684", "\u7684", "\u7684", "\u5728", "\u7684", "\u7684", "\u7684", "\u7684", "\u5c31", "\u4ee5", "\u548c", "\u548c", "\u548c"},
|
||||
[]string{"I", " ", "love", "\u4f60", "\uff0c", "\u4e0d\u4ee5", "\u4e3a\u803b", "\uff0c", "\u53cd\u4ee5", "\u4e3a", "rong"},
|
||||
[]string{"\u56e0"},
|
||||
[]string{},
|
||||
[]string{"hello", "\u4f60", "\u597d\u4eba", "\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u5f88", "\u597d", "\u4f46", "\u4e3b\u8981", "\u662f", "\u57fa\u4e8e", "\u7f51\u9875", "\u5f62\u5f0f"},
|
||||
[]string{"hello", "\u4f60", "\u597d\u4eba", "\u4eec", "\u5ba1\u7f8e", "\u7684", "\u89c2\u70b9", "\u662f", "\u4e0d\u540c", "\u7684"},
|
||||
[]string{"\u4e3a", "\u4ec0\u4e48", "\u6211", "\u4e0d\u80fd", "\u62e5\u6709", "\u60f3", "\u8981", "\u7684", "\u751f\u6d3b"},
|
||||
[]string{"\u540e\u6765", "\u6211", "\u624d"},
|
||||
[]string{"\u6b64\u6b21", "\u6765", "\u4e2d\u56fd", "\u662f", "\u4e3a", "\u4e86"},
|
||||
[]string{"\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{",", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u5176\u5b9e", "\u4f7f", "\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u597d\u4eba", "\u4f7f\u7528", "\u4e86", "\u5b83", "\u5c31", "\u53ef\u4ee5", "\u89e3\u51b3", "\u4e00\u4e9b", "\u95ee\u9898"},
|
||||
[]string{"\u662f", "\u56e0\u4e3a", "\u548c", "\u56fd\u5bb6"},
|
||||
[]string{"\u8001\u5e74", "\u641c\u7d22", "\u8fd8", "\u652f\u6301"},
|
||||
[]string{"\u5e72\u8106", "\u5c31", "\u628a", "\u90a3\u90e8", "\u8499\u4eba", "\u7684", "\u95f2\u6cd5", "\u7ed9", "\u5e9f", "\u4e86", "\u62c9", "\u5012", "\uff01", "RT", " ", "@", "laoshipukong", " ", ":", " ", "27", "\u65e5", "\uff0c", "\u5168\u56fd\u4eba", "\u5927\u5e38\u59d4\u4f1a", "\u7b2c\u4e09\u6b21", "\u5ba1\u8bae", "\u4fb5\u6743\u8d23", "\u4efb\u6cd5", "\u8349\u6848", "\uff0c", "\u5220\u9664", "\u4e86", "\u6709", "\u5173\u533b\u7597", "\u635f\u5bb3", "\u8d23\u4efb", "\u201c", "\u4e3e\u8bc1", "\u5012", "\u7f6e", "\u201d", "\u7684", "\u89c4\u5b9a", "\u3002", "\u5728", "\u533b\u60a3", "\u7ea0\u7eb7", "\u4e2d\u672c", "\u5df2", "\u5904\u4e8e", "\u5f31\u52bf", "\u5730\u4f4d", "\u7684", "\u6d88\u8d39\u8005", "\u7531", "\u6b64", "\u5c06", "\u9677\u5165", "\u4e07\u52ab", "\u4e0d\u590d", "\u7684", "\u5883\u5730", "\u3002", " "},
|
||||
[]string{"\u5927"},
|
||||
[]string{},
|
||||
[]string{"\u4ed6", "\u8bf4", "\u7684", "\u786e\u5b9e", "\u5728", "\u7406"},
|
||||
[]string{"\u957f\u6625\u5e02", "\u957f\u6625\u8282", "\u8bb2\u8bdd"},
|
||||
[]string{"\u7ed3\u5a5a", "\u7684", "\u548c", "\u5c1a\u672a", "\u7ed3\u5a5a", "\u7684"},
|
||||
[]string{"\u7ed3\u5408\u6210", "\u5206\u5b50", "\u65f6"},
|
||||
[]string{"\u65c5\u6e38", "\u548c", "\u670d\u52a1", "\u662f", "\u6700", "\u597d", "\u7684"},
|
||||
[]string{"\u8fd9\u4ef6", "\u4e8b\u60c5", "\u7684", "\u786e\u662f", "\u6211", "\u7684", "\u9519"},
|
||||
[]string{"\u4f9b\u5927\u5bb6", "\u53c2\u8003", "\u6307\u6b63"},
|
||||
[]string{"\u54c8\u5c14\u6ee8", "\u653f\u5e9c", "\u516c\u5e03\u584c\u6865", "\u539f\u56e0"},
|
||||
[]string{"\u6211", "\u5728", "\u673a\u573a", "\u5165\u53e3", "\u5904"},
|
||||
[]string{"\u90a2\u6c38\u81e3", "\u6444\u5f71", "\u62a5\u9053"},
|
||||
[]string{"BP", "\u795e\u7ecf", "\u7f51\u7edc", "\u5982\u4f55", "\u8bad\u7ec3", "\u624d", "\u80fd", "\u5728", "\u5206\u7c7b", "\u65f6", "\u589e\u52a0\u533a", "\u5206\u5ea6", "\uff1f"},
|
||||
[]string{"\u5357\u4eac\u5e02", "\u957f\u6c5f\u5927\u6865"},
|
||||
[]string{"\u5e94\u4e00\u4e9b", "\u4f7f", "\u7528\u8005", "\u7684", "\u5efa\u8bae", "\uff0c", "\u4e5f", "\u4e3a", "\u4e86", "\u4fbf", "\u4e8e", "\u5229\u7528", "NiuTrans", "\u7528\u4e8e", "SMT", "\u7814\u7a76"},
|
||||
[]string{"\u957f\u6625\u5e02", "\u957f\u6625\u836f\u5e97"},
|
||||
[]string{"\u9093\u9896", "\u8d85\u751f", "\u524d", "\u6700", "\u559c\u6b22", "\u7684", "\u8863\u670d"},
|
||||
[]string{"\u80e1\u9526\u6d9b", "\u662f", "\u70ed\u7231\u4e16\u754c", "\u548c", "\u5e73", "\u7684", "\u653f\u6cbb\u5c40", "\u5e38\u59d4"},
|
||||
[]string{"\u7a0b\u5e8f\u5458", "\u795d\u6d77\u6797", "\u548c", "\u6731\u4f1a\u9707", "\u662f", "\u5728", "\u5b59\u5065", "\u7684", "\u5de6\u9762", "\u548c", "\u53f3\u9762", ",", " ", "\u8303\u51ef", "\u5728", "\u6700\u53f3\u9762", ".", "\u518d\u5f80", "\u5de6", "\u662f", "\u674e\u677e\u6d2a"},
|
||||
[]string{"\u4e00\u6b21\u6027", "\u4ea4\u591a\u5c11", "\u94b1"},
|
||||
[]string{"\u4e24\u5757", "\u4e94\u4e00\u5957", "\uff0c", "\u4e09\u5757", "\u516b\u4e00\u65a4", "\uff0c", "\u56db\u5757", "\u4e03", "\u4e00\u672c", "\uff0c", "\u4e94\u5757", "\u516d", "\u4e00\u6761"},
|
||||
[]string{"\u5c0f", "\u548c", "\u5c1a\u7559", "\u4e86", "\u4e00\u4e2a", "\u50cf", "\u5927", "\u548c", "\u5c1a", "\u4e00\u6837", "\u7684", "\u548c", "\u5c1a\u5934"},
|
||||
[]string{"\u6211", "\u662f", "\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "\u516c\u6c11", ";", "\u6211", "\u7238\u7238", "\u662f", "\u5171", "\u548c", "\u515a", "\u515a\u5458", ";", " ", "\u5730\u94c1", "\u548c", "\u5e73\u95e8", "\u7ad9"},
|
||||
[]string{"\u5f20\u6653\u6885", "\u53bb", "\u4eba\u6c11\u533b\u9662", "\u505a", "\u4e86", "\u4e2a", "B", "\u8d85\u7136", "\u540e", "\u53bb", "\u4e70", "\u4e86", "\u4ef6", "T", "\u6064"},
|
||||
[]string{"AT", "&", "T", "\u662f", "\u4e00\u4ef6", "\u4e0d\u9519", "\u7684", "\u516c\u53f8", "\uff0c", "\u7ed9", "\u4f60", "\u53d1", "offer", "\u4e86", "\u5417", "\uff1f"},
|
||||
[]string{"C", "++", "\u548c", "c", "#", "\u662f", "\u4ec0\u4e48", "\u5173\u7cfb", "\uff1f", "11", "+", "122", "=", "133", "\uff0c", "\u662f", "\u5417", "\uff1f", "PI", "=", "3.14159"},
|
||||
[]string{"\u4f60", "\u8ba4\u8bc6", "\u90a3\u4e2a", "\u548c", "\u4e3b\u5e2d\u63e1", "\u624b", "\u7684", "\u7684", "\u54e5", "\u5417", "\uff1f", "\u4ed6\u5f00", "\u4e00\u8f86", "\u9ed1\u8272", "\u7684", "\u58eb", "\u3002"},
|
||||
[]string{"\u67aa\u6746\u5b50", "\u4e2d", "\u51fa\u653f\u6743"},
|
||||
}
|
||||
)
|
||||
|
||||
func init() {
|
||||
SetDictionary("dict.txt")
|
||||
}
|
||||
|
||||
func TestCutDAG(t *testing.T) {
|
||||
result := cut_DAG("BP神经网络如何训练才能在分类时增加区分度?")
|
||||
if len(result) != 11 {
|
||||
t.Error(result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCutDAGNoHmm(t *testing.T) {
|
||||
result := cut_DAG_NO_HMM("BP神经网络如何训练才能在分类时增加区分度?")
|
||||
if len(result) != 11 {
|
||||
t.Error(result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegexpSplit(t *testing.T) {
|
||||
result := RegexpSplit(regexp.MustCompile(`\p{Han}+`),
|
||||
"BP神经网络如何训练才能在分类时增加区分度?")
|
||||
if len(result) != 3 {
|
||||
t.Error(result)
|
||||
}
|
||||
result = RegexpSplit(regexp.MustCompile(`([\p{Han}#]+)`),
|
||||
",BP神经网络如何训练才能在分类时#增加区分度?")
|
||||
if len(result) != 3 {
|
||||
t.Error(result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultCut(t *testing.T) {
|
||||
var result []string
|
||||
for index, content := range test_contents {
|
||||
result = Cut(content, false, true)
|
||||
if len(result) != len(defaultCutResult[index]) {
|
||||
t.Errorf("default cut for %s length should be %d not %d\n",
|
||||
content, len(defaultCutResult[index]), len(result))
|
||||
}
|
||||
for i, r := range result {
|
||||
if r != defaultCutResult[index][i] {
|
||||
t.Error(r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCutAll(t *testing.T) {
|
||||
var result []string
|
||||
for index, content := range test_contents {
|
||||
result = Cut(content, true, true)
|
||||
if len(result) != len(cutAllResult[index]) {
|
||||
t.Errorf("cut all for %s length should be %d not %d\n",
|
||||
content, len(cutAllResult[index]), len(result))
|
||||
}
|
||||
for i, c := range result {
|
||||
if c != cutAllResult[index][i] {
|
||||
t.Error(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultCutNoHMM(t *testing.T) {
|
||||
var result []string
|
||||
for index, content := range test_contents {
|
||||
result = Cut(content, false, false)
|
||||
if len(result) != len(defaultCutNoHMMResult[index]) {
|
||||
t.Errorf("default cut no hmm for %s length should be %d not %d\n",
|
||||
content, len(defaultCutNoHMMResult[index]), len(result))
|
||||
}
|
||||
for i, c := range result {
|
||||
if c != defaultCutNoHMMResult[index][i] {
|
||||
t.Error(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCutForSearch(t *testing.T) {
|
||||
var result []string
|
||||
for index, content := range test_contents {
|
||||
result = CutForSearch(content, true)
|
||||
if len(result) != len(cutForSearchResult[index]) {
|
||||
t.Errorf("cut for search for %s length should be %d not %d\n",
|
||||
content, len(cutForSearchResult[index]), len(result))
|
||||
}
|
||||
for i, c := range result {
|
||||
if c != cutForSearchResult[index][i] {
|
||||
t.Error(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
for index, content := range test_contents {
|
||||
result = CutForSearch(content, false)
|
||||
if len(result) != len(cutForSearchNoHMMResult[index]) {
|
||||
t.Errorf("cut for search no hmm for %s length should be %d not %d\n",
|
||||
content, len(cutForSearchNoHMMResult[index]), len(result))
|
||||
}
|
||||
for i, c := range result {
|
||||
if c != cutForSearchNoHMMResult[index][i] {
|
||||
t.Error(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetdictionary(t *testing.T) {
|
||||
var result []string
|
||||
SetDictionary("foobar.txt")
|
||||
for index, content := range test_contents {
|
||||
result = Cut(content, false, true)
|
||||
if len(result) != len(userDictCutResult[index]) {
|
||||
t.Errorf("default cut with user dictionary for %s length should be %d not %d\n",
|
||||
content, len(userDictCutResult[index]), len(result))
|
||||
}
|
||||
for i, c := range result {
|
||||
if c != userDictCutResult[index][i] {
|
||||
t.Error(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
79475
posseg/char_state_tab.go
Normal file
79475
posseg/char_state_tab.go
Normal file
File diff suppressed because it is too large
Load Diff
16
posseg/char_state_tab_test.go
Normal file
16
posseg/char_state_tab_test.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package posseg
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
result := CharStateTab.Get('\u8000')
|
||||
if len(result) != 17 {
|
||||
t.FailNow()
|
||||
}
|
||||
result = CharStateTab.Get('\uaaaa')
|
||||
if len(result) == 17 {
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
292
posseg/posseg.go
Normal file
292
posseg/posseg.go
Normal file
@@ -0,0 +1,292 @@
|
||||
package posseg
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"github.com/wangbin/jiebago"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
	// WordTagTab maps a word to its tag. It is filled by load_model and
	// extended by Cut with the entries of jiebago.UserWordTagTab.
	WordTagTab = make(map[string]string)

	// isUserDictLoaded records whether Cut has already copied
	// jiebago.UserWordTagTab into WordTagTab.
	isUserDictLoaded bool
)
|
||||
|
||||
// WordTag pairs a segmented word with the tag assigned to it.
type WordTag struct {
	// Word is the text of the segment.
	Word string
	// Tag is the label attached to Word (for example "x", "m" or "eng").
	Tag string
}

// String renders the pair as "Word/Tag".
func (w WordTag) String() string {
	word, tag := w.Word, w.Tag
	return fmt.Sprintf("%s/%s", word, tag)
}
|
||||
|
||||
func init() {
|
||||
_, filename, _, _ := runtime.Caller(1)
|
||||
dict_dir := filepath.Dir(filepath.Dir(filename))
|
||||
dict_path := filepath.Join(dict_dir, jiebago.Dictionary)
|
||||
load_model(dict_path)
|
||||
}
|
||||
|
||||
func load_model(f_name string) error {
|
||||
file, openError := os.Open(f_name)
|
||||
if openError != nil {
|
||||
return openError
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
reader := bufio.NewReader(file)
|
||||
for {
|
||||
line, readError := reader.ReadString('\n')
|
||||
if readError != nil && len(line) == 0 {
|
||||
break
|
||||
}
|
||||
words := strings.Split(strings.TrimSpace(line), " ")
|
||||
word, tag := words[0], words[2]
|
||||
WordTagTab[word] = tag
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func __cut(sentence string) []WordTag {
|
||||
result := make([]WordTag, 0)
|
||||
runes := []rune(sentence)
|
||||
_, posList := Viterbi(runes)
|
||||
begin := 0
|
||||
next := 0
|
||||
for i, char := range runes {
|
||||
pos := posList[i].State
|
||||
switch pos {
|
||||
case 'B':
|
||||
begin = i
|
||||
case 'E':
|
||||
result = append(result, WordTag{string(runes[begin : i+1]), posList[i].Tag})
|
||||
next = i + 1
|
||||
case 'S':
|
||||
result = append(result, WordTag{string(char), posList[i].Tag})
|
||||
next = i + 1
|
||||
}
|
||||
}
|
||||
if next < len(runes) {
|
||||
result = append(result, WordTag{string(runes[next:]), posList[next].Tag})
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func cutDetail(sentence string) []WordTag {
|
||||
result := make([]WordTag, 0)
|
||||
re_han := regexp.MustCompile(`\p{Han}+`)
|
||||
re_skip := regexp.MustCompile(`[[\.[:digit:]]+|[:alnum:]]+`)
|
||||
|
||||
re_eng := regexp.MustCompile(`[[:alnum:]]`)
|
||||
re_num := regexp.MustCompile(`[\.[:digit:]]+`)
|
||||
blocks := jiebago.RegexpSplit(re_han, sentence)
|
||||
for _, blk := range blocks {
|
||||
if re_han.MatchString(blk) {
|
||||
for _, wordTag := range __cut(blk) {
|
||||
result = append(result, wordTag)
|
||||
}
|
||||
} else {
|
||||
for _, x := range jiebago.RegexpSplit(re_skip, blk) {
|
||||
if len(x) == 0 {
|
||||
continue
|
||||
}
|
||||
switch {
|
||||
case re_num.MatchString(x):
|
||||
result = append(result, WordTag{x, "m"})
|
||||
case re_eng.MatchString(x):
|
||||
result = append(result, WordTag{x, "eng"})
|
||||
default:
|
||||
result = append(result, WordTag{x, "x"})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// cutAction is the signature of the block cutters that cut chooses between
// (cut_DAG and cut_DAG_NO_HMM): it turns a sentence into tagged words.
type cutAction func(sentence string) []WordTag
|
||||
|
||||
func cut_DAG(sentence string) []WordTag {
|
||||
dag := jiebago.GetDAG(sentence)
|
||||
routes := jiebago.Calc(sentence, dag, 0)
|
||||
x := 0
|
||||
var y int
|
||||
runes := []rune(sentence)
|
||||
length := len(runes)
|
||||
result := make([]WordTag, 0)
|
||||
buf := make([]rune, 0)
|
||||
for {
|
||||
if x >= length {
|
||||
break
|
||||
}
|
||||
y = routes[x].Index + 1
|
||||
l_word := runes[x:y]
|
||||
if y-x == 1 {
|
||||
buf = append(buf, l_word...)
|
||||
} else {
|
||||
if len(buf) > 0 {
|
||||
if len(buf) == 1 {
|
||||
sbuf := string(buf)
|
||||
if tag, ok := WordTagTab[sbuf]; ok {
|
||||
result = append(result, WordTag{sbuf, tag})
|
||||
} else {
|
||||
result = append(result, WordTag{sbuf, "x"})
|
||||
}
|
||||
buf = make([]rune, 0)
|
||||
} else {
|
||||
bufString := string(buf)
|
||||
if _, ok := jiebago.TT.Freq[bufString]; !ok {
|
||||
recognized := cutDetail(bufString)
|
||||
for _, t := range recognized {
|
||||
result = append(result, t)
|
||||
}
|
||||
} else {
|
||||
for _, elem := range buf {
|
||||
selem := string(elem)
|
||||
if tag, ok := WordTagTab[selem]; ok {
|
||||
result = append(result, WordTag{string(elem), tag})
|
||||
} else {
|
||||
result = append(result, WordTag{string(elem), "x"})
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
buf = make([]rune, 0)
|
||||
}
|
||||
}
|
||||
sl_word := string(l_word)
|
||||
if tag, ok := WordTagTab[sl_word]; ok {
|
||||
result = append(result, WordTag{sl_word, tag})
|
||||
} else {
|
||||
result = append(result, WordTag{sl_word, "x"})
|
||||
}
|
||||
}
|
||||
x = y
|
||||
}
|
||||
|
||||
if len(buf) > 0 {
|
||||
if len(buf) == 1 {
|
||||
sbuf := string(buf)
|
||||
if tag, ok := WordTagTab[sbuf]; ok {
|
||||
result = append(result, WordTag{sbuf, tag})
|
||||
} else {
|
||||
result = append(result, WordTag{sbuf, "x"})
|
||||
}
|
||||
} else {
|
||||
bufString := string(buf)
|
||||
if _, ok := jiebago.TT.Freq[bufString]; !ok {
|
||||
recognized := cutDetail(bufString)
|
||||
for _, t := range recognized {
|
||||
result = append(result, t)
|
||||
}
|
||||
} else {
|
||||
for _, elem := range buf {
|
||||
selem := string(elem)
|
||||
if tag, ok := WordTagTab[selem]; ok {
|
||||
result = append(result, WordTag{selem, tag})
|
||||
} else {
|
||||
result = append(result, WordTag{selem, "x"})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func cut_DAG_NO_HMM(sentence string) []WordTag {
|
||||
result := make([]WordTag, 0)
|
||||
re_eng := regexp.MustCompile(`[[:alnum:]]`)
|
||||
dag := jiebago.GetDAG(sentence)
|
||||
routes := jiebago.Calc(sentence, dag, 0)
|
||||
x := 0
|
||||
var y int
|
||||
runes := []rune(sentence)
|
||||
length := len(runes)
|
||||
buf := make([]rune, 0)
|
||||
for {
|
||||
if x >= length {
|
||||
break
|
||||
}
|
||||
y = routes[x].Index + 1
|
||||
l_word := runes[x:y]
|
||||
if re_eng.MatchString(string(l_word)) && len(l_word) == 1 {
|
||||
buf = append(buf, l_word...)
|
||||
x = y
|
||||
} else {
|
||||
if len(buf) > 0 {
|
||||
result = append(result, WordTag{string(buf), "eng"})
|
||||
buf = make([]rune, 0)
|
||||
}
|
||||
sl_word := string(l_word)
|
||||
if tag, ok := WordTagTab[sl_word]; ok {
|
||||
result = append(result, WordTag{sl_word, tag})
|
||||
} else {
|
||||
result = append(result, WordTag{sl_word, "x"})
|
||||
}
|
||||
x = y
|
||||
}
|
||||
}
|
||||
if len(buf) > 0 {
|
||||
result = append(result, WordTag{string(buf), "eng"})
|
||||
buf = make([]rune, 0)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func cut(sentence string, HMM bool) []WordTag {
|
||||
result := make([]WordTag, 0)
|
||||
re_han := regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
|
||||
re_skip := regexp.MustCompile(`(\r\n|\s)`)
|
||||
re_eng := regexp.MustCompile(`[[:alnum:]]`)
|
||||
re_num := regexp.MustCompile(`[\.[:digit:]]+`)
|
||||
blocks := jiebago.RegexpSplit(re_han, sentence)
|
||||
var cut_block cutAction
|
||||
if HMM {
|
||||
cut_block = cut_DAG
|
||||
} else {
|
||||
cut_block = cut_DAG_NO_HMM
|
||||
}
|
||||
for _, blk := range blocks {
|
||||
if re_han.MatchString(blk) {
|
||||
for _, wordTag := range cut_block(blk) {
|
||||
result = append(result, wordTag)
|
||||
}
|
||||
} else {
|
||||
for _, x := range jiebago.RegexpSplit(re_skip, blk) {
|
||||
if re_skip.MatchString(x) {
|
||||
result = append(result, WordTag{x, "x"})
|
||||
} else {
|
||||
for _, xx := range x {
|
||||
s := string(xx)
|
||||
switch {
|
||||
case re_num.MatchString(s):
|
||||
result = append(result, WordTag{s, "m"})
|
||||
case re_eng.MatchString(x):
|
||||
result = append(result, WordTag{x, "eng"})
|
||||
break
|
||||
default:
|
||||
result = append(result, WordTag{s, "x"})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func Cut(sentence string, HMM bool) []WordTag {
|
||||
if !isUserDictLoaded {
|
||||
for key, value := range jiebago.UserWordTagTab {
|
||||
WordTagTab[key] = value
|
||||
}
|
||||
isUserDictLoaded = true
|
||||
}
|
||||
return cut(sentence, HMM)
|
||||
}
|
||||
294
posseg/posseg_test.go
Normal file
294
posseg/posseg_test.go
Normal file
@@ -0,0 +1,294 @@
|
||||
package posseg
|
||||
|
||||
import (
|
||||
"github.com/wangbin/jiebago"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var (
|
||||
test_contents = []string{
|
||||
"这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。",
|
||||
"我不喜欢日本和服。",
|
||||
"雷猴回归人间。",
|
||||
"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
|
||||
"我需要廉租房",
|
||||
"永和服装饰品有限公司",
|
||||
"我爱北京天安门",
|
||||
"abc",
|
||||
"隐马尔可夫",
|
||||
"雷猴是个好网站",
|
||||
"“Microsoft”一词由“MICROcomputer(微型计算机)”和“SOFTware(软件)”两部分组成",
|
||||
"草泥马和欺实马是今年的流行词汇",
|
||||
"伊藤洋华堂总府店",
|
||||
"中国科学院计算技术研究所",
|
||||
"罗密欧与朱丽叶",
|
||||
"我购买了道具和服装",
|
||||
"PS: 我觉得开源有一个好处,就是能够敦促自己不断改进,避免敞帚自珍",
|
||||
"湖北省石首市",
|
||||
"湖北省十堰市",
|
||||
"总经理完成了这件事情",
|
||||
"电脑修好了",
|
||||
"做好了这件事情就一了百了了",
|
||||
"人们审美的观点是不同的",
|
||||
"我们买了一个美的空调",
|
||||
"线程初始化时我们要注意",
|
||||
"一个分子是由好多原子组织成的",
|
||||
"祝你马到功成",
|
||||
"他掉进了无底洞里",
|
||||
"中国的首都是北京",
|
||||
"孙君意",
|
||||
"外交部发言人马朝旭",
|
||||
"领导人会议和第四届东亚峰会",
|
||||
"在过去的这五年",
|
||||
"还需要很长的路要走",
|
||||
"60周年首都阅兵",
|
||||
"你好人们审美的观点是不同的",
|
||||
"买水果然后来世博园",
|
||||
"买水果然后去世博园",
|
||||
"但是后来我才知道你是对的",
|
||||
"存在即合理",
|
||||
"的的的的的在的的的的就以和和和",
|
||||
"I love你,不以为耻,反以为rong",
|
||||
"因",
|
||||
"",
|
||||
"hello你好人们审美的观点是不同的",
|
||||
"很好但主要是基于网页形式",
|
||||
"hello你好人们审美的观点是不同的",
|
||||
"为什么我不能拥有想要的生活",
|
||||
"后来我才",
|
||||
"此次来中国是为了",
|
||||
"使用了它就可以解决一些问题",
|
||||
",使用了它就可以解决一些问题",
|
||||
"其实使用了它就可以解决一些问题",
|
||||
"好人使用了它就可以解决一些问题",
|
||||
"是因为和国家",
|
||||
"老年搜索还支持",
|
||||
"干脆就把那部蒙人的闲法给废了拉倒!RT @laoshipukong : 27日,全国人大常委会第三次审议侵权责任法草案,删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 ",
|
||||
"大",
|
||||
"",
|
||||
"他说的确实在理",
|
||||
"长春市长春节讲话",
|
||||
"结婚的和尚未结婚的",
|
||||
"结合成分子时",
|
||||
"旅游和服务是最好的",
|
||||
"这件事情的确是我的错",
|
||||
"供大家参考指正",
|
||||
"哈尔滨政府公布塌桥原因",
|
||||
"我在机场入口处",
|
||||
"邢永臣摄影报道",
|
||||
"BP神经网络如何训练才能在分类时增加区分度?",
|
||||
"南京市长江大桥",
|
||||
"应一些使用者的建议,也为了便于利用NiuTrans用于SMT研究",
|
||||
"长春市长春药店",
|
||||
"邓颖超生前最喜欢的衣服",
|
||||
"胡锦涛是热爱世界和平的政治局常委",
|
||||
"程序员祝海林和朱会震是在孙健的左面和右面, 范凯在最右面.再往左是李松洪",
|
||||
"一次性交多少钱",
|
||||
"两块五一套,三块八一斤,四块七一本,五块六一条",
|
||||
"小和尚留了一个像大和尚一样的和尚头",
|
||||
"我是中华人民共和国公民;我爸爸是共和党党员; 地铁和平门站",
|
||||
"张晓梅去人民医院做了个B超然后去买了件T恤",
|
||||
"AT&T是一件不错的公司,给你发offer了吗?",
|
||||
"C++和c#是什么关系?11+122=133,是吗?PI=3.14159",
|
||||
"你认识那个和主席握手的的哥吗?他开一辆黑色的士。",
|
||||
"枪杆子中出政权"}
|
||||
defaultCutResult = [][]WordTag{
|
||||
[]WordTag{WordTag{"\u8fd9", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u9ed1\u591c", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u53eb", "v"}, WordTag{"\u5b59\u609f\u7a7a", "nr"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"Python", "eng"}, WordTag{"\u548c", "c"}, WordTag{"C++", "nz"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u4e0d", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u65e5\u672c", "ns"}, WordTag{"\u548c\u670d", "nz"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u56de\u5f52", "v"}, WordTag{"\u4eba\u95f4", "n"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u5de5\u4fe1\u5904", "n"}, WordTag{"\u5973\u5e72\u4e8b", "n"}, WordTag{"\u6bcf\u6708", "r"}, WordTag{"\u7ecf\u8fc7", "p"}, WordTag{"\u4e0b\u5c5e", "v"}, WordTag{"\u79d1\u5ba4", "n"}, WordTag{"\u90fd", "d"}, WordTag{"\u8981", "v"}, WordTag{"\u4eb2\u53e3", "n"}, WordTag{"\u4ea4\u4ee3", "n"}, WordTag{"24", "m"}, WordTag{"\u53e3", "n"}, WordTag{"\u4ea4\u6362\u673a", "n"}, WordTag{"\u7b49", "u"}, WordTag{"\u6280\u672f\u6027", "n"}, WordTag{"\u5668\u4ef6", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5b89\u88c5", "v"}, WordTag{"\u5de5\u4f5c", "vn"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5ec9\u79df\u623f", "n"}},
|
||||
[]WordTag{WordTag{"\u6c38\u548c", "nz"}, WordTag{"\u670d\u88c5", "vn"}, WordTag{"\u9970\u54c1", "n"}, WordTag{"\u6709\u9650\u516c\u53f8", "n"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\u5929\u5b89\u95e8", "ns"}},
|
||||
[]WordTag{WordTag{"abc", "eng"}},
|
||||
[]WordTag{WordTag{"\u9690", "n"}, WordTag{"\u9a6c\u5c14\u53ef\u592b", "nr"}},
|
||||
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2a", "q"}, WordTag{"\u597d", "a"}, WordTag{"\u7f51\u7ad9", "n"}},
|
||||
[]WordTag{WordTag{"\u201c", "x"}, WordTag{"Microsoft", "eng"}, WordTag{"\u201d", "x"}, WordTag{"\u4e00", "m"}, WordTag{"\u8bcd", "n"}, WordTag{"\u7531", "p"}, WordTag{"\u201c", "x"}, WordTag{"MICROcomputer", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u5fae\u578b", "b"}, WordTag{"\u8ba1\u7b97\u673a", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u548c", "c"}, WordTag{"\u201c", "x"}, WordTag{"SOFTware", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u8f6f\u4ef6", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u4e24", "m"}, WordTag{"\u90e8\u5206", "n"}, WordTag{"\u7ec4\u6210", "v"}},
|
||||
[]WordTag{WordTag{"\u8349\u6ce5\u9a6c", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u6b3a\u5b9e", "v"}, WordTag{"\u9a6c", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4eca\u5e74", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d41\u884c", "v"}, WordTag{"\u8bcd\u6c47", "n"}},
|
||||
[]WordTag{WordTag{"\u4f0a\u85e4", "nr"}, WordTag{"\u6d0b\u534e\u5802", "n"}, WordTag{"\u603b\u5e9c", "n"}, WordTag{"\u5e97", "n"}},
|
||||
[]WordTag{WordTag{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", "nt"}},
|
||||
[]WordTag{WordTag{"\u7f57\u5bc6\u6b27", "nr"}, WordTag{"\u4e0e", "p"}, WordTag{"\u6731\u4e3d\u53f6", "nr"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u8d2d\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u9053\u5177", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u88c5", "vn"}},
|
||||
[]WordTag{WordTag{"PS", "eng"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u89c9\u5f97", "v"}, WordTag{"\u5f00\u6e90", "n"}, WordTag{"\u6709", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u597d\u5904", "d"}, WordTag{"\uff0c", "x"}, WordTag{"\u5c31\u662f", "d"}, WordTag{"\u80fd\u591f", "v"}, WordTag{"\u6566\u4fc3", "v"}, WordTag{"\u81ea\u5df1", "r"}, WordTag{"\u4e0d\u65ad\u6539\u8fdb", "l"}, WordTag{"\uff0c", "x"}, WordTag{"\u907f\u514d", "v"}, WordTag{"\u655e", "v"}, WordTag{"\u5e1a", "ng"}, WordTag{"\u81ea\u73cd", "b"}},
|
||||
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u77f3\u9996\u5e02", "ns"}},
|
||||
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u5341\u5830\u5e02", "ns"}},
|
||||
[]WordTag{WordTag{"\u603b\u7ecf\u7406", "n"}, WordTag{"\u5b8c\u6210", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}},
|
||||
[]WordTag{WordTag{"\u7535\u8111", "n"}, WordTag{"\u4fee\u597d", "v"}, WordTag{"\u4e86", "ul"}},
|
||||
[]WordTag{WordTag{"\u505a\u597d", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u5c31", "d"}, WordTag{"\u4e00\u4e86\u767e\u4e86", "l"}, WordTag{"\u4e86", "ul"}},
|
||||
[]WordTag{WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u6211\u4eec", "r"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u7f8e\u7684", "nr"}, WordTag{"\u7a7a\u8c03", "n"}},
|
||||
[]WordTag{WordTag{"\u7ebf\u7a0b", "n"}, WordTag{"\u521d\u59cb\u5316", "l"}, WordTag{"\u65f6", "n"}, WordTag{"\u6211\u4eec", "r"}, WordTag{"\u8981", "v"}, WordTag{"\u6ce8\u610f", "v"}},
|
||||
[]WordTag{WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u7531", "p"}, WordTag{"\u597d\u591a", "m"}, WordTag{"\u539f\u5b50", "n"}, WordTag{"\u7ec4\u7ec7", "v"}, WordTag{"\u6210", "v"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u795d", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u9a6c\u5230\u529f\u6210", "i"}},
|
||||
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u6389", "v"}, WordTag{"\u8fdb", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u65e0\u5e95\u6d1e", "ns"}, WordTag{"\u91cc", "f"}},
|
||||
[]WordTag{WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u7684", "uj"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u5317\u4eac", "ns"}},
|
||||
[]WordTag{WordTag{"\u5b59\u541b\u610f", "nr"}},
|
||||
[]WordTag{WordTag{"\u5916\u4ea4\u90e8", "nt"}, WordTag{"\u53d1\u8a00\u4eba", "l"}, WordTag{"\u9a6c\u671d\u65ed", "nr"}},
|
||||
[]WordTag{WordTag{"\u9886\u5bfc\u4eba", "n"}, WordTag{"\u4f1a\u8bae", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u7b2c\u56db\u5c4a", "m"}, WordTag{"\u4e1c\u4e9a", "ns"}, WordTag{"\u5cf0\u4f1a", "n"}},
|
||||
[]WordTag{WordTag{"\u5728", "p"}, WordTag{"\u8fc7\u53bb", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u8fd9", "r"}, WordTag{"\u4e94\u5e74", "t"}},
|
||||
[]WordTag{WordTag{"\u8fd8", "d"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5f88", "d"}, WordTag{"\u957f", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u8def", "n"}, WordTag{"\u8981", "v"}, WordTag{"\u8d70", "v"}},
|
||||
[]WordTag{WordTag{"60", "m"}, WordTag{"\u5468\u5e74", "t"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u9605\u5175", "v"}},
|
||||
[]WordTag{WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u6765", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
|
||||
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
|
||||
[]WordTag{WordTag{"\u4f46\u662f", "c"}, WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}, WordTag{"\u77e5\u9053", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u5bf9", "p"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u5b58\u5728", "v"}, WordTag{"\u5373", "v"}, WordTag{"\u5408\u7406", "vn"}},
|
||||
[]WordTag{WordTag{"\u7684\u7684", "u"}, WordTag{"\u7684\u7684", "u"}, WordTag{"\u7684", "uj"}, WordTag{"\u5728\u7684", "u"}, WordTag{"\u7684\u7684", "u"}, WordTag{"\u7684", "uj"}, WordTag{"\u5c31", "d"}, WordTag{"\u4ee5", "p"}, WordTag{"\u548c\u548c", "nz"}, WordTag{"\u548c", "c"}},
|
||||
[]WordTag{WordTag{"I", "x"}, WordTag{" ", "x"}, WordTag{"love", "eng"}, WordTag{"\u4f60", "r"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e0d\u4ee5\u4e3a\u803b", "i"}, WordTag{"\uff0c", "x"}, WordTag{"\u53cd", "zg"}, WordTag{"\u4ee5\u4e3a", "c"}, WordTag{"rong", "eng"}},
|
||||
[]WordTag{WordTag{"\u56e0", "p"}},
|
||||
[]WordTag{},
|
||||
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u5f88\u597d", "a"}, WordTag{"\u4f46", "c"}, WordTag{"\u4e3b\u8981", "b"}, WordTag{"\u662f", "v"}, WordTag{"\u57fa\u4e8e", "p"}, WordTag{"\u7f51\u9875", "n"}, WordTag{"\u5f62\u5f0f", "n"}},
|
||||
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u4e3a\u4ec0\u4e48", "r"}, WordTag{"\u6211", "r"}, WordTag{"\u4e0d\u80fd", "v"}, WordTag{"\u62e5\u6709", "v"}, WordTag{"\u60f3\u8981", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u751f\u6d3b", "vn"}},
|
||||
[]WordTag{WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}},
|
||||
[]WordTag{WordTag{"\u6b64\u6b21", "r"}, WordTag{"\u6765", "v"}, WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u662f", "v"}, WordTag{"\u4e3a\u4e86", "p"}},
|
||||
[]WordTag{WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{",", "x"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{"\u5176\u5b9e", "d"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{"\u597d\u4eba", "n"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{"\u662f\u56e0\u4e3a", "c"}, WordTag{"\u548c", "c"}, WordTag{"\u56fd\u5bb6", "n"}},
|
||||
[]WordTag{WordTag{"\u8001\u5e74", "t"}, WordTag{"\u641c\u7d22", "v"}, WordTag{"\u8fd8", "d"}, WordTag{"\u652f\u6301", "v"}},
|
||||
[]WordTag{WordTag{"\u5e72\u8106", "d"}, WordTag{"\u5c31", "d"}, WordTag{"\u628a", "p"}, WordTag{"\u90a3\u90e8", "r"}, WordTag{"\u8499\u4eba", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u95f2\u6cd5", "n"}, WordTag{"\u7ed9", "p"}, WordTag{"\u5e9f", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u62c9\u5012", "v"}, WordTag{"\uff01", "x"}, WordTag{"RT", "eng"}, WordTag{" ", "x"}, WordTag{"@", "x"}, WordTag{"laoshipukong", "eng"}, WordTag{" ", "x"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"27", "m"}, WordTag{"\u65e5", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "nt"}, WordTag{"\u7b2c\u4e09\u6b21", "m"}, WordTag{"\u5ba1\u8bae", "v"}, WordTag{"\u4fb5\u6743", "v"}, WordTag{"\u8d23\u4efb\u6cd5", "n"}, WordTag{"\u8349\u6848", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u5220\u9664", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u6709\u5173", "vn"}, WordTag{"\u533b\u7597", "n"}, WordTag{"\u635f\u5bb3", "v"}, WordTag{"\u8d23\u4efb", "n"}, WordTag{"\u201c", "x"}, WordTag{"\u4e3e\u8bc1", "v"}, WordTag{"\u5012\u7f6e", "v"}, WordTag{"\u201d", "x"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c4\u5b9a", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u5728", "p"}, WordTag{"\u533b\u60a3", "n"}, WordTag{"\u7ea0\u7eb7", "n"}, WordTag{"\u4e2d\u672c", "ns"}, WordTag{"\u5df2", "d"}, WordTag{"\u5904\u4e8e", "v"}, WordTag{"\u5f31\u52bf", "n"}, WordTag{"\u5730\u4f4d", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d88\u8d39\u8005", "n"}, WordTag{"\u7531\u6b64", "c"}, WordTag{"\u5c06", "d"}, WordTag{"\u9677\u5165", "v"}, WordTag{"\u4e07\u52ab\u4e0d\u590d", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u5883\u5730", "s"}, WordTag{"\u3002", "x"}, WordTag{" ", "x"}},
|
||||
[]WordTag{WordTag{"\u5927", "a"}},
|
||||
[]WordTag{},
|
||||
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u8bf4", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u786e\u5b9e", "ad"}, WordTag{"\u5728", "p"}, WordTag{"\u7406", "n"}},
|
||||
[]WordTag{WordTag{"\u957f\u6625", "ns"}, WordTag{"\u5e02\u957f", "n"}, WordTag{"\u6625\u8282", "t"}, WordTag{"\u8bb2\u8bdd", "n"}},
|
||||
[]WordTag{WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c", "c"}, WordTag{"\u5c1a\u672a", "d"}, WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u7ed3\u5408", "v"}, WordTag{"\u6210", "n"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u65f6", "n"}},
|
||||
[]WordTag{WordTag{"\u65c5\u6e38", "vn"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u52a1", "vn"}, WordTag{"\u662f", "v"}, WordTag{"\u6700\u597d", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u7684\u786e", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u6211", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u9519", "n"}},
|
||||
[]WordTag{WordTag{"\u4f9b", "v"}, WordTag{"\u5927\u5bb6", "n"}, WordTag{"\u53c2\u8003", "v"}, WordTag{"\u6307\u6b63", "v"}},
|
||||
[]WordTag{WordTag{"\u54c8\u5c14\u6ee8", "ns"}, WordTag{"\u653f\u5e9c", "n"}, WordTag{"\u516c\u5e03", "v"}, WordTag{"\u584c", "v"}, WordTag{"\u6865", "n"}, WordTag{"\u539f\u56e0", "n"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u5728", "p"}, WordTag{"\u673a\u573a", "n"}, WordTag{"\u5165\u53e3\u5904", "i"}},
|
||||
[]WordTag{WordTag{"\u90a2\u6c38\u81e3", "nr"}, WordTag{"\u6444\u5f71", "n"}, WordTag{"\u62a5\u9053", "v"}},
|
||||
[]WordTag{WordTag{"BP", "eng"}, WordTag{"\u795e\u7ecf\u7f51\u7edc", "n"}, WordTag{"\u5982\u4f55", "r"}, WordTag{"\u8bad\u7ec3", "vn"}, WordTag{"\u624d\u80fd", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5206\u7c7b", "n"}, WordTag{"\u65f6", "n"}, WordTag{"\u589e\u52a0", "v"}, WordTag{"\u533a\u5206\u5ea6", "n"}, WordTag{"\uff1f", "x"}},
|
||||
[]WordTag{WordTag{"\u5357\u4eac\u5e02", "ns"}, WordTag{"\u957f\u6c5f\u5927\u6865", "ns"}},
|
||||
[]WordTag{WordTag{"\u5e94", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u4f7f\u7528\u8005", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5efa\u8bae", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e5f", "d"}, WordTag{"\u4e3a\u4e86", "p"}, WordTag{"\u4fbf\u4e8e", "v"}, WordTag{"\u5229\u7528", "n"}, WordTag{"NiuTrans", "eng"}, WordTag{"\u7528\u4e8e", "v"}, WordTag{"SMT", "eng"}, WordTag{"\u7814\u7a76", "vn"}},
|
||||
[]WordTag{WordTag{"\u957f\u6625\u5e02", "ns"}, WordTag{"\u957f\u6625", "ns"}, WordTag{"\u836f\u5e97", "n"}},
|
||||
[]WordTag{WordTag{"\u9093\u9896\u8d85", "nr"}, WordTag{"\u751f\u524d", "t"}, WordTag{"\u6700", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u8863\u670d", "n"}},
|
||||
[]WordTag{WordTag{"\u80e1\u9526\u6d9b", "nr"}, WordTag{"\u662f", "v"}, WordTag{"\u70ed\u7231", "a"}, WordTag{"\u4e16\u754c", "n"}, WordTag{"\u548c\u5e73", "nz"}, WordTag{"\u7684", "uj"}, WordTag{"\u653f\u6cbb\u5c40", "n"}, WordTag{"\u5e38\u59d4", "j"}},
|
||||
[]WordTag{WordTag{"\u7a0b\u5e8f\u5458", "n"}, WordTag{"\u795d", "v"}, WordTag{"\u6d77\u6797", "nz"}, WordTag{"\u548c", "c"}, WordTag{"\u6731\u4f1a\u9707", "nr"}, WordTag{"\u662f", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5b59\u5065", "nr"}, WordTag{"\u7684", "uj"}, WordTag{"\u5de6\u9762", "f"}, WordTag{"\u548c", "c"}, WordTag{"\u53f3\u9762", "f"}, WordTag{",", "x"}, WordTag{" ", "x"}, WordTag{"\u8303\u51ef", "nr"}, WordTag{"\u5728", "p"}, WordTag{"\u6700", "a"}, WordTag{"\u53f3\u9762", "f"}, WordTag{".", "m"}, WordTag{"\u518d\u5f80", "d"}, WordTag{"\u5de6", "f"}, WordTag{"\u662f", "v"}, WordTag{"\u674e\u677e\u6d2a", "nr"}},
|
||||
[]WordTag{WordTag{"\u4e00\u6b21\u6027", "d"}, WordTag{"\u4ea4", "v"}, WordTag{"\u591a\u5c11", "m"}, WordTag{"\u94b1", "n"}},
|
||||
[]WordTag{WordTag{"\u4e24\u5757", "m"}, WordTag{"\u4e94", "m"}, WordTag{"\u4e00\u5957", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e09\u5757", "m"}, WordTag{"\u516b", "m"}, WordTag{"\u4e00\u65a4", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u56db\u5757", "m"}, WordTag{"\u4e03", "m"}, WordTag{"\u4e00\u672c", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e94\u5757", "m"}, WordTag{"\u516d", "m"}, WordTag{"\u4e00\u6761", "m"}},
|
||||
[]WordTag{WordTag{"\u5c0f", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u7559", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u50cf", "v"}, WordTag{"\u5927", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u4e00\u6837", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c\u5c1a\u5934", "nr"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "ns"}, WordTag{"\u516c\u6c11", "n"}, WordTag{";", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7238\u7238", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u5171\u548c\u515a", "nt"}, WordTag{"\u515a\u5458", "n"}, WordTag{";", "x"}, WordTag{" ", "x"}, WordTag{"\u5730\u94c1", "n"}, WordTag{"\u548c\u5e73\u95e8", "ns"}, WordTag{"\u7ad9", "v"}},
|
||||
[]WordTag{WordTag{"\u5f20\u6653\u6885", "nr"}, WordTag{"\u53bb", "v"}, WordTag{"\u4eba\u6c11", "n"}, WordTag{"\u533b\u9662", "n"}, WordTag{"\u505a", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e2a", "q"}, WordTag{"B\u8d85", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4ef6", "q"}, WordTag{"T\u6064", "n"}},
|
||||
[]WordTag{WordTag{"AT&T", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4ef6", "m"}, WordTag{"\u4e0d\u9519", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u516c\u53f8", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u7ed9", "p"}, WordTag{"\u4f60", "r"}, WordTag{"\u53d1", "v"}, WordTag{"offer", "eng"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}},
|
||||
[]WordTag{WordTag{"C++", "nz"}, WordTag{"\u548c", "c"}, WordTag{"c#", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4ec0\u4e48", "r"}, WordTag{"\u5173\u7cfb", "n"}, WordTag{"\uff1f", "x"}, WordTag{"11", "m"}, WordTag{"+", "x"}, WordTag{"122", "m"}, WordTag{"=", "x"}, WordTag{"133", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u662f", "v"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"PI", "eng"}, WordTag{"=", "x"}, WordTag{"3.14159", "m"}},
|
||||
[]WordTag{WordTag{"\u4f60", "r"}, WordTag{"\u8ba4\u8bc6", "v"}, WordTag{"\u90a3\u4e2a", "r"}, WordTag{"\u548c", "c"}, WordTag{"\u4e3b\u5e2d", "n"}, WordTag{"\u63e1\u624b", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684\u54e5", "n"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"\u4ed6", "r"}, WordTag{"\u5f00", "v"}, WordTag{"\u4e00\u8f86", "m"}, WordTag{"\u9ed1\u8272", "n"}, WordTag{"\u7684\u58eb", "n"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u67aa\u6746\u5b50", "n"}, WordTag{"\u4e2d", "f"}, WordTag{"\u51fa", "v"}, WordTag{"\u653f\u6743", "n"}},
|
||||
}
|
||||
noHMMCutResult = [][]WordTag{
|
||||
[]WordTag{WordTag{"\u8fd9", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u4f38\u624b\u4e0d\u89c1\u4e94\u6307", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u9ed1\u591c", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u53eb", "v"}, WordTag{"\u5b59\u609f\u7a7a", "nr"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\uff0c", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"Python", "eng"}, WordTag{"\u548c", "c"}, WordTag{"C++", "nz"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u4e0d", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u65e5\u672c", "ns"}, WordTag{"\u548c\u670d", "nz"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u56de\u5f52", "v"}, WordTag{"\u4eba\u95f4", "n"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u5de5\u4fe1\u5904", "n"}, WordTag{"\u5973\u5e72\u4e8b", "n"}, WordTag{"\u6bcf\u6708", "r"}, WordTag{"\u7ecf\u8fc7", "p"}, WordTag{"\u4e0b\u5c5e", "v"}, WordTag{"\u79d1\u5ba4", "n"}, WordTag{"\u90fd", "d"}, WordTag{"\u8981", "v"}, WordTag{"\u4eb2\u53e3", "n"}, WordTag{"\u4ea4\u4ee3", "n"}, WordTag{"24", "eng"}, WordTag{"\u53e3", "q"}, WordTag{"\u4ea4\u6362\u673a", "n"}, WordTag{"\u7b49", "u"}, WordTag{"\u6280\u672f\u6027", "n"}, WordTag{"\u5668\u4ef6", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5b89\u88c5", "v"}, WordTag{"\u5de5\u4f5c", "vn"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5ec9\u79df\u623f", "n"}},
|
||||
[]WordTag{WordTag{"\u6c38\u548c", "nz"}, WordTag{"\u670d\u88c5", "vn"}, WordTag{"\u9970\u54c1", "n"}, WordTag{"\u6709\u9650\u516c\u53f8", "n"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u7231", "v"}, WordTag{"\u5317\u4eac", "ns"}, WordTag{"\u5929\u5b89\u95e8", "ns"}},
|
||||
[]WordTag{WordTag{"abc", "eng"}},
|
||||
[]WordTag{WordTag{"\u9690", "n"}, WordTag{"\u9a6c\u5c14\u53ef\u592b", "nr"}},
|
||||
[]WordTag{WordTag{"\u96f7\u7334", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2a", "q"}, WordTag{"\u597d", "a"}, WordTag{"\u7f51\u7ad9", "n"}},
|
||||
[]WordTag{WordTag{"\u201c", "x"}, WordTag{"Microsoft", "eng"}, WordTag{"\u201d", "x"}, WordTag{"\u4e00", "m"}, WordTag{"\u8bcd", "n"}, WordTag{"\u7531", "p"}, WordTag{"\u201c", "x"}, WordTag{"MICROcomputer", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u5fae\u578b", "b"}, WordTag{"\u8ba1\u7b97\u673a", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u548c", "c"}, WordTag{"\u201c", "x"}, WordTag{"SOFTware", "eng"}, WordTag{"\uff08", "x"}, WordTag{"\u8f6f\u4ef6", "n"}, WordTag{"\uff09", "x"}, WordTag{"\u201d", "x"}, WordTag{"\u4e24", "m"}, WordTag{"\u90e8\u5206", "n"}, WordTag{"\u7ec4\u6210", "v"}},
|
||||
[]WordTag{WordTag{"\u8349\u6ce5\u9a6c", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u6b3a", "vn"}, WordTag{"\u5b9e", "n"}, WordTag{"\u9a6c", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4eca\u5e74", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d41\u884c", "v"}, WordTag{"\u8bcd\u6c47", "n"}},
|
||||
[]WordTag{WordTag{"\u4f0a", "ns"}, WordTag{"\u85e4", "nr"}, WordTag{"\u6d0b\u534e\u5802", "n"}, WordTag{"\u603b\u5e9c", "n"}, WordTag{"\u5e97", "n"}},
|
||||
[]WordTag{WordTag{"\u4e2d\u56fd\u79d1\u5b66\u9662\u8ba1\u7b97\u6280\u672f\u7814\u7a76\u6240", "nt"}},
|
||||
[]WordTag{WordTag{"\u7f57\u5bc6\u6b27", "nr"}, WordTag{"\u4e0e", "p"}, WordTag{"\u6731\u4e3d\u53f6", "nr"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u8d2d\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u9053\u5177", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u88c5", "vn"}},
|
||||
[]WordTag{WordTag{"PS", "eng"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u89c9\u5f97", "v"}, WordTag{"\u5f00\u6e90", "n"}, WordTag{"\u6709", "v"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u597d\u5904", "d"}, WordTag{"\uff0c", "x"}, WordTag{"\u5c31\u662f", "d"}, WordTag{"\u80fd\u591f", "v"}, WordTag{"\u6566\u4fc3", "v"}, WordTag{"\u81ea\u5df1", "r"}, WordTag{"\u4e0d\u65ad\u6539\u8fdb", "l"}, WordTag{"\uff0c", "x"}, WordTag{"\u907f\u514d", "v"}, WordTag{"\u655e", "v"}, WordTag{"\u5e1a", "ng"}, WordTag{"\u81ea\u73cd", "b"}},
|
||||
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u77f3\u9996\u5e02", "ns"}},
|
||||
[]WordTag{WordTag{"\u6e56\u5317\u7701", "ns"}, WordTag{"\u5341\u5830\u5e02", "ns"}},
|
||||
[]WordTag{WordTag{"\u603b\u7ecf\u7406", "n"}, WordTag{"\u5b8c\u6210", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}},
|
||||
[]WordTag{WordTag{"\u7535\u8111", "n"}, WordTag{"\u4fee\u597d", "v"}, WordTag{"\u4e86", "ul"}},
|
||||
[]WordTag{WordTag{"\u505a\u597d", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u5c31", "d"}, WordTag{"\u4e00\u4e86\u767e\u4e86", "l"}, WordTag{"\u4e86", "ul"}},
|
||||
[]WordTag{WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u6211\u4eec", "r"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u7f8e\u7684", "nr"}, WordTag{"\u7a7a\u8c03", "n"}},
|
||||
[]WordTag{WordTag{"\u7ebf\u7a0b", "n"}, WordTag{"\u521d\u59cb\u5316", "l"}, WordTag{"\u65f6", "n"}, WordTag{"\u6211\u4eec", "r"}, WordTag{"\u8981", "v"}, WordTag{"\u6ce8\u610f", "v"}},
|
||||
[]WordTag{WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u7531", "p"}, WordTag{"\u597d\u591a", "m"}, WordTag{"\u539f\u5b50", "n"}, WordTag{"\u7ec4\u7ec7", "v"}, WordTag{"\u6210", "n"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u795d", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u9a6c\u5230\u529f\u6210", "i"}},
|
||||
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u6389", "zg"}, WordTag{"\u8fdb", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u65e0\u5e95\u6d1e", "ns"}, WordTag{"\u91cc", "f"}},
|
||||
[]WordTag{WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u7684", "uj"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u5317\u4eac", "ns"}},
|
||||
[]WordTag{WordTag{"\u5b59", "zg"}, WordTag{"\u541b", "nz"}, WordTag{"\u610f", "n"}},
|
||||
[]WordTag{WordTag{"\u5916\u4ea4\u90e8", "nt"}, WordTag{"\u53d1\u8a00\u4eba", "l"}, WordTag{"\u9a6c\u671d\u65ed", "nr"}},
|
||||
[]WordTag{WordTag{"\u9886\u5bfc\u4eba", "n"}, WordTag{"\u4f1a\u8bae", "n"}, WordTag{"\u548c", "c"}, WordTag{"\u7b2c\u56db\u5c4a", "m"}, WordTag{"\u4e1c\u4e9a", "ns"}, WordTag{"\u5cf0\u4f1a", "n"}},
|
||||
[]WordTag{WordTag{"\u5728", "p"}, WordTag{"\u8fc7\u53bb", "t"}, WordTag{"\u7684", "uj"}, WordTag{"\u8fd9", "r"}, WordTag{"\u4e94\u5e74", "t"}},
|
||||
[]WordTag{WordTag{"\u8fd8", "d"}, WordTag{"\u9700\u8981", "v"}, WordTag{"\u5f88", "zg"}, WordTag{"\u957f", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u8def", "n"}, WordTag{"\u8981", "v"}, WordTag{"\u8d70", "v"}},
|
||||
[]WordTag{WordTag{"60", "eng"}, WordTag{"\u5468\u5e74", "t"}, WordTag{"\u9996\u90fd", "d"}, WordTag{"\u9605\u5175", "v"}},
|
||||
[]WordTag{WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u6765", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
|
||||
[]WordTag{WordTag{"\u4e70", "v"}, WordTag{"\u6c34\u679c", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e16\u535a\u56ed", "nr"}},
|
||||
[]WordTag{WordTag{"\u4f46\u662f", "c"}, WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}, WordTag{"\u77e5\u9053", "v"}, WordTag{"\u4f60", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u5bf9", "p"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u5b58\u5728", "v"}, WordTag{"\u5373", "v"}, WordTag{"\u5408\u7406", "vn"}},
|
||||
[]WordTag{WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u5728", "p"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684", "uj"}, WordTag{"\u5c31", "d"}, WordTag{"\u4ee5", "p"}, WordTag{"\u548c", "c"}, WordTag{"\u548c", "c"}, WordTag{"\u548c", "c"}},
|
||||
[]WordTag{WordTag{"I", "eng"}, WordTag{" ", "x"}, WordTag{"love", "eng"}, WordTag{"\u4f60", "r"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e0d\u4ee5\u4e3a\u803b", "i"}, WordTag{"\uff0c", "x"}, WordTag{"\u53cd", "zg"}, WordTag{"\u4ee5\u4e3a", "c"}, WordTag{"rong", "eng"}},
|
||||
[]WordTag{WordTag{"\u56e0", "p"}},
|
||||
[]WordTag{},
|
||||
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u5f88", "zg"}, WordTag{"\u597d", "a"}, WordTag{"\u4f46", "c"}, WordTag{"\u4e3b\u8981", "b"}, WordTag{"\u662f", "v"}, WordTag{"\u57fa\u4e8e", "p"}, WordTag{"\u7f51\u9875", "n"}, WordTag{"\u5f62\u5f0f", "n"}},
|
||||
[]WordTag{WordTag{"hello", "eng"}, WordTag{"\u4f60\u597d", "l"}, WordTag{"\u4eba\u4eec", "n"}, WordTag{"\u5ba1\u7f8e", "vn"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c2\u70b9", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u4e0d\u540c", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u4e3a\u4ec0\u4e48", "r"}, WordTag{"\u6211", "r"}, WordTag{"\u4e0d\u80fd", "v"}, WordTag{"\u62e5\u6709", "v"}, WordTag{"\u60f3\u8981", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u751f\u6d3b", "vn"}},
|
||||
[]WordTag{WordTag{"\u540e\u6765", "t"}, WordTag{"\u6211", "r"}, WordTag{"\u624d", "d"}},
|
||||
[]WordTag{WordTag{"\u6b64\u6b21", "r"}, WordTag{"\u6765", "v"}, WordTag{"\u4e2d\u56fd", "ns"}, WordTag{"\u662f", "v"}, WordTag{"\u4e3a\u4e86", "p"}},
|
||||
[]WordTag{WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{",", "x"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{"\u5176\u5b9e", "d"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{"\u597d\u4eba", "n"}, WordTag{"\u4f7f\u7528", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5b83", "r"}, WordTag{"\u5c31", "d"}, WordTag{"\u53ef\u4ee5", "c"}, WordTag{"\u89e3\u51b3", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u95ee\u9898", "n"}},
|
||||
[]WordTag{WordTag{"\u662f\u56e0\u4e3a", "c"}, WordTag{"\u548c", "c"}, WordTag{"\u56fd\u5bb6", "n"}},
|
||||
[]WordTag{WordTag{"\u8001\u5e74", "t"}, WordTag{"\u641c\u7d22", "v"}, WordTag{"\u8fd8", "d"}, WordTag{"\u652f\u6301", "v"}},
|
||||
[]WordTag{WordTag{"\u5e72\u8106", "d"}, WordTag{"\u5c31", "d"}, WordTag{"\u628a", "p"}, WordTag{"\u90a3", "r"}, WordTag{"\u90e8", "n"}, WordTag{"\u8499", "v"}, WordTag{"\u4eba", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u95f2", "n"}, WordTag{"\u6cd5", "j"}, WordTag{"\u7ed9", "p"}, WordTag{"\u5e9f", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u62c9\u5012", "v"}, WordTag{"\uff01", "x"}, WordTag{"RT", "eng"}, WordTag{" ", "x"}, WordTag{"@", "x"}, WordTag{"laoshipukong", "eng"}, WordTag{" ", "x"}, WordTag{":", "x"}, WordTag{" ", "x"}, WordTag{"27", "eng"}, WordTag{"\u65e5", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u5168\u56fd\u4eba\u5927\u5e38\u59d4\u4f1a", "nt"}, WordTag{"\u7b2c\u4e09\u6b21", "m"}, WordTag{"\u5ba1\u8bae", "v"}, WordTag{"\u4fb5\u6743", "v"}, WordTag{"\u8d23\u4efb\u6cd5", "n"}, WordTag{"\u8349\u6848", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u5220\u9664", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u6709\u5173", "vn"}, WordTag{"\u533b\u7597", "n"}, WordTag{"\u635f\u5bb3", "v"}, WordTag{"\u8d23\u4efb", "n"}, WordTag{"\u201c", "x"}, WordTag{"\u4e3e\u8bc1", "v"}, WordTag{"\u5012\u7f6e", "v"}, WordTag{"\u201d", "x"}, WordTag{"\u7684", "uj"}, WordTag{"\u89c4\u5b9a", "n"}, WordTag{"\u3002", "x"}, WordTag{"\u5728", "p"}, WordTag{"\u533b\u60a3", "n"}, WordTag{"\u7ea0\u7eb7", "n"}, WordTag{"\u4e2d", "f"}, WordTag{"\u672c", "r"}, WordTag{"\u5df2", "d"}, WordTag{"\u5904\u4e8e", "v"}, WordTag{"\u5f31\u52bf", "n"}, WordTag{"\u5730\u4f4d", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u6d88\u8d39\u8005", "n"}, WordTag{"\u7531\u6b64", "c"}, WordTag{"\u5c06", "d"}, WordTag{"\u9677\u5165", "v"}, WordTag{"\u4e07\u52ab\u4e0d\u590d", "i"}, WordTag{"\u7684", "uj"}, WordTag{"\u5883\u5730", "s"}, WordTag{"\u3002", "x"}, WordTag{" ", "x"}},
|
||||
[]WordTag{WordTag{"\u5927", "a"}},
|
||||
[]WordTag{},
|
||||
[]WordTag{WordTag{"\u4ed6", "r"}, WordTag{"\u8bf4", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u786e\u5b9e", "ad"}, WordTag{"\u5728", "p"}, WordTag{"\u7406", "n"}},
|
||||
[]WordTag{WordTag{"\u957f\u6625", "ns"}, WordTag{"\u5e02\u957f", "n"}, WordTag{"\u6625\u8282", "t"}, WordTag{"\u8bb2\u8bdd", "n"}},
|
||||
[]WordTag{WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c", "c"}, WordTag{"\u5c1a\u672a", "d"}, WordTag{"\u7ed3\u5a5a", "v"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u7ed3\u5408", "v"}, WordTag{"\u6210", "n"}, WordTag{"\u5206\u5b50", "n"}, WordTag{"\u65f6", "n"}},
|
||||
[]WordTag{WordTag{"\u65c5\u6e38", "vn"}, WordTag{"\u548c", "c"}, WordTag{"\u670d\u52a1", "vn"}, WordTag{"\u662f", "v"}, WordTag{"\u6700\u597d", "a"}, WordTag{"\u7684", "uj"}},
|
||||
[]WordTag{WordTag{"\u8fd9\u4ef6", "mq"}, WordTag{"\u4e8b\u60c5", "n"}, WordTag{"\u7684\u786e", "d"}, WordTag{"\u662f", "v"}, WordTag{"\u6211", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u9519", "v"}},
|
||||
[]WordTag{WordTag{"\u4f9b", "v"}, WordTag{"\u5927\u5bb6", "n"}, WordTag{"\u53c2\u8003", "v"}, WordTag{"\u6307\u6b63", "v"}},
|
||||
[]WordTag{WordTag{"\u54c8\u5c14\u6ee8", "ns"}, WordTag{"\u653f\u5e9c", "n"}, WordTag{"\u516c\u5e03", "v"}, WordTag{"\u584c", "v"}, WordTag{"\u6865", "n"}, WordTag{"\u539f\u56e0", "n"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u5728", "p"}, WordTag{"\u673a\u573a", "n"}, WordTag{"\u5165\u53e3\u5904", "i"}},
|
||||
[]WordTag{WordTag{"\u90a2", "nr"}, WordTag{"\u6c38", "ns"}, WordTag{"\u81e3", "n"}, WordTag{"\u6444\u5f71", "n"}, WordTag{"\u62a5\u9053", "v"}},
|
||||
[]WordTag{WordTag{"BP", "eng"}, WordTag{"\u795e\u7ecf\u7f51\u7edc", "n"}, WordTag{"\u5982\u4f55", "r"}, WordTag{"\u8bad\u7ec3", "vn"}, WordTag{"\u624d\u80fd", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5206\u7c7b", "n"}, WordTag{"\u65f6", "n"}, WordTag{"\u589e\u52a0", "v"}, WordTag{"\u533a\u5206\u5ea6", "n"}, WordTag{"\uff1f", "x"}},
|
||||
[]WordTag{WordTag{"\u5357\u4eac\u5e02", "ns"}, WordTag{"\u957f\u6c5f\u5927\u6865", "ns"}},
|
||||
[]WordTag{WordTag{"\u5e94", "v"}, WordTag{"\u4e00\u4e9b", "m"}, WordTag{"\u4f7f\u7528\u8005", "n"}, WordTag{"\u7684", "uj"}, WordTag{"\u5efa\u8bae", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e5f", "d"}, WordTag{"\u4e3a\u4e86", "p"}, WordTag{"\u4fbf\u4e8e", "v"}, WordTag{"\u5229\u7528", "n"}, WordTag{"NiuTrans", "eng"}, WordTag{"\u7528\u4e8e", "v"}, WordTag{"SMT", "eng"}, WordTag{"\u7814\u7a76", "vn"}},
|
||||
[]WordTag{WordTag{"\u957f\u6625\u5e02", "ns"}, WordTag{"\u957f\u6625", "ns"}, WordTag{"\u836f\u5e97", "n"}},
|
||||
[]WordTag{WordTag{"\u9093\u9896\u8d85", "nr"}, WordTag{"\u751f\u524d", "t"}, WordTag{"\u6700", "d"}, WordTag{"\u559c\u6b22", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u8863\u670d", "n"}},
|
||||
[]WordTag{WordTag{"\u80e1\u9526\u6d9b", "nr"}, WordTag{"\u662f", "v"}, WordTag{"\u70ed\u7231", "a"}, WordTag{"\u4e16\u754c", "n"}, WordTag{"\u548c\u5e73", "nz"}, WordTag{"\u7684", "uj"}, WordTag{"\u653f\u6cbb\u5c40", "n"}, WordTag{"\u5e38\u59d4", "j"}},
|
||||
[]WordTag{WordTag{"\u7a0b\u5e8f\u5458", "n"}, WordTag{"\u795d", "v"}, WordTag{"\u6d77\u6797", "nz"}, WordTag{"\u548c", "c"}, WordTag{"\u6731", "nr"}, WordTag{"\u4f1a", "v"}, WordTag{"\u9707", "v"}, WordTag{"\u662f", "v"}, WordTag{"\u5728", "p"}, WordTag{"\u5b59", "zg"}, WordTag{"\u5065", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u5de6\u9762", "f"}, WordTag{"\u548c", "c"}, WordTag{"\u53f3\u9762", "f"}, WordTag{",", "x"}, WordTag{" ", "x"}, WordTag{"\u8303", "nr"}, WordTag{"\u51ef", "nr"}, WordTag{"\u5728", "p"}, WordTag{"\u6700", "d"}, WordTag{"\u53f3\u9762", "f"}, WordTag{".", "x"}, WordTag{"\u518d", "d"}, WordTag{"\u5f80", "zg"}, WordTag{"\u5de6", "m"}, WordTag{"\u662f", "v"}, WordTag{"\u674e", "nr"}, WordTag{"\u677e", "v"}, WordTag{"\u6d2a", "nr"}},
|
||||
[]WordTag{WordTag{"\u4e00\u6b21\u6027", "d"}, WordTag{"\u4ea4", "v"}, WordTag{"\u591a\u5c11", "m"}, WordTag{"\u94b1", "n"}},
|
||||
[]WordTag{WordTag{"\u4e24\u5757", "m"}, WordTag{"\u4e94", "m"}, WordTag{"\u4e00\u5957", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e09\u5757", "m"}, WordTag{"\u516b", "m"}, WordTag{"\u4e00\u65a4", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u56db\u5757", "m"}, WordTag{"\u4e03", "m"}, WordTag{"\u4e00\u672c", "m"}, WordTag{"\uff0c", "x"}, WordTag{"\u4e94\u5757", "m"}, WordTag{"\u516d", "m"}, WordTag{"\u4e00\u6761", "m"}},
|
||||
[]WordTag{WordTag{"\u5c0f", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u7559", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e00\u4e2a", "m"}, WordTag{"\u50cf", "v"}, WordTag{"\u5927", "a"}, WordTag{"\u548c\u5c1a", "nr"}, WordTag{"\u4e00\u6837", "r"}, WordTag{"\u7684", "uj"}, WordTag{"\u548c\u5c1a\u5934", "nr"}},
|
||||
[]WordTag{WordTag{"\u6211", "r"}, WordTag{"\u662f", "v"}, WordTag{"\u4e2d\u534e\u4eba\u6c11\u5171\u548c\u56fd", "ns"}, WordTag{"\u516c\u6c11", "n"}, WordTag{";", "x"}, WordTag{"\u6211", "r"}, WordTag{"\u7238\u7238", "n"}, WordTag{"\u662f", "v"}, WordTag{"\u5171\u548c\u515a", "nt"}, WordTag{"\u515a\u5458", "n"}, WordTag{";", "x"}, WordTag{" ", "x"}, WordTag{"\u5730\u94c1", "n"}, WordTag{"\u548c\u5e73\u95e8", "ns"}, WordTag{"\u7ad9", "v"}},
|
||||
[]WordTag{WordTag{"\u5f20\u6653\u6885", "nr"}, WordTag{"\u53bb", "v"}, WordTag{"\u4eba\u6c11", "n"}, WordTag{"\u533b\u9662", "n"}, WordTag{"\u505a", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4e2a", "q"}, WordTag{"B\u8d85", "n"}, WordTag{"\u7136\u540e", "c"}, WordTag{"\u53bb", "v"}, WordTag{"\u4e70", "v"}, WordTag{"\u4e86", "ul"}, WordTag{"\u4ef6", "zg"}, WordTag{"T\u6064", "n"}},
|
||||
[]WordTag{WordTag{"AT&T", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4e00\u4ef6", "m"}, WordTag{"\u4e0d\u9519", "a"}, WordTag{"\u7684", "uj"}, WordTag{"\u516c\u53f8", "n"}, WordTag{"\uff0c", "x"}, WordTag{"\u7ed9", "p"}, WordTag{"\u4f60", "r"}, WordTag{"\u53d1", "v"}, WordTag{"offer", "eng"}, WordTag{"\u4e86", "ul"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}},
|
||||
[]WordTag{WordTag{"C++", "nz"}, WordTag{"\u548c", "c"}, WordTag{"c#", "nz"}, WordTag{"\u662f", "v"}, WordTag{"\u4ec0\u4e48", "r"}, WordTag{"\u5173\u7cfb", "n"}, WordTag{"\uff1f", "x"}, WordTag{"11", "eng"}, WordTag{"+", "x"}, WordTag{"122", "eng"}, WordTag{"=", "x"}, WordTag{"133", "eng"}, WordTag{"\uff0c", "x"}, WordTag{"\u662f", "v"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"PI", "eng"}, WordTag{"=", "x"}, WordTag{"3", "eng"}, WordTag{".", "x"}, WordTag{"14159", "eng"}},
|
||||
[]WordTag{WordTag{"\u4f60", "r"}, WordTag{"\u8ba4\u8bc6", "v"}, WordTag{"\u90a3\u4e2a", "r"}, WordTag{"\u548c", "c"}, WordTag{"\u4e3b\u5e2d", "n"}, WordTag{"\u63e1\u624b", "v"}, WordTag{"\u7684", "uj"}, WordTag{"\u7684\u54e5", "n"}, WordTag{"\u5417", "y"}, WordTag{"\uff1f", "x"}, WordTag{"\u4ed6", "r"}, WordTag{"\u5f00", "v"}, WordTag{"\u4e00\u8f86", "m"}, WordTag{"\u9ed1\u8272", "n"}, WordTag{"\u7684\u58eb", "n"}, WordTag{"\u3002", "x"}},
|
||||
[]WordTag{WordTag{"\u67aa\u6746\u5b50", "n"}, WordTag{"\u4e2d", "f"}, WordTag{"\u51fa", "v"}, WordTag{"\u653f\u6743", "n"}},
|
||||
}
|
||||
)
|
||||
|
||||
func TestCut(t *testing.T) {
|
||||
jiebago.SetDictionary("../dict.txt")
|
||||
for index, content := range test_contents {
|
||||
result := Cut(content, true)
|
||||
if len(defaultCutResult[index]) != len(result) {
|
||||
t.Error(content)
|
||||
}
|
||||
for i, _ := range result {
|
||||
if result[i] != defaultCutResult[index][i] {
|
||||
t.Error(content)
|
||||
}
|
||||
}
|
||||
result = Cut(content, false)
|
||||
if len(noHMMCutResult[index]) != len(result) {
|
||||
t.Error(content)
|
||||
}
|
||||
for i, _ := range result {
|
||||
if result[i] != noHMMCutResult[index][i] {
|
||||
t.Error(content)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
89402
posseg/prob_emit.go
Normal file
89402
posseg/prob_emit.go
Normal file
File diff suppressed because it is too large
Load Diff
264
posseg/prob_start.go
Normal file
264
posseg/prob_start.go
Normal file
@@ -0,0 +1,264 @@
|
||||
package posseg
|
||||
|
||||
var (
|
||||
ProbStart = make(map[StateTag]float64)
|
||||
)
|
||||
|
||||
func init() {
|
||||
ProbStart[StateTag{'B', "a"}] = -4.762305214596967
|
||||
ProbStart[StateTag{'B', "ad"}] = -6.680066036784177
|
||||
ProbStart[StateTag{'B', "ag"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "an"}] = -8.697083223018778
|
||||
ProbStart[StateTag{'B', "b"}] = -5.018374362109218
|
||||
ProbStart[StateTag{'B', "bg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "c"}] = -3.423880184954888
|
||||
ProbStart[StateTag{'B', "d"}] = -3.9750475297585357
|
||||
ProbStart[StateTag{'B', "df"}] = -8.888974230828882
|
||||
ProbStart[StateTag{'B', "dg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "e"}] = -8.563551830394255
|
||||
ProbStart[StateTag{'B', "en"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "f"}] = -5.491630418482717
|
||||
ProbStart[StateTag{'B', "g"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "h"}] = -13.533365129970255
|
||||
ProbStart[StateTag{'B', "i"}] = -6.1157847275557105
|
||||
ProbStart[StateTag{'B', "in"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "j"}] = -5.0576191284681915
|
||||
ProbStart[StateTag{'B', "jn"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "k"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "l"}] = -4.905883584659895
|
||||
ProbStart[StateTag{'B', "ln"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "m"}] = -3.6524299819046386
|
||||
ProbStart[StateTag{'B', "mg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "mq"}] = -6.78695300139688
|
||||
ProbStart[StateTag{'B', "n"}] = -1.6966257797548328
|
||||
ProbStart[StateTag{'B', "ng"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "nr"}] = -2.2310495913769506
|
||||
ProbStart[StateTag{'B', "nrfg"}] = -5.873722175405573
|
||||
ProbStart[StateTag{'B', "nrt"}] = -4.985642733519195
|
||||
ProbStart[StateTag{'B', "ns"}] = -2.8228438314969213
|
||||
ProbStart[StateTag{'B', "nt"}] = -4.846091668182416
|
||||
ProbStart[StateTag{'B', "nz"}] = -3.94698846057672
|
||||
ProbStart[StateTag{'B', "o"}] = -8.433498702146057
|
||||
ProbStart[StateTag{'B', "p"}] = -4.200984132085048
|
||||
ProbStart[StateTag{'B', "q"}] = -6.998123858956596
|
||||
ProbStart[StateTag{'B', "qe"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "qg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "r"}] = -3.4098187790818413
|
||||
ProbStart[StateTag{'B', "rg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "rr"}] = -12.434752841302146
|
||||
ProbStart[StateTag{'B', "rz"}] = -7.946116471570005
|
||||
ProbStart[StateTag{'B', "s"}] = -5.522673590839954
|
||||
ProbStart[StateTag{'B', "t"}] = -3.3647479094528574
|
||||
ProbStart[StateTag{'B', "tg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "u"}] = -9.163917277503234
|
||||
ProbStart[StateTag{'B', "ud"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "ug"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "uj"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "ul"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "uv"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "uz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "v"}] = -2.6740584874265685
|
||||
ProbStart[StateTag{'B', "vd"}] = -9.044728760238115
|
||||
ProbStart[StateTag{'B', "vg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "vi"}] = -12.434752841302146
|
||||
ProbStart[StateTag{'B', "vn"}] = -4.3315610890163585
|
||||
ProbStart[StateTag{'B', "vq"}] = -12.147070768850364
|
||||
ProbStart[StateTag{'B', "w"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "x"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "y"}] = -9.844485675856319
|
||||
ProbStart[StateTag{'B', "yg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'B', "z"}] = -7.045681111485645
|
||||
ProbStart[StateTag{'B', "zg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "a"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ad"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ag"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "an"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "b"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "bg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "c"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "d"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "df"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "dg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "e"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "en"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "f"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "g"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "h"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "i"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "in"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "j"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "jn"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "k"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "l"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ln"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "m"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "mg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "mq"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "n"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ng"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "nr"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "nrfg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "nrt"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ns"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "nt"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "nz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "o"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "p"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "q"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "qe"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "qg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "r"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "rg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "rr"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "rz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "s"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "t"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "tg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "u"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ud"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ug"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "uj"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "ul"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "uv"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "uz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "v"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "vd"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "vg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "vi"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "vn"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "vq"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "w"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "x"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "y"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "yg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "z"}] = -3.14e+100
|
||||
ProbStart[StateTag{'E', "zg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "a"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ad"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ag"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "an"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "b"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "bg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "c"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "d"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "df"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "dg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "e"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "en"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "f"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "g"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "h"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "i"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "in"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "j"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "jn"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "k"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "l"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ln"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "m"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "mg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "mq"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "n"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ng"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "nr"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "nrfg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "nrt"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ns"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "nt"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "nz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "o"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "p"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "q"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "qe"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "qg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "r"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "rg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "rr"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "rz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "s"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "t"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "tg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "u"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ud"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ug"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "uj"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "ul"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "uv"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "uz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "v"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "vd"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "vg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "vi"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "vn"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "vq"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "w"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "x"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "y"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "yg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "z"}] = -3.14e+100
|
||||
ProbStart[StateTag{'M', "zg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "a"}] = -3.9025396831295227
|
||||
ProbStart[StateTag{'S', "ad"}] = -11.048458480182255
|
||||
ProbStart[StateTag{'S', "ag"}] = -6.954113917960154
|
||||
ProbStart[StateTag{'S', "an"}] = -12.84021794941031
|
||||
ProbStart[StateTag{'S', "b"}] = -6.472888763970454
|
||||
ProbStart[StateTag{'S', "bg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "c"}] = -4.786966795861212
|
||||
ProbStart[StateTag{'S', "d"}] = -3.903919764181873
|
||||
ProbStart[StateTag{'S', "df"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "dg"}] = -8.948397651299683
|
||||
ProbStart[StateTag{'S', "e"}] = -5.942513006281674
|
||||
ProbStart[StateTag{'S', "en"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "f"}] = -5.194820249981676
|
||||
ProbStart[StateTag{'S', "g"}] = -6.507826815331734
|
||||
ProbStart[StateTag{'S', "h"}] = -8.650563207383884
|
||||
ProbStart[StateTag{'S', "i"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "in"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "j"}] = -4.911992119644354
|
||||
ProbStart[StateTag{'S', "jn"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "k"}] = -6.940320595827818
|
||||
ProbStart[StateTag{'S', "l"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "ln"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "m"}] = -3.269200652116097
|
||||
ProbStart[StateTag{'S', "mg"}] = -10.825314928868044
|
||||
ProbStart[StateTag{'S', "mq"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "n"}] = -3.8551483897645107
|
||||
ProbStart[StateTag{'S', "ng"}] = -4.913434861102905
|
||||
ProbStart[StateTag{'S', "nr"}] = -4.483663103956885
|
||||
ProbStart[StateTag{'S', "nrfg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "nrt"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "ns"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "nt"}] = -12.147070768850364
|
||||
ProbStart[StateTag{'S', "nz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "o"}] = -8.464460927750023
|
||||
ProbStart[StateTag{'S', "p"}] = -2.9868401813596317
|
||||
ProbStart[StateTag{'S', "q"}] = -4.888658618255058
|
||||
ProbStart[StateTag{'S', "qe"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "qg"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "r"}] = -2.7635336784127853
|
||||
ProbStart[StateTag{'S', "rg"}] = -10.275268591948773
|
||||
ProbStart[StateTag{'S', "rr"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "rz"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "s"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "t"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "tg"}] = -6.272842531880403
|
||||
ProbStart[StateTag{'S', "u"}] = -6.940320595827818
|
||||
ProbStart[StateTag{'S', "ud"}] = -7.728230161053767
|
||||
ProbStart[StateTag{'S', "ug"}] = -7.5394037026636855
|
||||
ProbStart[StateTag{'S', "uj"}] = -6.85251045118004
|
||||
ProbStart[StateTag{'S', "ul"}] = -8.4153713175535
|
||||
ProbStart[StateTag{'S', "uv"}] = -8.15808672228609
|
||||
ProbStart[StateTag{'S', "uz"}] = -9.299258625372996
|
||||
ProbStart[StateTag{'S', "v"}] = -3.053292303412302
|
||||
ProbStart[StateTag{'S', "vd"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "vg"}] = -5.9430181843676895
|
||||
ProbStart[StateTag{'S', "vi"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "vn"}] = -11.453923588290419
|
||||
ProbStart[StateTag{'S', "vq"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "w"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "x"}] = -8.427419656069674
|
||||
ProbStart[StateTag{'S', "y"}] = -6.1970794699489575
|
||||
ProbStart[StateTag{'S', "yg"}] = -13.533365129970255
|
||||
ProbStart[StateTag{'S', "z"}] = -3.14e+100
|
||||
ProbStart[StateTag{'S', "zg"}] = -3.14e+100
|
||||
}
|
||||
5496
posseg/prob_trans.go
Normal file
5496
posseg/prob_trans.go
Normal file
File diff suppressed because it is too large
Load Diff
128
posseg/viterbi.go
Normal file
128
posseg/viterbi.go
Normal file
@@ -0,0 +1,128 @@
|
||||
package posseg
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// MIN_FLOAT is the very large negative score -3.14e100.
// NOTE(review): presumably the sentinel for "impossible" — the same value
// fills most ProbStart entries; confirm against the other probability tables.
const MIN_FLOAT = -3.14e100
|
||||
|
||||
// StateTag pairs a one-byte state with a tag string. It is comparable and is
// used as a map key by the probability tables and by Viterbi.
type StateTag struct {
	// State is the state byte; ProbStart uses 'B', 'M', 'E' and 'S'.
	State byte
	// Tag is the tag label, e.g. "n", "v" or "nr".
	Tag string
}
|
||||
|
||||
func (st StateTag) String() string {
|
||||
return fmt.Sprintf("(%q, %s)", st.State, st.Tag)
|
||||
}
|
||||
|
||||
func emptyStateTag() StateTag {
|
||||
return StateTag{' ', ""}
|
||||
}
|
||||
|
||||
type ProbState struct {
|
||||
Prob float64
|
||||
ST StateTag
|
||||
}
|
||||
|
||||
func (ps ProbState) String() string {
|
||||
return fmt.Sprintf("(%v: %f)", ps.ST, ps.Prob)
|
||||
}
|
||||
|
||||
// ProbStates is a slice of ProbState that implements sort.Interface through
// the Len, Less and Swap methods defined in this file.
type ProbStates []ProbState
|
||||
|
||||
func (pss ProbStates) Len() int {
|
||||
return len(pss)
|
||||
}
|
||||
|
||||
func (pss ProbStates) Less(i, j int) bool {
|
||||
if pss[i].Prob == pss[j].Prob {
|
||||
if pss[i].ST.Tag < pss[j].ST.Tag {
|
||||
return true
|
||||
} else if pss[i].ST.State < pss[j].ST.State {
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return pss[i].Prob < pss[j].Prob
|
||||
}
|
||||
|
||||
func (pss ProbStates) Swap(i, j int) {
|
||||
pss[i], pss[j] = pss[j], pss[i]
|
||||
}
|
||||
|
||||
func Viterbi(obs []rune) (float64, []StateTag) {
|
||||
obsLength := len(obs)
|
||||
V := make([]map[StateTag]float64, obsLength)
|
||||
V[0] = make(map[StateTag]float64)
|
||||
mem_path := make([]map[StateTag]StateTag, obsLength)
|
||||
mem_path[0] = make(map[StateTag]StateTag)
|
||||
// all_states := ProbTransKeys
|
||||
ys := CharStateTab.Get(obs[0]) // default is all_states
|
||||
for _, y := range ys {
|
||||
V[0][y] = ProbEmit[y].Get(obs[0]) + ProbStart[y]
|
||||
mem_path[0][y] = emptyStateTag()
|
||||
}
|
||||
for t := 1; t < obsLength; t++ {
|
||||
prev_states := make([]StateTag, 0)
|
||||
for x, _ := range mem_path[t-1] {
|
||||
if len(ProbTrans[x]) > 0 {
|
||||
prev_states = append(prev_states, x)
|
||||
}
|
||||
}
|
||||
//use Go's map to implement Python's Set()
|
||||
prev_states_expect_next := make(map[StateTag]StateTag)
|
||||
for _, x := range prev_states {
|
||||
for y, _ := range ProbTrans[x] {
|
||||
prev_states_expect_next[y] = y
|
||||
}
|
||||
}
|
||||
tmp_obs_states := CharStateTab.Get(obs[t])
|
||||
|
||||
obs_states := make([]StateTag, 0)
|
||||
for index, _ := range tmp_obs_states {
|
||||
if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
|
||||
obs_states = append(obs_states, tmp_obs_states[index])
|
||||
}
|
||||
}
|
||||
if len(obs_states) == 0 {
|
||||
obs_states = ProbTransKeys
|
||||
}
|
||||
mem_path[t] = make(map[StateTag]StateTag)
|
||||
V[t] = make(map[StateTag]float64)
|
||||
for _, y := range obs_states {
|
||||
pss := make(ProbStates, 0)
|
||||
for _, y0 := range prev_states {
|
||||
ps := ProbState{
|
||||
Prob: V[t-1][y0] + ProbTrans[y0].Get(y) + ProbEmit[y].Get(obs[t]),
|
||||
ST: y0}
|
||||
pss = append(pss, ps)
|
||||
}
|
||||
sort.Sort(sort.Reverse(pss))
|
||||
V[t][y] = pss[0].Prob
|
||||
mem_path[t][y] = pss[0].ST
|
||||
}
|
||||
}
|
||||
last := make(ProbStates, 0)
|
||||
length := len(mem_path)
|
||||
vlength := len(V)
|
||||
for y, _ := range mem_path[length-1] {
|
||||
ps := ProbState{Prob: V[vlength-1][y], ST: y}
|
||||
last = append(last, ps)
|
||||
}
|
||||
sort.Sort(sort.Reverse(last))
|
||||
prob := last[0].Prob
|
||||
state := last[0].ST
|
||||
route := make([]StateTag, len(obs))
|
||||
i := obsLength - 1
|
||||
for {
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
route[i] = state
|
||||
state = mem_path[i][state]
|
||||
i -= 1
|
||||
}
|
||||
return prob, route
|
||||
}
|
||||
46
posseg/viterbi_test.go
Normal file
46
posseg/viterbi_test.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package posseg
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
var (
|
||||
route1 = []StateTag{
|
||||
StateTag{'B', "nr"},
|
||||
StateTag{'M', "nr"},
|
||||
StateTag{'E', "nr"},
|
||||
StateTag{'S', "v"},
|
||||
StateTag{'B', "v"},
|
||||
StateTag{'E', "v"},
|
||||
StateTag{'B', "n"},
|
||||
StateTag{'M', "n"},
|
||||
StateTag{'E', "n"},
|
||||
StateTag{'S', "d"},
|
||||
StateTag{'S', "v"},
|
||||
StateTag{'S', "n"},
|
||||
StateTag{'B', "v"},
|
||||
StateTag{'E', "v"},
|
||||
StateTag{'B', "nr"},
|
||||
StateTag{'M', "nr"},
|
||||
StateTag{'M', "nr"},
|
||||
StateTag{'M', "nr"},
|
||||
StateTag{'E', "nr"},
|
||||
StateTag{'S', "zg"}}
|
||||
)
|
||||
|
||||
func TestViterbi(t *testing.T) {
|
||||
ss := "李小福是创新办主任也是云计算方面的专家;"
|
||||
prob, route := Viterbi([]rune(ss))
|
||||
if prob != MIN_FLOAT {
|
||||
t.Error(prob)
|
||||
}
|
||||
if len(route) != len(route1) {
|
||||
t.Error(len(route))
|
||||
}
|
||||
for index, _ := range route {
|
||||
if route[index] != route1[index] {
|
||||
t.Error(route[index])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
174
trie_node.go
Normal file
174
trie_node.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package jiebago
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"crypto/sha1"
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// File names of the gob caches that newTopTrie keeps in the temporary
// directory.
const (
	// CACHE_NAME is the cache file name for the default dictionary.
	CACHE_NAME = "jieba.gob"
	// USER_CACHE_PREFIX starts the cache file name of any other dictionary;
	// a hash of a path is inserted between prefix and suffix.
	USER_CACHE_PREFIX = "jieba.user."
	// USER_CACHE_SUFFIX ends the cache file name of any other dictionary.
	USER_CACHE_SUFFIX = ".gob"
)
|
||||
|
||||
type Node struct {
|
||||
Name string
|
||||
SubNodes Trie
|
||||
IsLeaf bool
|
||||
}
|
||||
|
||||
// Trie is one level of the dictionary trie: it maps a key (addWord uses a
// single character) to the Node for that key.
type Trie map[string]*Node
|
||||
|
||||
type TopTrie struct {
|
||||
T Trie
|
||||
MinFreq float64
|
||||
Total float64
|
||||
Freq map[string]float64
|
||||
}
|
||||
|
||||
// hash returns the SHA-1 digest of s as a lowercase hexadecimal string.
func hash(s string) string {
	sum := sha1.Sum([]byte(s))
	return fmt.Sprintf("%x", sum[:])
}
|
||||
|
||||
func getUserCacheName(prefix string, path string, suffix string) string {
|
||||
return fmt.Sprintf("%s%s%s", prefix, hash(path), suffix)
|
||||
}
|
||||
|
||||
func newTopTrie(filename string) (*TopTrie, error) {
|
||||
var file_path string
|
||||
var topTrie *TopTrie
|
||||
if filepath.IsAbs(filename) {
|
||||
file_path = filename
|
||||
} else {
|
||||
pwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
file_path = filepath.Clean(filepath.Join(pwd, filename))
|
||||
}
|
||||
|
||||
_, curFileName, _, _ := runtime.Caller(1)
|
||||
_curpath := filepath.Dir(curFileName)
|
||||
abs_path := filepath.Join(_curpath, Dictionary)
|
||||
var cache_file string
|
||||
if file_path == abs_path {
|
||||
cache_file = filepath.Join(os.TempDir(), CACHE_NAME)
|
||||
} else {
|
||||
cache_file = filepath.Join(os.TempDir(),
|
||||
getUserCacheName(USER_CACHE_PREFIX, abs_path, USER_CACHE_SUFFIX))
|
||||
}
|
||||
|
||||
cacheFileStat, cacheErr := os.Stat(cache_file)
|
||||
dictFileStat, _ := os.Stat(abs_path)
|
||||
if cacheErr == nil {
|
||||
if cacheFileStat.ModTime().After(dictFileStat.ModTime()) {
|
||||
cacheFile, openError := os.Open(cache_file)
|
||||
if openError == nil {
|
||||
dec := gob.NewDecoder(cacheFile)
|
||||
err := dec.Decode(&topTrie)
|
||||
if err == nil {
|
||||
return topTrie, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
topTrie = &TopTrie{T: make(Trie), MinFreq: 100.0, Total: 0.0, Freq: make(map[string]float64)}
|
||||
file, openError := os.Open(file_path)
|
||||
if openError != nil {
|
||||
return nil, openError
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
reader := bufio.NewReader(file)
|
||||
for {
|
||||
line, readError := reader.ReadString('\n')
|
||||
if readError != nil && len(line) == 0 {
|
||||
break
|
||||
}
|
||||
words := strings.Split(line, " ")
|
||||
word, freqStr := words[0], words[1]
|
||||
freq, _ := strconv.ParseFloat(freqStr, 64)
|
||||
topTrie.Total += freq
|
||||
topTrie.addWord(word, freq)
|
||||
}
|
||||
var val float64
|
||||
for key := range topTrie.Freq {
|
||||
val = math.Log(topTrie.Freq[key] / topTrie.Total)
|
||||
if val < topTrie.MinFreq {
|
||||
topTrie.MinFreq = val
|
||||
}
|
||||
topTrie.Freq[key] = val
|
||||
}
|
||||
|
||||
cacheFile_, _ := os.OpenFile(cache_file, os.O_CREATE|os.O_WRONLY, 0644)
|
||||
defer cacheFile_.Close()
|
||||
enc := gob.NewEncoder(cacheFile_)
|
||||
enc.Encode(topTrie)
|
||||
|
||||
return topTrie, nil
|
||||
}
|
||||
|
||||
func (tt *TopTrie) addWord(word string, freq float64) {
|
||||
tt.Freq[word] = freq
|
||||
var p Trie
|
||||
var node *Node
|
||||
var key string
|
||||
count := utf8.RuneCountInString(word)
|
||||
for index, c := range []rune(word) {
|
||||
if index == 0 {
|
||||
p = tt.T
|
||||
}
|
||||
key = string(c)
|
||||
if _, ok := p[key]; ok {
|
||||
node = p[key]
|
||||
} else {
|
||||
node = &Node{Name: key, IsLeaf: false}
|
||||
p[key] = node
|
||||
node.SubNodes = make(Trie)
|
||||
}
|
||||
if index == count-1 {
|
||||
p[key].IsLeaf = true
|
||||
}
|
||||
p = node.SubNodes
|
||||
}
|
||||
}
|
||||
|
||||
func addWord(word string, freq float64, tag string) {
|
||||
if len(tag) > 0 {
|
||||
UserWordTagTab[word] = strings.TrimSpace(tag)
|
||||
}
|
||||
TT.addWord(word, freq)
|
||||
}
|
||||
|
||||
func LoadUserDict(file_path string) error {
|
||||
file, openError := os.Open(file_path)
|
||||
if openError != nil {
|
||||
return openError
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
reader := bufio.NewReader(file)
|
||||
for {
|
||||
line, readError := reader.ReadString('\n')
|
||||
if readError != nil && len(line) == 0 {
|
||||
break
|
||||
}
|
||||
words := strings.Split(line, " ")
|
||||
word, freqStr := words[0], words[1]
|
||||
freq, _ := strconv.ParseFloat(freqStr, 64)
|
||||
TT.addWord(word, freq)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
6
userdict.txt
Normal file
6
userdict.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
云计算 5
|
||||
李小福 2 nr
|
||||
创新办 3 i
|
||||
easy_install 3 eng
|
||||
好用 300
|
||||
韩玉赏鉴 3 nz
|
||||
Reference in New Issue
Block a user