1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-07-01 09:30:29 +08:00

Fixed the bug reported in issue #132 of the original jieba issue tracker

- issue details: https://github.com/fxsjy/jieba/issues/132
- updated tests
- some code refactoring
This commit is contained in:
Wang Bin
2014-08-08 17:59:49 +08:00
parent 08cb04815c
commit 3d685f1980
7 changed files with 107 additions and 19 deletions

View File

@@ -750,8 +750,9 @@ func TestLoadUserDict(t *testing.T) {
SetDictionary("dict.txt")
LoadUserDict("userdict.txt")
sentence := "李小福是创新办主任也是云计算方面的专家;例如我输入一个带“韩玉赏鉴”的标题在自定义词库中也增加了此词为N类型"
result := []string{"\u674e\u5c0f\u798f", "\u662f", "\u521b\u65b0\u529e", "\u4e3b\u4efb", "\u4e5f", "\u662f", "\u4e91\u8ba1\u7b97", "\u65b9\u9762", "\u7684", "\u4e13\u5bb6", ";", "\u4f8b\u5982", "\u6211", "\u8f93\u5165", "\u4e00\u4e2a", "\u5e26", "\u201c", "\u97e9\u7389\u8d4f\u9274", "\u201d", "\u7684", "\u6807\u9898", "\uff0c", "\u5728", "\u81ea\u5b9a\u4e49\u8bcd", "\u5e93\u4e2d", "\u4e5f", "\u589e\u52a0", "\u4e86", "\u6b64", "\u8bcd\u4e3a", "N", "\u7c7b\u578b"}
sentence := "李小福是创新办主任也是云计算方面的专家; 什么是八一双鹿例如我输入一个带“韩玉赏鉴”的标题在自定义词库中也增加了此词为N类型"
result := []string{"\u674e\u5c0f\u798f", "\u662f", "\u521b\u65b0\u529e", "\u4e3b\u4efb", "\u4e5f", "\u662f", "\u4e91\u8ba1\u7b97", "\u65b9\u9762", "\u7684", "\u4e13\u5bb6", ";", " ", "\u4ec0\u4e48", "\u662f", "\u516b\u4e00\u53cc\u9e7f", "\u4f8b\u5982", "\u6211", "\u8f93\u5165", "\u4e00\u4e2a", "\u5e26", "\u201c", "\u97e9\u7389\u8d4f\u9274", "\u201d", "\u7684", "\u6807\u9898", "\uff0c", "\u5728", "\u81ea\u5b9a\u4e49\u8bcd", "\u5e93\u4e2d", "\u4e5f", "\u589e\u52a0", "\u4e86", "\u6b64", "\u8bcd\u4e3a", "N", "\u7c7b\u578b"}
words := Cut(sentence, false, true)
if len(words) != len(result) {
t.Error(len(words))