diff --git a/analyse/textrank.go b/analyse/textrank.go
index c48468a..b753fa4 100644
--- a/analyse/textrank.go
+++ b/analyse/textrank.go
@@ -123,7 +123,10 @@ func TextRankWithPOS(sentence string, topK int, allowPOS []string) TfIdfs {
 	g := newUndirectWeightedGraph()
 	cm := make(map[[2]string]float64)
 	span := 5
-	wordTags := posseg.Cut(sentence, true)
+	wordTags := make([]posseg.WordTag, 0)
+	for wordTag := range posseg.Cut(sentence, true) {
+		wordTags = append(wordTags, wordTag)
+	}
 	for i, _ := range wordTags {
 		if _, ok := posFilt[wordTags[i].Tag]; ok {
 			for j := i + 1; j < i+span; j++ {
diff --git a/posseg/posseg.go b/posseg/posseg.go
index 549517e..39d41ef 100644
--- a/posseg/posseg.go
+++ b/posseg/posseg.go
@@ -219,12 +219,12 @@ func cutDAGNoHMM(sentence string) []WordTag {
 	return result
 }
 
-func Cut(sentence string, HMM bool) []WordTag {
+func Cut(sentence string, HMM bool) chan WordTag {
 	for key := range jiebago.UserWordTagTab {
 		wordTagMap[key] = jiebago.UserWordTagTab[key]
 		delete(jiebago.UserWordTagTab, key)
 	}
-	result := make([]WordTag, 0)
+	result := make(chan WordTag)
 	blocks := jiebago.RegexpSplit(reHanInternal, sentence)
 	var cut cutFunc
 	if HMM {
@@ -232,31 +232,34 @@ func Cut(sentence string, HMM bool) []WordTag {
 	} else {
 		cut = cutDAGNoHMM
 	}
-	for _, blk := range blocks {
-		if reHanInternal.MatchString(blk) {
-			for _, wordTag := range cut(blk) {
-				result = append(result, wordTag)
-			}
-		} else {
-			for _, x := range jiebago.RegexpSplit(reSkipInternal, blk) {
-				if reSkipInternal.MatchString(x) {
-					result = append(result, WordTag{x, "x"})
-				} else {
-					for _, xx := range x {
-						s := string(xx)
-						switch {
-						case reNum.MatchString(s):
-							result = append(result, WordTag{s, "m"})
-						case reEng.MatchString(x):
-							result = append(result, WordTag{x, "eng"})
-							break
-						default:
-							result = append(result, WordTag{s, "x"})
+	go func() {
+		for _, blk := range blocks {
+			if reHanInternal.MatchString(blk) {
+				for _, wordTag := range cut(blk) {
+					result <- wordTag
+				}
+			} else {
+				for _, x := range jiebago.RegexpSplit(reSkipInternal, blk) {
+					if reSkipInternal.MatchString(x) {
+						result <- WordTag{x, "x"}
+					} else {
+						for _, xx := range x {
+							s := string(xx)
+							switch {
+							case reNum.MatchString(s):
+								result <- WordTag{s, "m"}
+							case reEng.MatchString(x):
+								result <- WordTag{x, "eng"}
+								break
+							default:
+								result <- WordTag{s, "x"}
+							}
 						}
 					}
 				}
 			}
 		}
-	}
+		close(result)
+	}()
 	return result
 }
diff --git a/posseg/posseg_test.go b/posseg/posseg_test.go
index 6308ed1..5771d7e 100644
--- a/posseg/posseg_test.go
+++ b/posseg/posseg_test.go
@@ -268,10 +268,18 @@ var (
 	}
 )
 
+func chanToArray(ch chan WordTag) []WordTag {
+	result := make([]WordTag, 0)
+	for word := range ch {
+		result = append(result, word)
+	}
+	return result
+}
+
 func TestCut(t *testing.T) {
 	SetDictionary("../dict.txt")
 	for index, content := range test_contents {
-		result := Cut(content, true)
+		result := chanToArray(Cut(content, true))
 		if len(defaultCutResult[index]) != len(result) {
 			t.Error(content)
 		}
@@ -280,7 +288,7 @@ func TestCut(t *testing.T) {
 				t.Error(content)
 			}
 		}
-		result = Cut(content, false)
+		result = chanToArray(Cut(content, false))
 		if len(noHMMCutResult[index]) != len(result) {
 			t.Error(content)
 		}
@@ -305,7 +313,7 @@ func TestBug132(t *testing.T) {
 		WordTag{"又", "d"},
 		WordTag{"啞", "v"},
 	}
-	result := Cut(sentence, true)
+	result := chanToArray(Cut(sentence, true))
 	if len(cutResult) != len(result) {
 		t.Error(result)
 	}
@@ -337,7 +345,7 @@ func TestBug137(t *testing.T) {
 		WordTag{"研究", "vn"},
 		WordTag{"組", "x"},
 	}
-	result := Cut(sentence, true)
+	result := chanToArray(Cut(sentence, true))
 	if len(cutResult) != len(result) {
 		t.Error(result)
 	}
@@ -392,7 +400,7 @@ func TestUserDict(t *testing.T) {
 		WordTag{"N", "eng"},
 		WordTag{"类型", "n"}}
 
-	result := Cut(sentence, true)
+	result := chanToArray(Cut(sentence, true))
 	if len(cutResult) != len(result) {
 		t.Error(result)
 	}