优化 TextRanker

2026-07-17 02:40:23 +08:00 · 2022-11-30 12:56:04 +08:00
parent 21cdb2e863
commit a8d1e81f73
3 changed files with 16 additions and 15 deletions
--- a/analyse/example_test.go
+++ b/analyse/example_test.go
@@ -1,13 +1,11 @@
-package analyse_test
+package analyse

 import (
 	"fmt"
-
-	"github.com/fumiama/jieba/analyse"
 )

 func Example_extractTags() {
-	var t analyse.TagExtracter
+	var t TagExtracter
 	t.LoadDictionary("../dict.txt")
 	t.LoadIdf("idf.txt")

@@ -22,8 +20,11 @@ func Example_extractTags() {
 }

 func Example_textRank() {
-	var t analyse.TextRanker
-	t.LoadDictionary("../dict.txt")
+	t, err := LoadDictionary("../dict.txt")
+	if err != nil {
+		panic(err)
+	}
+
 	sentence := "此外，公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元，增资后，吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年，实现营业收入0万元，实现净利润-139.13万元。"

 	result := t.TextRank(sentence, 10)
--- a/analyse/textrank.go
+++ b/analyse/textrank.go
@@ -132,7 +132,7 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
 	}
 	span := 5
 	var pairs []posseg.Segment
-	for pair := range t.seg.Cut(sentence, true) {
+	for pair := range (*posseg.Segmenter)(t).Cut(sentence, true) {
 		pairs = append(pairs, pair)
 	}
 	for i := range pairs {
@@ -169,12 +169,10 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments {
 }

 // TextRanker is used to extract tags from sentence.
-type TextRanker struct {
-	seg *posseg.Segmenter
-}
+type TextRanker posseg.Segmenter

 // LoadDictionary reads a given file and create a new dictionary file for Textranker.
-func (t *TextRanker) LoadDictionary(fileName string) error {
-	t.seg = new(posseg.Segmenter)
-	return t.seg.LoadDictionary(fileName)
+func LoadDictionary(fileName string) (t TextRanker, err error) {
+	err = (*posseg.Segmenter)(&t).LoadDictionary(fileName) // fill t in place so the result never depends on operand evaluation order
+	return t, err
 }
--- a/analyse/textrank_test.go
+++ b/analyse/textrank_test.go
@@ -23,8 +23,10 @@ var (
 )

 func TestTextRank(t *testing.T) {
-	var tr TextRanker
-	tr.LoadDictionary("../dict.txt")
+	tr, err := LoadDictionary("../dict.txt")
+	if err != nil {
+		t.Fatal(err)
+	}
 	results := tr.TextRank(sentence, 10)
 	for index, tw := range results {
 		if tw.text != tagRanks[index].text || math.Abs(tw.weight-tagRanks[index].weight) > 1e-6 {