1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-28 08:02:45 +08:00

优化 TextRanker

This commit is contained in:
源文雨
2022-11-30 12:56:04 +08:00
parent 21cdb2e863
commit a8d1e81f73
3 changed files with 16 additions and 15 deletions

View File

@@ -1,13 +1,11 @@
package analyse_test package analyse
import ( import (
"fmt" "fmt"
"github.com/fumiama/jieba/analyse"
) )
func Example_extractTags() { func Example_extractTags() {
var t analyse.TagExtracter var t TagExtracter
t.LoadDictionary("../dict.txt") t.LoadDictionary("../dict.txt")
t.LoadIdf("idf.txt") t.LoadIdf("idf.txt")
@@ -22,8 +20,11 @@ func Example_extractTags() {
} }
func Example_textRank() { func Example_textRank() {
var t analyse.TextRanker t, err := LoadDictionary("../dict.txt")
t.LoadDictionary("../dict.txt") if err != nil {
panic(err)
}
sentence := "此外公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元增资后吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年实现营业收入0万元实现净利润-139.13万元。" sentence := "此外公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元增资后吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年实现营业收入0万元实现净利润-139.13万元。"
result := t.TextRank(sentence, 10) result := t.TextRank(sentence, 10)

View File

@@ -132,7 +132,7 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
} }
span := 5 span := 5
var pairs []posseg.Segment var pairs []posseg.Segment
for pair := range t.seg.Cut(sentence, true) { for pair := range (*posseg.Segmenter)(t).Cut(sentence, true) {
pairs = append(pairs, pair) pairs = append(pairs, pair)
} }
for i := range pairs { for i := range pairs {
@@ -169,12 +169,10 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments {
} }
// TextRanker is used to extract tags from a sentence. // TextRanker is used to extract tags from a sentence.
type TextRanker struct { type TextRanker posseg.Segmenter
seg *posseg.Segmenter
}
// LoadDictionary reads a given file and creates a new dictionary file for TextRanker. // LoadDictionary reads a given file and creates a new dictionary file for TextRanker.
func (t *TextRanker) LoadDictionary(fileName string) error { func LoadDictionary(fileName string) (TextRanker, error) {
t.seg = new(posseg.Segmenter) seg := posseg.Segmenter{}
return t.seg.LoadDictionary(fileName) return TextRanker(seg), seg.LoadDictionary(fileName)
} }

View File

@@ -23,8 +23,10 @@ var (
) )
func TestTextRank(t *testing.T) { func TestTextRank(t *testing.T) {
var tr TextRanker tr, err := LoadDictionary("../dict.txt")
tr.LoadDictionary("../dict.txt") if err != nil {
t.Fatal(err)
}
results := tr.TextRank(sentence, 10) results := tr.TextRank(sentence, 10)
for index, tw := range results { for index, tw := range results {
if tw.text != tagRanks[index].text || math.Abs(tw.weight-tagRanks[index].weight) > 1e-6 { if tw.text != tagRanks[index].text || math.Abs(tw.weight-tagRanks[index].weight) > 1e-6 {