1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

优化 TextRanker

This commit is contained in:
源文雨
2022-11-30 12:56:04 +08:00
parent 21cdb2e863
commit a8d1e81f73
3 changed files with 16 additions and 15 deletions

View File

@@ -1,13 +1,11 @@
package analyse_test
package analyse
import (
"fmt"
"github.com/fumiama/jieba/analyse"
)
func Example_extractTags() {
var t analyse.TagExtracter
var t TagExtracter
t.LoadDictionary("../dict.txt")
t.LoadIdf("idf.txt")
@@ -22,8 +20,11 @@ func Example_extractTags() {
}
func Example_textRank() {
var t analyse.TextRanker
t.LoadDictionary("../dict.txt")
t, err := LoadDictionary("../dict.txt")
if err != nil {
panic(err)
}
sentence := "此外公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元增资后吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年实现营业收入0万元实现净利润-139.13万元。"
result := t.TextRank(sentence, 10)

View File

@@ -132,7 +132,7 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
}
span := 5
var pairs []posseg.Segment
for pair := range t.seg.Cut(sentence, true) {
for pair := range (*posseg.Segmenter)(t).Cut(sentence, true) {
pairs = append(pairs, pair)
}
for i := range pairs {
@@ -169,12 +169,10 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments {
}
// TextRanker is used to extract tags from sentence.
type TextRanker struct {
seg *posseg.Segmenter
}
type TextRanker posseg.Segmenter
// LoadDictionary reads a given file and creates a new dictionary file for TextRanker.
func (t *TextRanker) LoadDictionary(fileName string) error {
t.seg = new(posseg.Segmenter)
return t.seg.LoadDictionary(fileName)
func LoadDictionary(fileName string) (TextRanker, error) {
seg := posseg.Segmenter{}
return TextRanker(seg), seg.LoadDictionary(fileName)
}

View File

@@ -23,8 +23,10 @@ var (
)
func TestTextRank(t *testing.T) {
var tr TextRanker
tr.LoadDictionary("../dict.txt")
tr, err := LoadDictionary("../dict.txt")
if err != nil {
t.Fatal(err)
}
results := tr.TextRank(sentence, 10)
for index, tw := range results {
if tw.text != tagRanks[index].text || math.Abs(tw.weight-tagRanks[index].weight) > 1e-6 {