mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-28 16:10:32 +08:00
优化 TextRanker
This commit is contained in:
@@ -1,13 +1,11 @@
|
|||||||
package analyse_test
|
package analyse
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/fumiama/jieba/analyse"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func Example_extractTags() {
|
func Example_extractTags() {
|
||||||
var t analyse.TagExtracter
|
var t TagExtracter
|
||||||
t.LoadDictionary("../dict.txt")
|
t.LoadDictionary("../dict.txt")
|
||||||
t.LoadIdf("idf.txt")
|
t.LoadIdf("idf.txt")
|
||||||
|
|
||||||
@@ -22,8 +20,11 @@ func Example_extractTags() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func Example_textRank() {
|
func Example_textRank() {
|
||||||
var t analyse.TextRanker
|
t, err := LoadDictionary("../dict.txt")
|
||||||
t.LoadDictionary("../dict.txt")
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
sentence := "此外,公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元,增资后,吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年,实现营业收入0万元,实现净利润-139.13万元。"
|
sentence := "此外,公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元,增资后,吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年,实现营业收入0万元,实现净利润-139.13万元。"
|
||||||
|
|
||||||
result := t.TextRank(sentence, 10)
|
result := t.TextRank(sentence, 10)
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
|
|||||||
}
|
}
|
||||||
span := 5
|
span := 5
|
||||||
var pairs []posseg.Segment
|
var pairs []posseg.Segment
|
||||||
for pair := range t.seg.Cut(sentence, true) {
|
for pair := range (*posseg.Segmenter)(t).Cut(sentence, true) {
|
||||||
pairs = append(pairs, pair)
|
pairs = append(pairs, pair)
|
||||||
}
|
}
|
||||||
for i := range pairs {
|
for i := range pairs {
|
||||||
@@ -169,12 +169,10 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TextRanker is used to extract tags from a sentence.
|
// TextRanker is used to extract tags from a sentence.
|
||||||
type TextRanker struct {
|
type TextRanker posseg.Segmenter
|
||||||
seg *posseg.Segmenter
|
|
||||||
}
|
|
||||||
|
|
||||||
// LoadDictionary reads a given file and creates a new dictionary file for TextRanker.
|
// LoadDictionary reads a given file and creates a new dictionary file for TextRanker.
|
||||||
func (t *TextRanker) LoadDictionary(fileName string) error {
|
func LoadDictionary(fileName string) (TextRanker, error) {
|
||||||
t.seg = new(posseg.Segmenter)
|
seg := posseg.Segmenter{}
|
||||||
return t.seg.LoadDictionary(fileName)
|
return TextRanker(seg), seg.LoadDictionary(fileName)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,8 +23,10 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestTextRank(t *testing.T) {
|
func TestTextRank(t *testing.T) {
|
||||||
var tr TextRanker
|
tr, err := LoadDictionary("../dict.txt")
|
||||||
tr.LoadDictionary("../dict.txt")
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
results := tr.TextRank(sentence, 10)
|
results := tr.TextRank(sentence, 10)
|
||||||
for index, tw := range results {
|
for index, tw := range results {
|
||||||
if tw.text != tagRanks[index].text || math.Abs(tw.weight-tagRanks[index].weight) > 1e-6 {
|
if tw.text != tagRanks[index].text || math.Abs(tw.weight-tagRanks[index].weight) > 1e-6 {
|
||||||
|
|||||||
Reference in New Issue
Block a user