优化 dict, add fs.File 支持

2026-06-18 09:20:26 +08:00 · 2022-11-30 14:14:48 +08:00
parent c8785c7994
commit f3da9e6420
22 changed files with 190 additions and 91 deletions
--- a/analyse/example_test.go
+++ b/analyse/example_test.go
@@ -6,8 +6,8 @@ import (

 func Example_extractTags() {
 	var t TagExtracter
-	t.LoadDictionary("../dict.txt")
-	t.LoadIdf("idf.txt")
+	t.LoadDictionaryAt("../dict.txt")
+	t.LoadIdfAt("idf.txt")

 	sentence := "这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。"
 	segments := t.ExtractTags(sentence, 5)
@@ -20,7 +20,7 @@ func Example_extractTags() {
 }

 func Example_textRank() {
-	t, err := NewTextRanker("../dict.txt")
+	t, err := NewTextRankerAt("../dict.txt")
 	if err != nil {
 		panic(err)
 	}
--- a/analyse/idf.go
+++ b/analyse/idf.go
@@ -1,6 +1,7 @@
 package analyse

 import (
+	"io/fs"
 	"sort"
 	"sync"

@@ -38,8 +39,12 @@ func (i *Idf) Load(tokens ...dictionary.Token) {
 	i.Unlock()
 }

-func (i *Idf) loadDictionary(fileName string) error {
-	return dictionary.LoadDictionary(i, fileName)
+func (i *Idf) loadDictionary(file fs.File) error {
+	return dictionary.LoadDictionary(i, file)
+}
+
+func (i *Idf) loadDictionaryAt(fileName string) error {
+	return dictionary.LoadDictionaryAt(i, fileName)
 }

 // Frequency returns the IDF of given word.
--- a/analyse/stopwords.go
+++ b/analyse/stopwords.go
@@ -1,6 +1,7 @@
 package analyse

 import (
+	"io/fs"
 	"sync"

 	"github.com/fumiama/jieba/dictionary"
@@ -82,6 +83,10 @@ func (s *StopWord) Load(tokens ...dictionary.Token) {
 	s.Unlock()
 }

-func (s *StopWord) loadDictionary(fileName string) error {
-	return dictionary.LoadDictionary(s, fileName)
+func (s *StopWord) loadDictionary(file fs.File) error {
+	return dictionary.LoadDictionary(s, file)
+}
+
+func (s *StopWord) loadDictionaryAt(file string) error {
+	return dictionary.LoadDictionaryAt(s, file)
 }
--- a/analyse/tag_extracker.go
+++ b/analyse/tag_extracker.go
@@ -2,6 +2,7 @@
 package analyse

 import (
+	"io/fs"
 	"sort"
 	"strings"
 	"unicode/utf8"
@@ -52,22 +53,41 @@ type TagExtracter struct {
 }

 // LoadDictionary reads the given filename and create a new dictionary.
-func (t *TagExtracter) LoadDictionary(fileName string) error {
+func (t *TagExtracter) LoadDictionary(file fs.File) error {
 	t.stopWord = NewStopWord()
 	t.seg = new(jieba.Segmenter)
-	return t.seg.LoadDictionary(fileName)
+	return t.seg.LoadDictionary(file)
+}
+
+// LoadDictionaryAt reads the dictionary file at fileName and creates a new dictionary.
+func (t *TagExtracter) LoadDictionaryAt(fileName string) error {
+	t.stopWord = NewStopWord()
+	t.seg = new(jieba.Segmenter)
+	return t.seg.LoadDictionaryAt(fileName)
 }

 // LoadIdf reads the given file and create a new Idf dictionary.
-func (t *TagExtracter) LoadIdf(fileName string) error {
+func (t *TagExtracter) LoadIdf(file fs.File) error {
 	t.idf = NewIdf()
-	return t.idf.loadDictionary(fileName)
+	return t.idf.loadDictionary(file)
+}
+
+// LoadIdfAt reads the file at fileName and creates a new Idf dictionary.
+func (t *TagExtracter) LoadIdfAt(fileName string) error {
+	t.idf = NewIdf()
+	return t.idf.loadDictionaryAt(fileName)
 }

 // LoadStopWords reads the given file and create a new StopWord dictionary.
-func (t *TagExtracter) LoadStopWords(fileName string) error {
+func (t *TagExtracter) LoadStopWords(file fs.File) error {
 	t.stopWord = NewStopWord()
-	return t.stopWord.loadDictionary(fileName)
+	return t.stopWord.loadDictionary(file)
+}
+
+// LoadStopWordsAt reads the file at the given path and creates a new StopWord dictionary.
+func (t *TagExtracter) LoadStopWordsAt(file string) error {
+	t.stopWord = NewStopWord()
+	return t.stopWord.loadDictionaryAt(file)
 }

 // ExtractTags extracts the topK key words from sentence.
--- a/analyse/tag_extracker_test.go
+++ b/analyse/tag_extracker_test.go
@@ -256,8 +256,8 @@ var (

 func TestExtractTags(t *testing.T) {
 	var te TagExtracter
-	te.LoadDictionary("../dict.txt")
-	te.LoadIdf("idf.txt")
+	te.LoadDictionaryAt("../dict.txt")
+	te.LoadIdfAt("idf.txt")

 	for index, sentence := range testContents {
 		result := te.ExtractTags(sentence, 20)
@@ -274,8 +274,8 @@ func TestExtractTags(t *testing.T) {

 func TestExtratTagsWithWeight(t *testing.T) {
 	var te TagExtracter
-	te.LoadDictionary("../dict.txt")
-	te.LoadIdf("idf.txt")
+	te.LoadDictionaryAt("../dict.txt")
+	te.LoadIdfAt("idf.txt")
 	result := te.ExtractTags(Lyric, 10)
 	for index, tag := range result {
 		if LyciWeight[index].text != tag.text ||
@@ -287,9 +287,9 @@ func TestExtratTagsWithWeight(t *testing.T) {

 func TestExtractTagsWithStopWordsFile(t *testing.T) {
 	var te TagExtracter
-	te.LoadDictionary("../dict.txt")
-	te.LoadIdf("idf.txt")
-	te.LoadStopWords("stop_words.txt")
+	te.LoadDictionaryAt("../dict.txt")
+	te.LoadIdfAt("idf.txt")
+	te.LoadStopWordsAt("stop_words.txt")
 	result := te.ExtractTags(Lyric, 7)
 	for index, tag := range result {
 		if LyciWeight2[index].text != tag.text ||
--- a/analyse/textrank.go
+++ b/analyse/textrank.go
@@ -2,6 +2,7 @@ package analyse

 import (
 	"hash/crc64"
+	"io/fs"
 	"math"
 	"sort"

@@ -173,7 +174,13 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments {
 type TextRanker posseg.Segmenter

 // NewTextRanker reads a given file and create a new dictionary file for Textranker.
-func NewTextRanker(fileName string) (TextRanker, error) {
+func NewTextRanker(file fs.File) (TextRanker, error) {
 	seg := posseg.Segmenter{}
-	return TextRanker(seg), seg.LoadDictionary(fileName)
+	return TextRanker(seg), seg.LoadDictionary(file)
+}
+
+// NewTextRankerAt loads the dictionary file at fileName into a new TextRanker.
+func NewTextRankerAt(fileName string) (TextRanker, error) {
+	seg := posseg.Segmenter{}
+	return TextRanker(seg), seg.LoadDictionaryAt(fileName)
 }
--- a/analyse/textrank_test.go
+++ b/analyse/textrank_test.go
@@ -23,7 +23,7 @@ var (
 )

 func TestTextRank(t *testing.T) {
-	tr, err := NewTextRanker("../dict.txt")
+	tr, err := NewTextRankerAt("../dict.txt")
 	if err != nil {
 		t.Fatal(err)
 	}