mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-18 09:20:26 +08:00
优化 dict, add fs.File 支持
This commit is contained in:
@@ -6,8 +6,8 @@ import (
|
||||
|
||||
func Example_extractTags() {
|
||||
var t TagExtracter
|
||||
t.LoadDictionary("../dict.txt")
|
||||
t.LoadIdf("idf.txt")
|
||||
t.LoadDictionaryAt("../dict.txt")
|
||||
t.LoadIdfAt("idf.txt")
|
||||
|
||||
sentence := "这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"
|
||||
segments := t.ExtractTags(sentence, 5)
|
||||
@@ -20,7 +20,7 @@ func Example_extractTags() {
|
||||
}
|
||||
|
||||
func Example_textRank() {
|
||||
t, err := NewTextRanker("../dict.txt")
|
||||
t, err := NewTextRankerAt("../dict.txt")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package analyse
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
@@ -38,8 +39,12 @@ func (i *Idf) Load(tokens ...dictionary.Token) {
|
||||
i.Unlock()
|
||||
}
|
||||
|
||||
func (i *Idf) loadDictionary(fileName string) error {
|
||||
return dictionary.LoadDictionary(i, fileName)
|
||||
func (i *Idf) loadDictionary(file fs.File) error {
|
||||
return dictionary.LoadDictionary(i, file)
|
||||
}
|
||||
|
||||
func (i *Idf) loadDictionaryAt(fileName string) error {
|
||||
return dictionary.LoadDictionaryAt(i, fileName)
|
||||
}
|
||||
|
||||
// Frequency returns the IDF of the given word.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package analyse
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"sync"
|
||||
|
||||
"github.com/fumiama/jieba/dictionary"
|
||||
@@ -82,6 +83,10 @@ func (s *StopWord) Load(tokens ...dictionary.Token) {
|
||||
s.Unlock()
|
||||
}
|
||||
|
||||
func (s *StopWord) loadDictionary(fileName string) error {
|
||||
return dictionary.LoadDictionary(s, fileName)
|
||||
func (s *StopWord) loadDictionary(file fs.File) error {
|
||||
return dictionary.LoadDictionary(s, file)
|
||||
}
|
||||
|
||||
func (s *StopWord) loadDictionaryAt(file string) error {
|
||||
return dictionary.LoadDictionaryAt(s, file)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
package analyse
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
@@ -52,22 +53,41 @@ type TagExtracter struct {
|
||||
}
|
||||
|
||||
// LoadDictionary reads the given file and creates a new dictionary.
|
||||
func (t *TagExtracter) LoadDictionary(fileName string) error {
|
||||
func (t *TagExtracter) LoadDictionary(file fs.File) error {
|
||||
t.stopWord = NewStopWord()
|
||||
t.seg = new(jieba.Segmenter)
|
||||
return t.seg.LoadDictionary(fileName)
|
||||
return t.seg.LoadDictionary(file)
|
||||
}
|
||||
|
||||
// LoadDictionaryAt reads the file with the given name and creates a new dictionary.
|
||||
func (t *TagExtracter) LoadDictionaryAt(fileName string) error {
|
||||
t.stopWord = NewStopWord()
|
||||
t.seg = new(jieba.Segmenter)
|
||||
return t.seg.LoadDictionaryAt(fileName)
|
||||
}
|
||||
|
||||
// LoadIdf reads the given file and creates a new Idf dictionary.
|
||||
func (t *TagExtracter) LoadIdf(fileName string) error {
|
||||
func (t *TagExtracter) LoadIdf(file fs.File) error {
|
||||
t.idf = NewIdf()
|
||||
return t.idf.loadDictionary(fileName)
|
||||
return t.idf.loadDictionary(file)
|
||||
}
|
||||
|
||||
// LoadIdfAt reads the file with the given name and creates a new Idf dictionary.
|
||||
func (t *TagExtracter) LoadIdfAt(fileName string) error {
|
||||
t.idf = NewIdf()
|
||||
return t.idf.loadDictionaryAt(fileName)
|
||||
}
|
||||
|
||||
// LoadStopWords reads the given file and creates a new StopWord dictionary.
|
||||
func (t *TagExtracter) LoadStopWords(fileName string) error {
|
||||
func (t *TagExtracter) LoadStopWords(file fs.File) error {
|
||||
t.stopWord = NewStopWord()
|
||||
return t.stopWord.loadDictionary(fileName)
|
||||
return t.stopWord.loadDictionary(file)
|
||||
}
|
||||
|
||||
// LoadStopWordsAt reads the file with the given name and creates a new StopWord dictionary.
|
||||
func (t *TagExtracter) LoadStopWordsAt(file string) error {
|
||||
t.stopWord = NewStopWord()
|
||||
return t.stopWord.loadDictionaryAt(file)
|
||||
}
|
||||
|
||||
// ExtractTags extracts the topK key words from sentence.
|
||||
|
||||
@@ -256,8 +256,8 @@ var (
|
||||
|
||||
func TestExtractTags(t *testing.T) {
|
||||
var te TagExtracter
|
||||
te.LoadDictionary("../dict.txt")
|
||||
te.LoadIdf("idf.txt")
|
||||
te.LoadDictionaryAt("../dict.txt")
|
||||
te.LoadIdfAt("idf.txt")
|
||||
|
||||
for index, sentence := range testContents {
|
||||
result := te.ExtractTags(sentence, 20)
|
||||
@@ -274,8 +274,8 @@ func TestExtractTags(t *testing.T) {
|
||||
|
||||
func TestExtratTagsWithWeight(t *testing.T) {
|
||||
var te TagExtracter
|
||||
te.LoadDictionary("../dict.txt")
|
||||
te.LoadIdf("idf.txt")
|
||||
te.LoadDictionaryAt("../dict.txt")
|
||||
te.LoadIdfAt("idf.txt")
|
||||
result := te.ExtractTags(Lyric, 10)
|
||||
for index, tag := range result {
|
||||
if LyciWeight[index].text != tag.text ||
|
||||
@@ -287,9 +287,9 @@ func TestExtratTagsWithWeight(t *testing.T) {
|
||||
|
||||
func TestExtractTagsWithStopWordsFile(t *testing.T) {
|
||||
var te TagExtracter
|
||||
te.LoadDictionary("../dict.txt")
|
||||
te.LoadIdf("idf.txt")
|
||||
te.LoadStopWords("stop_words.txt")
|
||||
te.LoadDictionaryAt("../dict.txt")
|
||||
te.LoadIdfAt("idf.txt")
|
||||
te.LoadStopWordsAt("stop_words.txt")
|
||||
result := te.ExtractTags(Lyric, 7)
|
||||
for index, tag := range result {
|
||||
if LyciWeight2[index].text != tag.text ||
|
||||
|
||||
@@ -2,6 +2,7 @@ package analyse
|
||||
|
||||
import (
|
||||
"hash/crc64"
|
||||
"io/fs"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
@@ -173,7 +174,13 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments {
|
||||
type TextRanker posseg.Segmenter
|
||||
|
||||
// NewTextRanker reads the given file and loads it as the dictionary of a new TextRanker.
|
||||
func NewTextRanker(fileName string) (TextRanker, error) {
|
||||
func NewTextRanker(file fs.File) (TextRanker, error) {
|
||||
seg := posseg.Segmenter{}
|
||||
return TextRanker(seg), seg.LoadDictionary(fileName)
|
||||
return TextRanker(seg), seg.LoadDictionary(file)
|
||||
}
|
||||
|
||||
// NewTextRankerAt reads the file with the given name and loads it as the dictionary of a new TextRanker.
|
||||
func NewTextRankerAt(fileName string) (TextRanker, error) {
|
||||
seg := posseg.Segmenter{}
|
||||
return TextRanker(seg), seg.LoadDictionaryAt(fileName)
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ var (
|
||||
)
|
||||
|
||||
func TestTextRank(t *testing.T) {
|
||||
tr, err := NewTextRanker("../dict.txt")
|
||||
tr, err := NewTextRankerAt("../dict.txt")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user