1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-23 12:40:39 +08:00

优化 dict, add fs.File 支持

This commit is contained in:
源文雨
2022-11-30 14:14:48 +08:00
parent c8785c7994
commit f3da9e6420
22 changed files with 190 additions and 91 deletions

View File

@@ -1,6 +1,7 @@
package posseg
import (
"io/fs"
"math"
"sync"
@@ -69,6 +70,10 @@ func (d *Dictionary) Pos(key string) (string, bool) {
return pos, ok
}
func (d *Dictionary) loadDictionary(fileName string) error {
return dictionary.LoadDictionary(d, fileName)
func (d *Dictionary) loadDictionary(file fs.File) error {
return dictionary.LoadDictionary(d, file)
}
// loadDictionaryAt fills d from the dictionary file located at the given
// path by delegating to dictionary.LoadDictionaryAt, and reports whatever
// error that call returns.
func (d *Dictionary) loadDictionaryAt(file string) error {
	err := dictionary.LoadDictionaryAt(d, file)
	return err
}

View File

@@ -8,7 +8,7 @@ import (
func Example() {
var seg posseg.Segmenter
seg.LoadDictionary("../dict.txt")
seg.LoadDictionaryAt("../dict.txt")
for segment := range seg.Cut("我爱北京天安门", true) {
fmt.Printf("%s %s\n", segment.Text(), segment.Pos())

View File

@@ -2,6 +2,7 @@
package posseg
import (
"io/fs"
"math"
"regexp"
@@ -39,17 +40,31 @@ type Segmenter struct {
}
// LoadDictionary loads dictionary from given file name.
// Every time LoadDictionary is called, the previously loaded dictionary will be cleared.
func (seg *Segmenter) LoadDictionary(fileName string) error {
// Every time LoadDictionary is called, the previously loaded dictionary will be cleared.
func (seg *Segmenter) LoadDictionary(file fs.File) error {
seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
return seg.dict.loadDictionary(fileName)
return seg.dict.loadDictionary(file)
}
// LoadDictionaryAt loads the dictionary from the file with the given name.
// Every call installs a freshly allocated, empty Dictionary on the segmenter
// before loading, so any previously loaded dictionary is discarded.
func (seg *Segmenter) LoadDictionaryAt(fileName string) error {
	fresh := &Dictionary{
		freqMap: make(map[string]float64),
		posMap:  make(map[string]string),
	}
	seg.dict = fresh
	return fresh.loadDictionaryAt(fileName)
}
// LoadUserDictionary loads a user-specified dictionary. It must be called
// after LoadDictionary; it does not clear any previously loaded dictionary,
// but instead overrides existing entries.
func (seg *Segmenter) LoadUserDictionary(fileName string) error {
return seg.dict.loadDictionary(fileName)
func (seg *Segmenter) LoadUserDictionary(file fs.File) error {
return seg.dict.loadDictionary(file)
}
// LoadUserDictionaryAt loads a user-specified dictionary from the file with
// the given name into the dictionary already attached to the segmenter. It
// must be called after LoadDictionary or LoadDictionaryAt, since those are
// what allocate seg.dict. It does not clear the previously loaded dictionary;
// instead, entries from the user dictionary override existing ones.
func (seg *Segmenter) LoadUserDictionaryAt(fileName string) error {
	current := seg.dict
	return current.loadDictionaryAt(fileName)
}
func (seg *Segmenter) cutDetailInternal(sentence string) <-chan Segment {

View File

@@ -269,7 +269,7 @@ var (
)
func init() {
seg.LoadDictionary("../dict.txt")
seg.LoadDictionaryAt("../dict.txt")
}
func chanToArray(ch <-chan Segment) []Segment {
@@ -357,8 +357,8 @@ func TestBug137(t *testing.T) {
}
func TestUserDict(t *testing.T) {
seg.LoadUserDictionary("../userdict.txt")
defer seg.LoadDictionary("../dict.txt")
seg.LoadUserDictionaryAt("../userdict.txt")
defer seg.LoadDictionaryAt("../dict.txt")
sentence := "李小福是创新办主任也是云计算方面的专家; 什么是八一双鹿例如我输入一个带“韩玉赏鉴”的标题在自定义词库中也增加了此词为N类型"
cutResult := []Segment{