mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-23 12:40:39 +08:00
优化 dict, add fs.File 支持
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package posseg
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"math"
|
||||
"sync"
|
||||
|
||||
@@ -69,6 +70,10 @@ func (d *Dictionary) Pos(key string) (string, bool) {
|
||||
return pos, ok
|
||||
}
|
||||
|
||||
func (d *Dictionary) loadDictionary(fileName string) error {
|
||||
return dictionary.LoadDictionary(d, fileName)
|
||||
func (d *Dictionary) loadDictionary(file fs.File) error {
|
||||
return dictionary.LoadDictionary(d, file)
|
||||
}
|
||||
|
||||
func (d *Dictionary) loadDictionaryAt(file string) error {
|
||||
return dictionary.LoadDictionaryAt(d, file)
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
|
||||
func Example() {
|
||||
var seg posseg.Segmenter
|
||||
seg.LoadDictionary("../dict.txt")
|
||||
seg.LoadDictionaryAt("../dict.txt")
|
||||
|
||||
for segment := range seg.Cut("我爱北京天安门", true) {
|
||||
fmt.Printf("%s %s\n", segment.Text(), segment.Pos())
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
package posseg
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"math"
|
||||
"regexp"
|
||||
|
||||
@@ -39,17 +40,31 @@ type Segmenter struct {
|
||||
}
|
||||
|
||||
// LoadDictionary loads the dictionary from the given file.
|
||||
// Everytime LoadDictionary is called, previously loaded dictionary will be cleard.
|
||||
func (seg *Segmenter) LoadDictionary(fileName string) error {
|
||||
// Every time LoadDictionary is called, the previously loaded dictionary will be cleared.
|
||||
func (seg *Segmenter) LoadDictionary(file fs.File) error {
|
||||
seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
|
||||
return seg.dict.loadDictionary(fileName)
|
||||
return seg.dict.loadDictionary(file)
|
||||
}
|
||||
|
||||
// LoadDictionaryAt loads dictionary from given file name.
|
||||
// Every time LoadDictionaryAt is called, the previously loaded dictionary will be cleared.
|
||||
func (seg *Segmenter) LoadDictionaryAt(fileName string) error {
|
||||
seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
|
||||
return seg.dict.loadDictionaryAt(fileName)
|
||||
}
|
||||
|
||||
// LoadUserDictionary loads a user specified dictionary, it must be called
|
||||
// after LoadDictionary, and it will not clear any previous loaded dictionary,
|
||||
// instead it will override existing entries.
|
||||
func (seg *Segmenter) LoadUserDictionary(fileName string) error {
|
||||
return seg.dict.loadDictionary(fileName)
|
||||
func (seg *Segmenter) LoadUserDictionary(file fs.File) error {
|
||||
return seg.dict.loadDictionary(file)
|
||||
}
|
||||
|
||||
// LoadUserDictionaryAt loads a user specified dictionary, it must be called
|
||||
// after LoadDictionary, and it will not clear any previous loaded dictionary,
|
||||
// instead it will override existing entries.
|
||||
func (seg *Segmenter) LoadUserDictionaryAt(fileName string) error {
|
||||
return seg.dict.loadDictionaryAt(fileName)
|
||||
}
|
||||
|
||||
func (seg *Segmenter) cutDetailInternal(sentence string) <-chan Segment {
|
||||
|
||||
@@ -269,7 +269,7 @@ var (
|
||||
)
|
||||
|
||||
func init() {
|
||||
seg.LoadDictionary("../dict.txt")
|
||||
seg.LoadDictionaryAt("../dict.txt")
|
||||
}
|
||||
|
||||
func chanToArray(ch <-chan Segment) []Segment {
|
||||
@@ -357,8 +357,8 @@ func TestBug137(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestUserDict(t *testing.T) {
|
||||
seg.LoadUserDictionary("../userdict.txt")
|
||||
defer seg.LoadDictionary("../dict.txt")
|
||||
seg.LoadUserDictionaryAt("../userdict.txt")
|
||||
defer seg.LoadDictionaryAt("../dict.txt")
|
||||
sentence := "李小福是创新办主任也是云计算方面的专家; 什么是八一双鹿例如我输入一个带“韩玉赏鉴”的标题,在自定义词库中也增加了此词为N类型"
|
||||
|
||||
cutResult := []Segment{
|
||||
|
||||
Reference in New Issue
Block a user