mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-26 06:50:23 +08:00
refactor posseg, added Posseg struct
This commit is contained in:
38
trie.go
38
trie.go
@@ -7,18 +7,14 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Trie stores the total frequency and a map of all words and their frequencies
|
||||
var Trie *trie
|
||||
|
||||
type trie struct {
|
||||
type Jieba struct {
|
||||
Total float64
|
||||
Freq map[string]float64
|
||||
}
|
||||
|
||||
func (t *trie) load(dictFileName string) error {
|
||||
func (j *Jieba) load(dictFileName string) error {
|
||||
dictFilePath, err := DictPath(dictFileName)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -55,7 +51,7 @@ func (t *trie) load(dictFileName string) error {
|
||||
|
||||
if isDictCached {
|
||||
dec := gob.NewDecoder(cacheFile)
|
||||
err = dec.Decode(&t)
|
||||
err = dec.Decode(&j)
|
||||
if err != nil {
|
||||
isDictCached = false
|
||||
} else {
|
||||
@@ -70,7 +66,7 @@ func (t *trie) load(dictFileName string) error {
|
||||
}
|
||||
|
||||
for _, wtf := range wtfs {
|
||||
t.addWord(wtf)
|
||||
j.AddWord(wtf)
|
||||
}
|
||||
// dump trie
|
||||
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
||||
@@ -79,7 +75,7 @@ func (t *trie) load(dictFileName string) error {
|
||||
}
|
||||
defer cacheFile.Close()
|
||||
enc := gob.NewEncoder(cacheFile)
|
||||
err = enc.Encode(t)
|
||||
err = enc.Encode(j)
|
||||
if err != nil {
|
||||
return err
|
||||
} else {
|
||||
@@ -89,30 +85,27 @@ func (t *trie) load(dictFileName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *trie) addWord(wtf *WordTagFreq) {
|
||||
t.Freq[wtf.Word] = wtf.Freq
|
||||
t.Total += wtf.Freq
|
||||
func (j *Jieba) AddWord(wtf *WordTagFreq) {
|
||||
j.Freq[wtf.Word] = wtf.Freq
|
||||
j.Total += wtf.Freq
|
||||
runes := []rune(wtf.Word)
|
||||
count := len(runes)
|
||||
for i := 0; i < count; i++ {
|
||||
wfrag := string(runes[0 : i+1])
|
||||
if _, ok := t.Freq[wfrag]; !ok {
|
||||
t.Freq[wfrag] = 0.0
|
||||
if _, ok := j.Freq[wfrag]; !ok {
|
||||
j.Freq[wfrag] = 0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load user specified dictionary file.
|
||||
func LoadUserDict(dictFilePath string) error {
|
||||
func (j *Jieba) LoadUserDict(dictFilePath string) error {
|
||||
wtfs, err := ParseDictFile(dictFilePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, wtf := range wtfs {
|
||||
if len(wtf.Tag) > 0 {
|
||||
UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag)
|
||||
}
|
||||
Trie.addWord(wtf)
|
||||
j.AddWord(wtf)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -120,7 +113,8 @@ func LoadUserDict(dictFilePath string) error {
|
||||
// Set the dictionary, which could be the absolute path of a dictionary file, or a dictionary
|
||||
// name in current directory. This function must be called before cutting any
|
||||
// sentence.
|
||||
func SetDictionary(dictFileName string) error {
|
||||
Trie = &trie{Total: 0.0, Freq: make(map[string]float64)}
|
||||
return Trie.load(dictFileName)
|
||||
func NewJieba(dictFileName string) (*Jieba, error) {
|
||||
j := &Jieba{Total: 0.0, Freq: make(map[string]float64)}
|
||||
err := j.load(dictFileName)
|
||||
return j, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user