1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-26 06:50:23 +08:00

refactor posseg, added Posseg struct

This commit is contained in:
Wang Bin
2015-03-24 16:54:02 +08:00
parent 0027927b6d
commit 73d87e4ed6
6 changed files with 146 additions and 109 deletions

38
trie.go
View File

@@ -7,18 +7,14 @@ import (
"log"
"os"
"path/filepath"
"strings"
)
// Trie stores the total frequency and a map of all words and their frequencies.
var Trie *trie
type trie struct {
type Jieba struct {
Total float64
Freq map[string]float64
}
func (t *trie) load(dictFileName string) error {
func (j *Jieba) load(dictFileName string) error {
dictFilePath, err := DictPath(dictFileName)
if err != nil {
return err
@@ -55,7 +51,7 @@ func (t *trie) load(dictFileName string) error {
if isDictCached {
dec := gob.NewDecoder(cacheFile)
err = dec.Decode(&t)
err = dec.Decode(&j)
if err != nil {
isDictCached = false
} else {
@@ -70,7 +66,7 @@ func (t *trie) load(dictFileName string) error {
}
for _, wtf := range wtfs {
t.addWord(wtf)
j.AddWord(wtf)
}
// dump trie
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
@@ -79,7 +75,7 @@ func (t *trie) load(dictFileName string) error {
}
defer cacheFile.Close()
enc := gob.NewEncoder(cacheFile)
err = enc.Encode(t)
err = enc.Encode(j)
if err != nil {
return err
} else {
@@ -89,30 +85,27 @@ func (t *trie) load(dictFileName string) error {
return nil
}
func (t *trie) addWord(wtf *WordTagFreq) {
t.Freq[wtf.Word] = wtf.Freq
t.Total += wtf.Freq
func (j *Jieba) AddWord(wtf *WordTagFreq) {
j.Freq[wtf.Word] = wtf.Freq
j.Total += wtf.Freq
runes := []rune(wtf.Word)
count := len(runes)
for i := 0; i < count; i++ {
wfrag := string(runes[0 : i+1])
if _, ok := t.Freq[wfrag]; !ok {
t.Freq[wfrag] = 0.0
if _, ok := j.Freq[wfrag]; !ok {
j.Freq[wfrag] = 0.0
}
}
}
// Load a user-specified dictionary file.
func LoadUserDict(dictFilePath string) error {
func (j *Jieba) LoadUserDict(dictFilePath string) error {
wtfs, err := ParseDictFile(dictFilePath)
if err != nil {
return err
}
for _, wtf := range wtfs {
if len(wtf.Tag) > 0 {
UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag)
}
Trie.addWord(wtf)
j.AddWord(wtf)
}
return nil
}
@@ -120,7 +113,8 @@ func LoadUserDict(dictFilePath string) error {
// Set the dictionary; the argument can be the absolute path of a dictionary
// file or a dictionary name in the current directory. This function must be
// called before cutting any sentence.
func SetDictionary(dictFileName string) error {
Trie = &trie{Total: 0.0, Freq: make(map[string]float64)}
return Trie.load(dictFileName)
func NewJieba(dictFileName string) (*Jieba, error) {
j := &Jieba{Total: 0.0, Freq: make(map[string]float64)}
err := j.load(dictFileName)
return j, err
}