1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-13 05:31:02 +08:00

refactor variable name

This commit is contained in:
Wang Bin
2015-02-26 16:07:08 +08:00
parent ecf2da12f4
commit aa9ad48b1c
4 changed files with 25 additions and 28 deletions

View File

@@ -82,7 +82,7 @@ func GetDAG(sentence string) map[int][]int {
i = k i = k
frag = string(runes[k]) frag = string(runes[k])
for { for {
if freq, ok := T.Freq[frag]; !ok { if freq, ok := Trie.Freq[frag]; !ok {
break break
} else { } else {
if freq > 0.0 { if freq > 0.0 {
@@ -108,14 +108,14 @@ func Calc(sentence string, dag map[int][]int) map[int]*Route {
number := len(runes) number := len(runes)
routes := make(map[int]*Route) routes := make(map[int]*Route)
routes[number] = &Route{Freq: 0.0, Index: 0} routes[number] = &Route{Freq: 0.0, Index: 0}
logTotal := math.Log(T.Total) logTotal := math.Log(Trie.Total)
for idx := number - 1; idx >= 0; idx-- { for idx := number - 1; idx >= 0; idx-- {
candidates := make(Routes, 0) candidates := make(Routes, 0)
for _, i := range dag[idx] { for _, i := range dag[idx] {
word := string(runes[idx : i+1]) word := string(runes[idx : i+1])
var route *Route var route *Route
if _, ok := T.Freq[word]; ok { if _, ok := Trie.Freq[word]; ok {
route = &Route{Freq: math.Log(T.Freq[word]) - logTotal + routes[i+1].Freq, Index: i} route = &Route{Freq: math.Log(Trie.Freq[word]) - logTotal + routes[i+1].Freq, Index: i}
} else { } else {
route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i} route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i}
} }
@@ -153,7 +153,7 @@ func cut_DAG(sentence string) []string {
buf = make([]rune, 0) buf = make([]rune, 0)
} else { } else {
bufString := string(buf) bufString := string(buf)
if v, ok := T.Freq[bufString]; !ok || v == 0.0 { if v, ok := Trie.Freq[bufString]; !ok || v == 0.0 {
recognized := finalseg.Cut(bufString) recognized := finalseg.Cut(bufString)
for _, t := range recognized { for _, t := range recognized {
result = append(result, t) result = append(result, t)
@@ -176,7 +176,7 @@ func cut_DAG(sentence string) []string {
result = append(result, string(buf)) result = append(result, string(buf))
} else { } else {
bufString := string(buf) bufString := string(buf)
if v, ok := T.Freq[bufString]; !ok || v == 0.0 { if v, ok := Trie.Freq[bufString]; !ok || v == 0.0 {
recognized := finalseg.Cut(bufString) recognized := finalseg.Cut(bufString)
for _, t := range recognized { for _, t := range recognized {
result = append(result, t) result = append(result, t)
@@ -320,7 +320,7 @@ func CutForSearch(sentence string, hmm bool) []string {
var gram2 string var gram2 string
for i := 0; i < len(runes)-increment+1; i++ { for i := 0; i < len(runes)-increment+1; i++ {
gram2 = string(runes[i : i+increment]) gram2 = string(runes[i : i+increment])
if v, ok := T.Freq[gram2]; ok && v > 0.0 { if v, ok := Trie.Freq[gram2]; ok && v > 0.0 {
result = append(result, gram2) result = append(result, gram2)
} }
} }

View File

@@ -121,7 +121,7 @@ func cut_DAG(sentence string) []WordTag {
buf = make([]rune, 0) buf = make([]rune, 0)
} else { } else {
bufString := string(buf) bufString := string(buf)
if v, ok := jiebago.T.Freq[bufString]; !ok || v == 0.0 { if v, ok := jiebago.Trie.Freq[bufString]; !ok || v == 0.0 {
recognized := cutDetail(bufString) recognized := cutDetail(bufString)
for _, t := range recognized { for _, t := range recognized {
result = append(result, t) result = append(result, t)
@@ -160,7 +160,7 @@ func cut_DAG(sentence string) []WordTag {
} }
} else { } else {
bufString := string(buf) bufString := string(buf)
if v, ok := jiebago.T.Freq[bufString]; !ok || v == 0.0 { if v, ok := jiebago.Trie.Freq[bufString]; !ok || v == 0.0 {
recognized := cutDetail(bufString) recognized := cutDetail(bufString)
for _, t := range recognized { for _, t := range recognized {
result = append(result, t) result = append(result, t)

View File

@@ -24,7 +24,7 @@ func Tokenize(sentence string, mode string, HMM bool) []Token {
if width > step { if width > step {
for i := 0; i < width-step+1; i++ { for i := 0; i < width-step+1; i++ {
gram := string(runes[i : i+step]) gram := string(runes[i : i+step])
if _, ok := T.Freq[gram]; ok { if _, ok := Trie.Freq[gram]; ok {
tokens = append(tokens, Token{gram, start + i, start + i + step}) tokens = append(tokens, Token{gram, start + i, start + i + step})
} }
} }

33
trie.go
View File

@@ -11,14 +11,14 @@ import (
"strings" "strings"
) )
var T *Trie var Trie *trie
type Trie struct { type trie struct {
Total float64 Total float64
Freq map[string]float64 Freq map[string]float64
} }
func (t Trie) MarshalBinary() ([]byte, error) { func (t trie) MarshalBinary() ([]byte, error) {
var b bytes.Buffer var b bytes.Buffer
enc := gob.NewEncoder(&b) enc := gob.NewEncoder(&b)
err := enc.Encode(t.Total) err := enc.Encode(t.Total)
@@ -32,7 +32,7 @@ func (t Trie) MarshalBinary() ([]byte, error) {
return b.Bytes(), nil return b.Bytes(), nil
} }
func (t *Trie) UnmarshalBinary(data []byte) error { func (t *trie) UnmarshalBinary(data []byte) error {
b := bytes.NewBuffer(data) b := bytes.NewBuffer(data)
dec := gob.NewDecoder(b) dec := gob.NewDecoder(b)
err := dec.Decode(&t.Total) err := dec.Decode(&t.Total)
@@ -46,7 +46,7 @@ func (t *Trie) UnmarshalBinary(data []byte) error {
return nil return nil
} }
func newTrie(dictFileName string) error { func (t *trie) load(dictFileName string) error {
dictFilePath, err := DictPath(dictFileName) dictFilePath, err := DictPath(dictFileName)
if err != nil { if err != nil {
return err return err
@@ -83,7 +83,7 @@ func newTrie(dictFileName string) error {
if isDictCached { if isDictCached {
dec := gob.NewDecoder(cacheFile) dec := gob.NewDecoder(cacheFile)
err = dec.Decode(&T) err = dec.Decode(&t)
if err != nil { if err != nil {
isDictCached = false isDictCached = false
} else { } else {
@@ -98,7 +98,7 @@ func newTrie(dictFileName string) error {
} }
for _, wtf := range wtfs { for _, wtf := range wtfs {
T.addWord(wtf) t.addWord(wtf)
} }
// dump trie // dump trie
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
@@ -107,7 +107,7 @@ func newTrie(dictFileName string) error {
} }
defer cacheFile.Close() defer cacheFile.Close()
enc := gob.NewEncoder(cacheFile) enc := gob.NewEncoder(cacheFile)
err = enc.Encode(T) err = enc.Encode(t)
if err != nil { if err != nil {
return err return err
} else { } else {
@@ -117,7 +117,7 @@ func newTrie(dictFileName string) error {
return nil return nil
} }
func (t *Trie) addWord(wtf *WordTagFreq) { func (t *trie) addWord(wtf *WordTagFreq) {
t.Freq[wtf.Word] = wtf.Freq t.Freq[wtf.Word] = wtf.Freq
t.Total += wtf.Freq t.Total += wtf.Freq
runes := []rune(wtf.Word) runes := []rune(wtf.Word)
@@ -129,12 +129,6 @@ func (t *Trie) addWord(wtf *WordTagFreq) {
} }
} }
} }
func addWord(wtf *WordTagFreq) {
if len(wtf.Tag) > 0 {
UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag)
}
T.addWord(wtf)
}
func LoadUserDict(dictFilePath string) error { func LoadUserDict(dictFilePath string) error {
wtfs, err := ParseDictFile(dictFilePath) wtfs, err := ParseDictFile(dictFilePath)
@@ -142,12 +136,15 @@ func LoadUserDict(dictFilePath string) error {
return err return err
} }
for _, wtf := range wtfs { for _, wtf := range wtfs {
addWord(wtf) if len(wtf.Tag) > 0 {
UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag)
}
Trie.addWord(wtf)
} }
return nil return nil
} }
func SetDictionary(dictFileName string) error { func SetDictionary(dictFileName string) error {
T = &Trie{Total: 0.0, Freq: make(map[string]float64)} Trie = &trie{Total: 0.0, Freq: make(map[string]float64)}
return newTrie(dictFileName) return Trie.load(dictFileName)
} }