From aa9ad48b1cdbd67d63dff92b9a5017e9d9ffd5a6 Mon Sep 17 00:00:00 2001 From: Wang Bin Date: Thu, 26 Feb 2015 16:07:08 +0800 Subject: [PATCH] refactor variable name --- jieba.go | 14 +++++++------- posseg/posseg.go | 4 ++-- tokenize.go | 2 +- trie.go | 33 +++++++++++++++------------------ 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/jieba.go b/jieba.go index 60090b3..cf8b4ef 100644 --- a/jieba.go +++ b/jieba.go @@ -82,7 +82,7 @@ func GetDAG(sentence string) map[int][]int { i = k frag = string(runes[k]) for { - if freq, ok := T.Freq[frag]; !ok { + if freq, ok := Trie.Freq[frag]; !ok { break } else { if freq > 0.0 { @@ -108,14 +108,14 @@ func Calc(sentence string, dag map[int][]int) map[int]*Route { number := len(runes) routes := make(map[int]*Route) routes[number] = &Route{Freq: 0.0, Index: 0} - logTotal := math.Log(T.Total) + logTotal := math.Log(Trie.Total) for idx := number - 1; idx >= 0; idx-- { candidates := make(Routes, 0) for _, i := range dag[idx] { word := string(runes[idx : i+1]) var route *Route - if _, ok := T.Freq[word]; ok { - route = &Route{Freq: math.Log(T.Freq[word]) - logTotal + routes[i+1].Freq, Index: i} + if _, ok := Trie.Freq[word]; ok { + route = &Route{Freq: math.Log(Trie.Freq[word]) - logTotal + routes[i+1].Freq, Index: i} } else { route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i} } @@ -153,7 +153,7 @@ func cut_DAG(sentence string) []string { buf = make([]rune, 0) } else { bufString := string(buf) - if v, ok := T.Freq[bufString]; !ok || v == 0.0 { + if v, ok := Trie.Freq[bufString]; !ok || v == 0.0 { recognized := finalseg.Cut(bufString) for _, t := range recognized { result = append(result, t) @@ -176,7 +176,7 @@ func cut_DAG(sentence string) []string { result = append(result, string(buf)) } else { bufString := string(buf) - if v, ok := T.Freq[bufString]; !ok || v == 0.0 { + if v, ok := Trie.Freq[bufString]; !ok || v == 0.0 { recognized := finalseg.Cut(bufString) for _, t := range recognized { result = append(result, t) @@ -320,7 +320,7 @@ func CutForSearch(sentence string, hmm bool) []string { var gram2 string for i := 0; i < len(runes)-increment+1; i++ { gram2 = string(runes[i : i+increment]) - if v, ok := T.Freq[gram2]; ok && v > 0.0 { + if v, ok := Trie.Freq[gram2]; ok && v > 0.0 { result = append(result, gram2) } } diff --git a/posseg/posseg.go b/posseg/posseg.go index d31d5ad..376791e 100644 --- a/posseg/posseg.go +++ b/posseg/posseg.go @@ -121,7 +121,7 @@ func cut_DAG(sentence string) []WordTag { buf = make([]rune, 0) } else { bufString := string(buf) - if v, ok := jiebago.T.Freq[bufString]; !ok || v == 0.0 { + if v, ok := jiebago.Trie.Freq[bufString]; !ok || v == 0.0 { recognized := cutDetail(bufString) for _, t := range recognized { result = append(result, t) @@ -160,7 +160,7 @@ func cut_DAG(sentence string) []WordTag { } } else { bufString := string(buf) - if v, ok := jiebago.T.Freq[bufString]; !ok || v == 0.0 { + if v, ok := jiebago.Trie.Freq[bufString]; !ok || v == 0.0 { recognized := cutDetail(bufString) for _, t := range recognized { result = append(result, t) diff --git a/tokenize.go b/tokenize.go index e22086e..950c056 100644 --- a/tokenize.go +++ b/tokenize.go @@ -24,7 +24,7 @@ func Tokenize(sentence string, mode string, HMM bool) []Token { if width > step { for i := 0; i < width-step+1; i++ { gram := string(runes[i : i+step]) - if _, ok := T.Freq[gram]; ok { + if _, ok := Trie.Freq[gram]; ok { tokens = append(tokens, Token{gram, start + i, start + i + step}) } } diff --git a/trie.go b/trie.go index 6a362b2..adb220a 100644 --- a/trie.go +++ b/trie.go @@ -11,14 +11,14 @@ import ( "strings" ) -var T *Trie +var Trie *trie -type Trie struct { +type trie struct { Total float64 Freq map[string]float64 } -func (t Trie) MarshalBinary() ([]byte, error) { +func (t trie) MarshalBinary() ([]byte, error) { var b bytes.Buffer enc := gob.NewEncoder(&b) err := enc.Encode(t.Total) @@ -32,7 +32,7 @@ func (t Trie) MarshalBinary() ([]byte, error) { return b.Bytes(), nil } -func (t *Trie) UnmarshalBinary(data []byte) error { +func (t *trie) UnmarshalBinary(data []byte) error { b := bytes.NewBuffer(data) dec := gob.NewDecoder(b) err := dec.Decode(&t.Total) @@ -46,7 +46,7 @@ func (t *Trie) UnmarshalBinary(data []byte) error { return nil } -func newTrie(dictFileName string) error { +func (t *trie) load(dictFileName string) error { dictFilePath, err := DictPath(dictFileName) if err != nil { return err @@ -83,7 +83,7 @@ func newTrie(dictFileName string) error { if isDictCached { dec := gob.NewDecoder(cacheFile) - err = dec.Decode(&T) + err = dec.Decode(&t) if err != nil { isDictCached = false } else { @@ -98,7 +98,7 @@ func newTrie(dictFileName string) error { } for _, wtf := range wtfs { - T.addWord(wtf) + t.addWord(wtf) } // dump trie cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) @@ -107,7 +107,7 @@ func newTrie(dictFileName string) error { } defer cacheFile.Close() enc := gob.NewEncoder(cacheFile) - err = enc.Encode(T) + err = enc.Encode(t) if err != nil { return err } else { @@ -117,7 +117,7 @@ func newTrie(dictFileName string) error { return nil } -func (t *Trie) addWord(wtf *WordTagFreq) { +func (t *trie) addWord(wtf *WordTagFreq) { t.Freq[wtf.Word] = wtf.Freq t.Total += wtf.Freq runes := []rune(wtf.Word) @@ -129,12 +129,6 @@ func (t *Trie) addWord(wtf *WordTagFreq) { } } } -func addWord(wtf *WordTagFreq) { - if len(wtf.Tag) > 0 { - UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag) - } - T.addWord(wtf) -} func LoadUserDict(dictFilePath string) error { wtfs, err := ParseDictFile(dictFilePath) @@ -142,12 +136,15 @@ func LoadUserDict(dictFilePath string) error { return err } for _, wtf := range wtfs { - addWord(wtf) + if len(wtf.Tag) > 0 { + UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag) + } + Trie.addWord(wtf) } return nil } func SetDictionary(dictFileName string) error { - T = &Trie{Total: 0.0, Freq: make(map[string]float64)} - return newTrie(dictFileName) + Trie = &trie{Total: 0.0, Freq: make(map[string]float64)} + return Trie.load(dictFileName) }