From 0f7c56b4ef0621b2c603a3099d6d076ddd86b319 Mon Sep 17 00:00:00 2001 From: Wang Bin Date: Wed, 4 Feb 2015 14:47:59 +0800 Subject: [PATCH] small code refactor --- .travis.yml | 2 +- jieba.go | 22 ++++++++-------------- posseg/posseg.go | 4 ++-- tokenize.go | 2 +- trie_node.go | 9 ++++++++- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6a363c7..e070646 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,3 @@ language: go go: - - 1.3 + - 1.4.1 diff --git a/jieba.go b/jieba.go index f5d6b92..e8e4750 100644 --- a/jieba.go +++ b/jieba.go @@ -9,7 +9,6 @@ import ( var ( Dictionary = "dict.txt" - trie *Trie UserWordTagTab = make(map[string]string) ) @@ -77,10 +76,10 @@ func GetDAG(sentence string) map[int][]int { i = k frag = string(runes[k]) for { - if !trie.Nodes.Contains(frag) { + if !T.Nodes.Contains(frag) { break } - if _, ok := trie.Freq[frag]; ok { + if _, ok := T.Freq[frag]; ok { tmpList = append(tmpList, i) } i += 1 @@ -112,10 +111,10 @@ func Calc(sentence string, dag map[int][]int, idx int) map[int]*Route { word = string(runes[idx : i+1]) } var route *Route - if _, ok := trie.Freq[word]; ok { - route = &Route{trie.Freq[word] + routes[i+1].Freq, i} + if _, ok := T.Freq[word]; ok { + route = &Route{T.Freq[word] + routes[i+1].Freq, i} } else { - route = &Route{trie.MinFreq + routes[i+1].Freq, i} + route = &Route{T.MinFreq + routes[i+1].Freq, i} } candidates = append(candidates, route) } @@ -151,7 +150,7 @@ func cut_DAG(sentence string) []string { buf = make([]rune, 0) } else { bufString := string(buf) - if _, ok := trie.Freq[bufString]; !ok { + if _, ok := T.Freq[bufString]; !ok { recognized := finalseg.Cut(bufString) for _, t := range recognized { result = append(result, t) @@ -174,7 +173,7 @@ func cut_DAG(sentence string) []string { result = append(result, string(buf)) } else { bufString := string(buf) - if _, ok := trie.Freq[bufString]; !ok { + if _, ok := T.Freq[bufString]; !ok { recognized := finalseg.Cut(bufString) for _, t := range recognized { result = append(result, t) @@ -318,7 +317,7 @@ func CutForSearch(sentence string, hmm bool) []string { var gram2 string for i := 0; i < len(runes)-increment+1; i++ { gram2 = string(runes[i : i+increment]) - if _, ok := trie.Freq[gram2]; ok { + if _, ok := T.Freq[gram2]; ok { result = append(result, gram2) } } @@ -328,8 +327,3 @@ func CutForSearch(sentence string, hmm bool) []string { } return result } - -func SetDictionary(dict_path string) (err error) { - trie, err = newTrie(dict_path) - return -} diff --git a/posseg/posseg.go b/posseg/posseg.go index 84456b4..14dafc6 100644 --- a/posseg/posseg.go +++ b/posseg/posseg.go @@ -142,7 +142,7 @@ func cut_DAG(sentence string) []WordTag { buf = make([]rune, 0) } else { bufString := string(buf) - if _, ok := jiebago.TT.Freq[bufString]; !ok { + if _, ok := jiebago.T.Freq[bufString]; !ok { recognized := cutDetail(bufString) for _, t := range recognized { result = append(result, t) @@ -181,7 +181,7 @@ func cut_DAG(sentence string) []WordTag { } } else { bufString := string(buf) - if _, ok := jiebago.TT.Freq[bufString]; !ok { + if _, ok := jiebago.T.Freq[bufString]; !ok { recognized := cutDetail(bufString) for _, t := range recognized { result = append(result, t) diff --git a/tokenize.go b/tokenize.go index 3a1457b..e22086e 100644 --- a/tokenize.go +++ b/tokenize.go @@ -24,7 +24,7 @@ func Tokenize(sentence string, mode string, HMM bool) []Token { if width > step { for i := 0; i < width-step+1; i++ { gram := string(runes[i : i+step]) - if _, ok := trie.Freq[gram]; ok { + if _, ok := T.Freq[gram]; ok { tokens = append(tokens, Token{gram, start + i, start + i + step}) } } diff --git a/trie_node.go b/trie_node.go index 2890db2..6d3adea 100644 --- a/trie_node.go +++ b/trie_node.go @@ -15,6 +15,8 @@ import ( "strings" ) +var T *Trie + type Trie struct { Nodes mapset.Set MinFreq float64 @@ -181,7 +183,7 @@ func addWord(word string, freq float64, tag string) { if len(tag) > 0 { UserWordTagTab[word] = strings.TrimSpace(tag) } - trie.addWord(word, freq) + T.addWord(word, freq) } func LoadUserDict(filePath string) error { @@ -210,3 +212,8 @@ func LoadUserDict(filePath string) error { return scanner.Err() } + +func SetDictionary(dict_path string) (err error) { + T, err = newTrie(dict_path) + return +}