From 5702495bf61ee86a45caf60b7f069ff3895e4e65 Mon Sep 17 00:00:00 2001 From: Wang Bin Date: Wed, 25 Feb 2015 16:01:39 +0800 Subject: [PATCH] removed MinFreq, corresponding to jieba commit #caae26fbfafd75062742823a23e1cc81368b1451 --- jieba.go | 15 ++++++--------- trie_node.go | 27 ++++----------------------- 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/jieba.go b/jieba.go index e183b6e..8ae8962 100644 --- a/jieba.go +++ b/jieba.go @@ -3,6 +3,7 @@ package jiebago import ( "fmt" "github.com/wangbin/jiebago/finalseg" + "math" "regexp" "sort" ) @@ -100,21 +101,17 @@ func Calc(sentence string, dag map[int][]int) map[int]*Route { runes := []rune(sentence) number := len(runes) routes := make(map[int]*Route) - routes[number] = &Route{0.0, 0} + routes[number] = &Route{Freq: 0.0, Index: 0} + logTotal := math.Log(T.Total) for idx := number - 1; idx >= 0; idx-- { candidates := make(Routes, 0) for _, i := range dag[idx] { - var word string - if i <= idx-1 { - word = string(runes[i+1 : idx]) - } else { - word = string(runes[idx : i+1]) - } + word := string(runes[idx : i+1]) var route *Route if _, ok := T.Freq[word]; ok { - route = &Route{T.Freq[word] + routes[i+1].Freq, i} + route = &Route{Freq: math.Log(T.Freq[word]) - logTotal + routes[i+1].Freq, Index: i} } else { - route = &Route{T.MinFreq + routes[i+1].Freq, i} + route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i} } candidates = append(candidates, route) } diff --git a/trie_node.go b/trie_node.go index 44e375d..22f66b0 100644 --- a/trie_node.go +++ b/trie_node.go @@ -8,7 +8,6 @@ import ( "fmt" mapset "github.com/deckarep/golang-set" "log" - "math" "os" "path/filepath" "strconv" @@ -18,10 +17,9 @@ import ( var T *Trie type Trie struct { - Nodes mapset.Set - MinFreq float64 - Total float64 - Freq map[string]float64 + Nodes mapset.Set + Total float64 + Freq map[string]float64 } func (t Trie) MarshalBinary() ([]byte, 
error) { if err != nil { return nil, err } - err = enc.Encode(t.MinFreq) - if err != nil { - return nil, err - } err = enc.Encode(t.Total) if err != nil { return nil, err @@ -55,10 +49,6 @@ func (t *Trie) UnmarshalBinary(data []byte) error { return err } t.Nodes = mapset.NewSetFromSlice(nodes) - err = dec.Decode(&t.MinFreq) - if err != nil { - return err - } err = dec.Decode(&t.Total) if err != nil { return err @@ -121,7 +111,7 @@ func newTrie(fileName string) (*Trie, error) { } if !isDictCached { - trie = &Trie{Nodes: mapset.NewSet(), MinFreq: 0.0, Total: 0.0, + trie = &Trie{Nodes: mapset.NewSet(), Total: 0.0, Freq: make(map[string]float64)} file, openError := os.Open(filePath) @@ -142,15 +132,6 @@ func newTrie(fileName string) (*Trie, error) { return nil, scanErr } - var val float64 - for key := range trie.Freq { - val = math.Log(trie.Freq[key] / trie.Total) - if val < trie.MinFreq { - trie.MinFreq = val - } - trie.Freq[key] = val - } - // dump trie cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil {