1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-23 12:40:39 +08:00

removed MinFreq, corresponding to jieba commit #caae26fbfafd75062742823a23e1cc81368b1451

This commit is contained in:
Wang Bin
2015-02-25 16:01:39 +08:00
parent 2515d2e5a0
commit 5702495bf6
2 changed files with 10 additions and 32 deletions

View File

@@ -3,6 +3,7 @@ package jiebago
import ( import (
"fmt" "fmt"
"github.com/wangbin/jiebago/finalseg" "github.com/wangbin/jiebago/finalseg"
"math"
"regexp" "regexp"
"sort" "sort"
) )
@@ -100,21 +101,17 @@ func Calc(sentence string, dag map[int][]int) map[int]*Route {
runes := []rune(sentence) runes := []rune(sentence)
number := len(runes) number := len(runes)
routes := make(map[int]*Route) routes := make(map[int]*Route)
routes[number] = &Route{0.0, 0} routes[number] = &Route{Freq: 0.0, Index: 0}
logTotal := math.Log(T.Total)
for idx := number - 1; idx >= 0; idx-- { for idx := number - 1; idx >= 0; idx-- {
candidates := make(Routes, 0) candidates := make(Routes, 0)
for _, i := range dag[idx] { for _, i := range dag[idx] {
var word string word := string(runes[idx : i+1])
if i <= idx-1 {
word = string(runes[i+1 : idx])
} else {
word = string(runes[idx : i+1])
}
var route *Route var route *Route
if _, ok := T.Freq[word]; ok { if _, ok := T.Freq[word]; ok {
route = &Route{T.Freq[word] + routes[i+1].Freq, i} route = &Route{Freq: math.Log(T.Freq[word]) - logTotal + routes[i+1].Freq, Index: i}
} else { } else {
route = &Route{T.MinFreq + routes[i+1].Freq, i} route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i}
} }
candidates = append(candidates, route) candidates = append(candidates, route)
} }

View File

@@ -8,7 +8,6 @@ import (
"fmt" "fmt"
mapset "github.com/deckarep/golang-set" mapset "github.com/deckarep/golang-set"
"log" "log"
"math"
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
@@ -18,10 +17,9 @@ import (
var T *Trie var T *Trie
type Trie struct { type Trie struct {
Nodes mapset.Set Nodes mapset.Set
MinFreq float64 Total float64
Total float64 Freq map[string]float64
Freq map[string]float64
} }
func (t Trie) MarshalBinary() ([]byte, error) { func (t Trie) MarshalBinary() ([]byte, error) {
@@ -31,10 +29,6 @@ func (t Trie) MarshalBinary() ([]byte, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = enc.Encode(t.MinFreq)
if err != nil {
return nil, err
}
err = enc.Encode(t.Total) err = enc.Encode(t.Total)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -55,10 +49,6 @@ func (t *Trie) UnmarshalBinary(data []byte) error {
return err return err
} }
t.Nodes = mapset.NewSetFromSlice(nodes) t.Nodes = mapset.NewSetFromSlice(nodes)
err = dec.Decode(&t.MinFreq)
if err != nil {
return err
}
err = dec.Decode(&t.Total) err = dec.Decode(&t.Total)
if err != nil { if err != nil {
return err return err
@@ -121,7 +111,7 @@ func newTrie(fileName string) (*Trie, error) {
} }
if !isDictCached { if !isDictCached {
trie = &Trie{Nodes: mapset.NewSet(), MinFreq: 0.0, Total: 0.0, trie = &Trie{Nodes: mapset.NewSet(), Total: 0.0,
Freq: make(map[string]float64)} Freq: make(map[string]float64)}
file, openError := os.Open(filePath) file, openError := os.Open(filePath)
@@ -142,15 +132,6 @@ func newTrie(fileName string) (*Trie, error) {
return nil, scanErr return nil, scanErr
} }
var val float64
for key := range trie.Freq {
val = math.Log(trie.Freq[key] / trie.Total)
if val < trie.MinFreq {
trie.MinFreq = val
}
trie.Freq[key] = val
}
// dump trie // dump trie
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil { if err != nil {