1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

removed MinFreq, corresponding to jieba commit #caae26fbfafd75062742823a23e1cc81368b1451

This commit is contained in:
Wang Bin
2015-02-25 16:01:39 +08:00
parent 2515d2e5a0
commit 5702495bf6
2 changed files with 10 additions and 32 deletions

View File

@@ -3,6 +3,7 @@ package jiebago
import (
"fmt"
"github.com/wangbin/jiebago/finalseg"
"math"
"regexp"
"sort"
)
@@ -100,21 +101,17 @@ func Calc(sentence string, dag map[int][]int) map[int]*Route {
runes := []rune(sentence)
number := len(runes)
routes := make(map[int]*Route)
routes[number] = &Route{0.0, 0}
routes[number] = &Route{Freq: 0.0, Index: 0}
logTotal := math.Log(T.Total)
for idx := number - 1; idx >= 0; idx-- {
candidates := make(Routes, 0)
for _, i := range dag[idx] {
var word string
if i <= idx-1 {
word = string(runes[i+1 : idx])
} else {
word = string(runes[idx : i+1])
}
word := string(runes[idx : i+1])
var route *Route
if _, ok := T.Freq[word]; ok {
route = &Route{T.Freq[word] + routes[i+1].Freq, i}
route = &Route{Freq: math.Log(T.Freq[word]) - logTotal + routes[i+1].Freq, Index: i}
} else {
route = &Route{T.MinFreq + routes[i+1].Freq, i}
route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i}
}
candidates = append(candidates, route)
}

View File

@@ -8,7 +8,6 @@ import (
"fmt"
mapset "github.com/deckarep/golang-set"
"log"
"math"
"os"
"path/filepath"
"strconv"
@@ -18,10 +17,9 @@ import (
var T *Trie
type Trie struct {
Nodes mapset.Set
MinFreq float64
Total float64
Freq map[string]float64
Nodes mapset.Set
Total float64
Freq map[string]float64
}
func (t Trie) MarshalBinary() ([]byte, error) {
@@ -31,10 +29,6 @@ func (t Trie) MarshalBinary() ([]byte, error) {
if err != nil {
return nil, err
}
err = enc.Encode(t.MinFreq)
if err != nil {
return nil, err
}
err = enc.Encode(t.Total)
if err != nil {
return nil, err
@@ -55,10 +49,6 @@ func (t *Trie) UnmarshalBinary(data []byte) error {
return err
}
t.Nodes = mapset.NewSetFromSlice(nodes)
err = dec.Decode(&t.MinFreq)
if err != nil {
return err
}
err = dec.Decode(&t.Total)
if err != nil {
return err
@@ -121,7 +111,7 @@ func newTrie(fileName string) (*Trie, error) {
}
if !isDictCached {
trie = &Trie{Nodes: mapset.NewSet(), MinFreq: 0.0, Total: 0.0,
trie = &Trie{Nodes: mapset.NewSet(), Total: 0.0,
Freq: make(map[string]float64)}
file, openError := os.Open(filePath)
@@ -142,15 +132,6 @@ func newTrie(fileName string) (*Trie, error) {
return nil, scanErr
}
var val float64
for key := range trie.Freq {
val = math.Log(trie.Freq[key] / trie.Total)
if val < trie.MinFreq {
trie.MinFreq = val
}
trie.Freq[key] = val
}
// dump trie
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {