mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-23 12:40:39 +08:00
removed MinFreq, corresponding to jieba commit #caae26fbfafd75062742823a23e1cc81368b1451
This commit is contained in:
15
jieba.go
15
jieba.go
@@ -3,6 +3,7 @@ package jiebago
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/wangbin/jiebago/finalseg"
|
"github.com/wangbin/jiebago/finalseg"
|
||||||
|
"math"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
)
|
)
|
||||||
@@ -100,21 +101,17 @@ func Calc(sentence string, dag map[int][]int) map[int]*Route {
|
|||||||
runes := []rune(sentence)
|
runes := []rune(sentence)
|
||||||
number := len(runes)
|
number := len(runes)
|
||||||
routes := make(map[int]*Route)
|
routes := make(map[int]*Route)
|
||||||
routes[number] = &Route{0.0, 0}
|
routes[number] = &Route{Freq: 0.0, Index: 0}
|
||||||
|
logTotal := math.Log(T.Total)
|
||||||
for idx := number - 1; idx >= 0; idx-- {
|
for idx := number - 1; idx >= 0; idx-- {
|
||||||
candidates := make(Routes, 0)
|
candidates := make(Routes, 0)
|
||||||
for _, i := range dag[idx] {
|
for _, i := range dag[idx] {
|
||||||
var word string
|
word := string(runes[idx : i+1])
|
||||||
if i <= idx-1 {
|
|
||||||
word = string(runes[i+1 : idx])
|
|
||||||
} else {
|
|
||||||
word = string(runes[idx : i+1])
|
|
||||||
}
|
|
||||||
var route *Route
|
var route *Route
|
||||||
if _, ok := T.Freq[word]; ok {
|
if _, ok := T.Freq[word]; ok {
|
||||||
route = &Route{T.Freq[word] + routes[i+1].Freq, i}
|
route = &Route{Freq: math.Log(T.Freq[word]) - logTotal + routes[i+1].Freq, Index: i}
|
||||||
} else {
|
} else {
|
||||||
route = &Route{T.MinFreq + routes[i+1].Freq, i}
|
route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i}
|
||||||
}
|
}
|
||||||
candidates = append(candidates, route)
|
candidates = append(candidates, route)
|
||||||
}
|
}
|
||||||
|
|||||||
27
trie_node.go
27
trie_node.go
@@ -8,7 +8,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
mapset "github.com/deckarep/golang-set"
|
mapset "github.com/deckarep/golang-set"
|
||||||
"log"
|
"log"
|
||||||
"math"
|
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -18,10 +17,9 @@ import (
|
|||||||
var T *Trie
|
var T *Trie
|
||||||
|
|
||||||
type Trie struct {
|
type Trie struct {
|
||||||
Nodes mapset.Set
|
Nodes mapset.Set
|
||||||
MinFreq float64
|
Total float64
|
||||||
Total float64
|
Freq map[string]float64
|
||||||
Freq map[string]float64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t Trie) MarshalBinary() ([]byte, error) {
|
func (t Trie) MarshalBinary() ([]byte, error) {
|
||||||
@@ -31,10 +29,6 @@ func (t Trie) MarshalBinary() ([]byte, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
err = enc.Encode(t.MinFreq)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
err = enc.Encode(t.Total)
|
err = enc.Encode(t.Total)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -55,10 +49,6 @@ func (t *Trie) UnmarshalBinary(data []byte) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
t.Nodes = mapset.NewSetFromSlice(nodes)
|
t.Nodes = mapset.NewSetFromSlice(nodes)
|
||||||
err = dec.Decode(&t.MinFreq)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = dec.Decode(&t.Total)
|
err = dec.Decode(&t.Total)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -121,7 +111,7 @@ func newTrie(fileName string) (*Trie, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !isDictCached {
|
if !isDictCached {
|
||||||
trie = &Trie{Nodes: mapset.NewSet(), MinFreq: 0.0, Total: 0.0,
|
trie = &Trie{Nodes: mapset.NewSet(), Total: 0.0,
|
||||||
Freq: make(map[string]float64)}
|
Freq: make(map[string]float64)}
|
||||||
|
|
||||||
file, openError := os.Open(filePath)
|
file, openError := os.Open(filePath)
|
||||||
@@ -142,15 +132,6 @@ func newTrie(fileName string) (*Trie, error) {
|
|||||||
return nil, scanErr
|
return nil, scanErr
|
||||||
}
|
}
|
||||||
|
|
||||||
var val float64
|
|
||||||
for key := range trie.Freq {
|
|
||||||
val = math.Log(trie.Freq[key] / trie.Total)
|
|
||||||
if val < trie.MinFreq {
|
|
||||||
trie.MinFreq = val
|
|
||||||
}
|
|
||||||
trie.Freq[key] = val
|
|
||||||
}
|
|
||||||
|
|
||||||
// dump trie
|
// dump trie
|
||||||
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user