1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-23 12:40:39 +08:00

refactor analyse module

This commit is contained in:
Wang Bin
2015-05-04 16:39:37 +08:00
parent 500e6bd10e
commit 52fad00403
7 changed files with 187 additions and 236 deletions

View File

@@ -1,30 +1,50 @@
package analyse
import (
"github.com/wangbin/jiebago"
"sort"
"sync"
"github.com/wangbin/jiebago/dictionary"
)
type idf struct {
// Idf holds a word-to-frequency table together with the median of the
// frequency values that have been added to it.
type Idf struct {
	// RWMutex is embedded, so Lock/Unlock/RLock/RUnlock are promoted onto Idf.
	// AddToken and Load hold the write lock while updating the fields below.
	sync.RWMutex

	freqMap map[string]float64 // token text -> frequency
	median  float64            // freqs[len(freqs)/2] taken after sorting freqs
	freqs   []float64          // every frequency value added so far
}
func (l *IDFLoader) AddEntry(entry jiebago.Entry) {
l.IDFFreq[entry.Word] = entry.Freq
l.freqs = append(l.freqs, entry.Freq)
// AddToken records a single token: its frequency is stored under the token's
// text, appended to the list of known frequencies, and the median is
// recomputed over that list.
//
// The write lock is held for the whole update and released through defer, so
// a panic inside the critical section cannot leave the Idf locked. The
// frequency map is allocated on first use, which lets a zero-value Idf accept
// tokens instead of panicking on a nil-map write.
func (i *Idf) AddToken(token dictionary.Token) {
	i.Lock()
	defer i.Unlock()

	if i.freqMap == nil {
		i.freqMap = make(map[string]float64)
	}

	freq := token.Frequency()
	i.freqMap[token.Text()] = freq
	i.freqs = append(i.freqs, freq)

	// Sort so that the midpoint element is the median; freqs is never empty
	// here because a value was appended just above.
	sort.Float64s(i.freqs)
	i.median = i.freqs[len(i.freqs)/2]
}
func NewIDFLoader(IDFFileName string) (*IDFLoader, error) {
loader := &IDFLoader{make(map[string]float64), 0.0, make([]float64, 0)}
err := jiebago.LoadDict(loader, IDFFileName, false)
if err != nil {
return nil, err
// Load drains ch until it is closed, storing each token's frequency under the
// token's text, and then recomputes the median once over every frequency
// recorded so far.
//
// The write lock is held for the entire drain, so readers and other writers
// block until the sender closes ch. It is released through defer so a panic
// cannot leave the Idf locked.
//
// If no frequency has been recorded at all (ch delivered nothing and nothing
// was added earlier), the median is left unchanged rather than indexing an
// empty slice.
func (i *Idf) Load(ch <-chan dictionary.Token) {
	i.Lock()
	defer i.Unlock()

	// Allow a zero-value Idf to be loaded without a nil-map write panic.
	if i.freqMap == nil {
		i.freqMap = make(map[string]float64)
	}

	for token := range ch {
		freq := token.Frequency()
		i.freqMap[token.Text()] = freq
		i.freqs = append(i.freqs, freq)
	}

	if len(i.freqs) == 0 {
		return
	}
	sort.Float64s(i.freqs)
	i.median = i.freqs[len(i.freqs)/2]
}
// loadDictionary passes this Idf and fileName to dictionary.LoadDictionary
// and returns whatever error that call reports.
func (i *Idf) loadDictionary(fileName string) error {
	err := dictionary.LoadDictionary(i, fileName)
	return err
}
// Frequency returns the frequency stored for key and reports whether key is
// present in the table. A missing key yields (0, false).
//
// The receiver is a pointer so that the read lock is taken on the Idf's own
// embedded RWMutex. A value receiver would copy the struct, mutex included,
// and lock only the copy, leaving the map read unguarded against concurrent
// AddToken/Load calls.
func (i *Idf) Frequency(key string) (float64, bool) {
	i.RLock()
	defer i.RUnlock()

	freq, ok := i.freqMap[key]
	return freq, ok
}
// NewIdf returns a pointer to a fresh Idf whose frequency map and frequency
// slice are both allocated and empty; the median starts at zero.
func NewIdf() *Idf {
	idf := new(Idf)
	idf.freqMap = map[string]float64{}
	idf.freqs = []float64{}
	return idf
}