mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-23 12:40:39 +08:00
refactor analyse module
This commit is contained in:
@@ -1,30 +1,50 @@
|
||||
package analyse
|
||||
|
||||
import (
|
||||
"github.com/wangbin/jiebago"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/wangbin/jiebago/dictionary"
|
||||
)
|
||||
|
||||
type idf struct {
|
||||
// Idf is a frequency table keyed by token text (presumably inverse document
// frequency values, going by the name — confirm against the dictionary data).
//
// The embedded RWMutex guards every other field; because it is embedded, Lock,
// Unlock, RLock and RUnlock are part of the type's method set. An Idf must not
// be copied after first use, since copying would duplicate the mutex.
type Idf struct {
	// freqMap maps a token's text to its frequency.
	freqMap map[string]float64
	// median is the element at index len(freqs)/2 of the sorted freqs slice.
	median float64
	// freqs holds every recorded frequency; it is sorted after each update.
	freqs []float64
	sync.RWMutex
}
|
||||
|
||||
func (l *IDFLoader) AddEntry(entry jiebago.Entry) {
|
||||
l.IDFFreq[entry.Word] = entry.Freq
|
||||
l.freqs = append(l.freqs, entry.Freq)
|
||||
func (i *Idf) AddToken(token dictionary.Token) {
|
||||
i.Lock()
|
||||
i.freqMap[token.Text()] = token.Frequency()
|
||||
i.freqs = append(i.freqs, token.Frequency())
|
||||
sort.Float64s(i.freqs)
|
||||
i.median = i.freqs[len(i.freqs)/2]
|
||||
i.Unlock()
|
||||
}
|
||||
|
||||
func NewIDFLoader(IDFFileName string) (*IDFLoader, error) {
|
||||
loader := &IDFLoader{make(map[string]float64), 0.0, make([]float64, 0)}
|
||||
err := jiebago.LoadDict(loader, IDFFileName, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
func (i *Idf) Load(ch <-chan dictionary.Token) {
|
||||
i.Lock()
|
||||
for token := range ch {
|
||||
i.freqMap[token.Text()] = token.Frequency()
|
||||
i.freqs = append(i.freqs, token.Frequency())
|
||||
}
|
||||
|
||||
sort.Float64s(loader.freqs)
|
||||
loader.Median = loader.freqs[len(loader.freqs)/2]
|
||||
loader.freqs = []float64{}
|
||||
return loader, nil
|
||||
sort.Float64s(i.freqs)
|
||||
i.median = i.freqs[len(i.freqs)/2]
|
||||
i.Unlock()
|
||||
}
|
||||
|
||||
func (i *Idf) loadDictionary(fileName string) error {
|
||||
return dictionary.LoadDictionary(i, fileName)
|
||||
}
|
||||
|
||||
func (i Idf) Frequency(key string) (float64, bool) {
|
||||
i.RLock()
|
||||
freq, ok := i.freqMap[key]
|
||||
i.RUnlock()
|
||||
return freq, ok
|
||||
}
|
||||
|
||||
func NewIdf() *Idf {
|
||||
return &Idf{freqMap: make(map[string]float64), freqs: make([]float64, 0)}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user