1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-12 13:10:25 +08:00

优化 analyze

This commit is contained in:
源文雨
2022-11-30 14:18:36 +08:00
parent f3da9e6420
commit 6b239b5918
2 changed files with 10 additions and 9 deletions

View File

@@ -11,10 +11,10 @@ import (
// Idf represents a thread-safe dictionary for all words with their
// IDFs (Inverse Document Frequency).
type Idf struct {
freqMap map[string]float64
median float64
freqs []float64
sync.RWMutex
median float64
freqMap map[string]float64
freqs []float64
}
// AddToken adds a new word with IDF into its dictionary.
@@ -57,5 +57,5 @@ func (i *Idf) Frequency(key string) (float64, bool) {
// NewIdf creates a new Idf instance.
func NewIdf() *Idf {
return &Idf{freqMap: make(map[string]float64), freqs: make([]float64, 0)}
return &Idf{freqMap: make(map[string]float64, 256), freqs: make([]float64, 0, 256)}
}

View File

@@ -112,15 +112,16 @@ func (t *TagExtracter) ExtractTags(sentence string, topK int) (tags Segments) {
for _, freq := range freqMap {
total += freq
}
ws := make(Segments, 0)
var s Segment
ws := make(Segments, len(freqMap))
i := 0
for k, v := range freqMap {
ws[i].text = k
if freq, ok := t.idf.Frequency(k); ok {
s = Segment{text: k, weight: freq * float64(v) / float64(total)}
ws[i].weight = freq * float64(v) / float64(total)
} else {
s = Segment{text: k, weight: t.idf.median * float64(v) / float64(total)}
ws[i].weight = t.idf.median * float64(v) / float64(total)
}
ws = append(ws, s)
i++
}
sort.Sort(sort.Reverse(ws))
if len(ws) > topK {