mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-12 13:10:25 +08:00
优化 analyze
This commit is contained in:
@@ -11,10 +11,10 @@ import (
|
||||
// Idf represents a thread-safe dictionary for all words with their
|
||||
// IDFs (Inverse Document Frequency).
|
||||
type Idf struct {
|
||||
freqMap map[string]float64
|
||||
median float64
|
||||
freqs []float64
|
||||
sync.RWMutex
|
||||
median float64
|
||||
freqMap map[string]float64
|
||||
freqs []float64
|
||||
}
|
||||
|
||||
// AddToken adds a new word with IDF into its dictionary.
|
||||
@@ -57,5 +57,5 @@ func (i *Idf) Frequency(key string) (float64, bool) {
|
||||
|
||||
// NewIdf creates a new Idf instance.
|
||||
func NewIdf() *Idf {
|
||||
return &Idf{freqMap: make(map[string]float64), freqs: make([]float64, 0)}
|
||||
return &Idf{freqMap: make(map[string]float64, 256), freqs: make([]float64, 0, 256)}
|
||||
}
|
||||
|
||||
@@ -112,15 +112,16 @@ func (t *TagExtracter) ExtractTags(sentence string, topK int) (tags Segments) {
|
||||
for _, freq := range freqMap {
|
||||
total += freq
|
||||
}
|
||||
ws := make(Segments, 0)
|
||||
var s Segment
|
||||
ws := make(Segments, len(freqMap))
|
||||
i := 0
|
||||
for k, v := range freqMap {
|
||||
ws[i].text = k
|
||||
if freq, ok := t.idf.Frequency(k); ok {
|
||||
s = Segment{text: k, weight: freq * float64(v) / float64(total)}
|
||||
ws[i].weight = freq * float64(v) / float64(total)
|
||||
} else {
|
||||
s = Segment{text: k, weight: t.idf.median * float64(v) / float64(total)}
|
||||
ws[i].weight = t.idf.median * float64(v) / float64(total)
|
||||
}
|
||||
ws = append(ws, s)
|
||||
i++
|
||||
}
|
||||
sort.Sort(sort.Reverse(ws))
|
||||
if len(ws) > topK {
|
||||
|
||||
Reference in New Issue
Block a user