mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-25 06:09:44 +08:00
优化 analyze
This commit is contained in:
@@ -11,10 +11,10 @@ import (
|
|||||||
// Idf represents a thread-safe dictionary for all words with their
|
// Idf represents a thread-safe dictionary for all words with their
|
||||||
// IDFs (Inverse Document Frequency).
|
// IDFs (Inverse Document Frequency).
|
||||||
type Idf struct {
|
type Idf struct {
|
||||||
freqMap map[string]float64
|
|
||||||
median float64
|
|
||||||
freqs []float64
|
|
||||||
sync.RWMutex
|
sync.RWMutex
|
||||||
|
median float64
|
||||||
|
freqMap map[string]float64
|
||||||
|
freqs []float64
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddToken adds a new word with IDF into its dictionary.
|
// AddToken adds a new word with IDF into its dictionary.
|
||||||
@@ -57,5 +57,5 @@ func (i *Idf) Frequency(key string) (float64, bool) {
|
|||||||
|
|
||||||
// NewIdf creates a new Idf instance.
|
// NewIdf creates a new Idf instance.
|
||||||
func NewIdf() *Idf {
|
func NewIdf() *Idf {
|
||||||
return &Idf{freqMap: make(map[string]float64), freqs: make([]float64, 0)}
|
return &Idf{freqMap: make(map[string]float64, 256), freqs: make([]float64, 0, 256)}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -112,15 +112,16 @@ func (t *TagExtracter) ExtractTags(sentence string, topK int) (tags Segments) {
|
|||||||
for _, freq := range freqMap {
|
for _, freq := range freqMap {
|
||||||
total += freq
|
total += freq
|
||||||
}
|
}
|
||||||
ws := make(Segments, 0)
|
ws := make(Segments, len(freqMap))
|
||||||
var s Segment
|
i := 0
|
||||||
for k, v := range freqMap {
|
for k, v := range freqMap {
|
||||||
|
ws[i].text = k
|
||||||
if freq, ok := t.idf.Frequency(k); ok {
|
if freq, ok := t.idf.Frequency(k); ok {
|
||||||
s = Segment{text: k, weight: freq * float64(v) / float64(total)}
|
ws[i].weight = freq * float64(v) / float64(total)
|
||||||
} else {
|
} else {
|
||||||
s = Segment{text: k, weight: t.idf.median * float64(v) / float64(total)}
|
ws[i].weight = t.idf.median * float64(v) / float64(total)
|
||||||
}
|
}
|
||||||
ws = append(ws, s)
|
i++
|
||||||
}
|
}
|
||||||
sort.Sort(sort.Reverse(ws))
|
sort.Sort(sort.Reverse(ws))
|
||||||
if len(ws) > topK {
|
if len(ws) > topK {
|
||||||
|
|||||||
Reference in New Issue
Block a user