diff --git a/analyse/idf.go b/analyse/idf.go
index fdbdac8..4f55a45 100755
--- a/analyse/idf.go
+++ b/analyse/idf.go
@@ -11,10 +11,10 @@ import (
 // Idf represents a thread-safe dictionary for all words with their
 // IDFs(Inverse Document Frequency).
 type Idf struct {
-	freqMap map[string]float64
-	median  float64
-	freqs   []float64
 	sync.RWMutex
+	median  float64
+	freqMap map[string]float64
+	freqs   []float64
 }
 
 // AddToken adds a new word with IDF into it's dictionary.
@@ -57,5 +57,5 @@ func (i *Idf) Frequency(key string) (float64, bool) {
 
 // NewIdf creates a new Idf instance.
 func NewIdf() *Idf {
-	return &Idf{freqMap: make(map[string]float64), freqs: make([]float64, 0)}
+	return &Idf{freqMap: make(map[string]float64, 256), freqs: make([]float64, 0, 256)}
 }
diff --git a/analyse/tag_extracker.go b/analyse/tag_extracker.go
index ee938a6..3a1e7bb 100755
--- a/analyse/tag_extracker.go
+++ b/analyse/tag_extracker.go
@@ -112,15 +112,16 @@ func (t *TagExtracter) ExtractTags(sentence string, topK int) (tags Segments) {
 	for _, freq := range freqMap {
 		total += freq
 	}
-	ws := make(Segments, 0)
-	var s Segment
+	ws := make(Segments, len(freqMap))
+	i := 0
 	for k, v := range freqMap {
+		ws[i].text = k
 		if freq, ok := t.idf.Frequency(k); ok {
-			s = Segment{text: k, weight: freq * float64(v) / float64(total)}
+			ws[i].weight = freq * float64(v) / float64(total)
 		} else {
-			s = Segment{text: k, weight: t.idf.median * float64(v) / float64(total)}
+			ws[i].weight = t.idf.median * float64(v) / float64(total)
 		}
-		ws = append(ws, s)
+		i++
 	}
 	sort.Sort(sort.Reverse(ws))
 	if len(ws) > topK {