mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-23 04:30:44 +08:00
code refactor, added more documents
This commit is contained in:
@@ -7,6 +7,8 @@ import (
|
||||
"github.com/wangbin/jiebago/dictionary"
|
||||
)
|
||||
|
||||
// Idf represents a thread-safe dictionary for all words with their
|
||||
// IDFs (Inverse Document Frequency).
|
||||
type Idf struct {
|
||||
freqMap map[string]float64
|
||||
median float64
|
||||
@@ -14,6 +16,7 @@ type Idf struct {
|
||||
sync.RWMutex
|
||||
}
|
||||
|
||||
// AddToken adds a new word and its IDF to the dictionary.
|
||||
func (i *Idf) AddToken(token dictionary.Token) {
|
||||
i.Lock()
|
||||
i.freqMap[token.Text()] = token.Frequency()
|
||||
@@ -23,6 +26,7 @@ func (i *Idf) AddToken(token dictionary.Token) {
|
||||
i.Unlock()
|
||||
}
|
||||
|
||||
// Load loads all tokens from the channel into its dictionary.
|
||||
func (i *Idf) Load(ch <-chan dictionary.Token) {
|
||||
i.Lock()
|
||||
for token := range ch {
|
||||
@@ -38,6 +42,7 @@ func (i *Idf) loadDictionary(fileName string) error {
|
||||
return dictionary.LoadDictionary(i, fileName)
|
||||
}
|
||||
|
||||
// Frequency returns the IDF of the given word and whether the word is in the dictionary.
|
||||
func (i *Idf) Frequency(key string) (float64, bool) {
|
||||
i.RLock()
|
||||
freq, ok := i.freqMap[key]
|
||||
@@ -45,6 +50,7 @@ func (i *Idf) Frequency(key string) (float64, bool) {
|
||||
return freq, ok
|
||||
}
|
||||
|
||||
// NewIdf creates a new Idf instance.
|
||||
func NewIdf() *Idf {
|
||||
return &Idf{freqMap: make(map[string]float64), freqs: make([]float64, 0)}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user