1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00
Files
jieba/dictionary.go
2026-01-04 00:00:01 +08:00

65 lines
1.3 KiB
Go
Executable File

package jieba
import (
"io"
"math"
"sync"
"github.com/fumiama/jieba/dictionary"
)
// Dictionary is the word-frequency table used for word segmentation. It is
// intended to be safe for concurrent use: the exported methods take the
// embedded RWMutex before touching the table.
type Dictionary struct {
	sync.RWMutex

	// total is the running sum of the frequencies of every added token.
	total float64
	// logTotal caches math.Log(total).
	logTotal float64
	// freqMap maps a word to its frequency. Proper prefixes of added words
	// are stored with frequency 0 when they are not already present.
	freqMap map[string]float64
}
// Load adds every given token to the dictionary and then refreshes the cached
// logarithm of the total frequency.
//
// The write lock is held for the whole operation, including the logTotal
// refresh, so the read of total and the write of logTotal cannot race with
// another Load or AddToken call.
func (d *Dictionary) Load(tokens ...dictionary.Token) {
	d.Lock()
	defer d.Unlock()

	for _, token := range tokens {
		d.addToken(token)
	}
	// updateLogTotal reads total and writes logTotal without locking, so it
	// has to run before the write lock is released.
	d.updateLogTotal()
}
// AddToken adds a single token to the dictionary and then refreshes the cached
// logarithm of the total frequency.
//
// The write lock is held for the whole operation, including the logTotal
// refresh, so the read of total and the write of logTotal cannot race with
// another AddToken or Load call.
func (d *Dictionary) AddToken(token dictionary.Token) {
	d.Lock()
	defer d.Unlock()

	d.addToken(token)
	// updateLogTotal reads total and writes logTotal without locking, so it
	// has to run before the write lock is released.
	d.updateLogTotal()
}
// addToken stores token's frequency under its text, adds that frequency to the
// running total, and registers every proper prefix of the text with frequency
// 0 unless the prefix is already present.
//
// It does no locking itself; Load and AddToken call it with the write lock
// held.
//
// NOTE(review): adding a word that is already present overwrites its stored
// frequency but still adds the new frequency to total; the previous value is
// not subtracted. Confirm this is intended before changing it.
func (d *Dictionary) addToken(token dictionary.Token) {
	text, freq := token.Text(), token.Frequency()

	// Writing to a nil map panics, so allocate lazily. This lets a zero-value
	// Dictionary be loaded without a separate constructor call.
	if d.freqMap == nil {
		d.freqMap = make(map[string]float64)
	}

	d.freqMap[text] = freq
	d.total += freq

	// Ranging over a string yields the byte offset at which each rune starts,
	// so text[:i] never cuts through a multi-byte character. Offset 0 would be
	// the empty prefix and is skipped; the full word is never produced here.
	for i := range text {
		if i == 0 {
			continue
		}
		prefix := text[:i]
		if _, ok := d.freqMap[prefix]; !ok {
			d.freqMap[prefix] = 0.0
		}
	}
}
// updateLogTotal recomputes logTotal as the natural logarithm of total.
// It takes no lock itself. While total is 0 the stored value is -Inf, which
// is what math.Log returns for 0.
func (d *Dictionary) updateLogTotal() {
d.logTotal = math.Log(d.total)
}
// Frequency looks up key under the read lock and returns its stored frequency
// together with a flag reporting whether key is present. The flag is true for
// every key in the table, including prefixes that were registered with a
// frequency of 0.
func (d *Dictionary) Frequency(key string) (float64, bool) {
	d.RLock()
	defer d.RUnlock()

	value, found := d.freqMap[key]
	return value, found
}
// loadDictionary hands d and the reader to dictionary.LoadDictionary and
// returns that call's error unchanged.
// NOTE(review): presumably LoadDictionary parses tokens from file and feeds
// them back into d — confirm in package dictionary.
func (d *Dictionary) loadDictionary(file io.Reader) error {
return dictionary.LoadDictionary(d, file)
}
// loadDictionaryAt hands d and the string file to dictionary.LoadDictionaryAt
// and returns that call's error unchanged.
// NOTE(review): file is presumably a filesystem path to a dictionary file —
// confirm in package dictionary.
func (d *Dictionary) loadDictionaryAt(file string) error {
return dictionary.LoadDictionaryAt(d, file)
}