1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00
Files
jieba/dictionary.go
2022-12-03 10:54:06 +08:00

68 lines
1.4 KiB
Go
Executable File

package jieba
import (
"io"
"math"
"sync"
"github.com/fumiama/jieba/dictionary"
)
// Dictionary is the word-frequency table used for word segmentation.
//
// It embeds a sync.RWMutex, so Lock/Unlock and RLock/RUnlock are available
// directly on the value; the methods of this type use them to guard the
// fields below. Because it contains a mutex, a Dictionary must be used
// through a pointer and never copied after first use.
type Dictionary struct {
	sync.RWMutex

	// total is the running sum of the frequencies of every added token.
	total float64
	// logTotal caches math.Log(total).
	logTotal float64
	// freqMap maps a word to its frequency. Prefixes of added words are
	// stored as well, with frequency 0 when they were not added as words
	// themselves.
	freqMap map[string]float64
}
// Load adds every token in tokens to the dictionary and then refreshes the
// cached logarithm of the total frequency.
//
// The write lock is held for the whole operation, including the logTotal
// refresh: reading total and writing logTotal outside the lock would race
// with other goroutines that are adding tokens at the same time. Deferring
// the unlock also guarantees the lock is released if adding a token panics.
func (d *Dictionary) Load(tokens ...dictionary.Token) {
	d.Lock()
	defer d.Unlock()

	for _, token := range tokens {
		d.addToken(token)
	}
	// updateLogTotal takes no lock itself, so it is safe to call here.
	d.updateLogTotal()
}
// AddToken adds a single token to the dictionary and then refreshes the
// cached logarithm of the total frequency.
//
// Both steps run under the write lock: reading total and writing logTotal
// after releasing the lock would race with other goroutines that are adding
// tokens at the same time. Deferring the unlock also guarantees the lock is
// released if adding the token panics.
func (d *Dictionary) AddToken(token dictionary.Token) {
	d.Lock()
	defer d.Unlock()

	d.addToken(token)
	// updateLogTotal takes no lock itself, so it is safe to call here.
	d.updateLogTotal()
}
// addToken records token in the frequency table and adds its frequency to
// total. It takes no lock itself; Load and AddToken call it while holding
// the write lock.
//
// Every rune-prefix of the token text that is not yet in the table is
// inserted with frequency 0, so that a lookup can tell "is a prefix of some
// word" (present, possibly 0) apart from "leads nowhere" (absent).
//
// NOTE(review): adding a word that is already present overwrites its
// frequency but still adds the new frequency to total; confirm this is the
// intended accounting.
func (d *Dictionary) addToken(token dictionary.Token) {
	// Make the zero-value Dictionary usable: writing to a nil map panics.
	if d.freqMap == nil {
		d.freqMap = make(map[string]float64)
	}

	text, freq := token.Text(), token.Frequency()
	d.freqMap[text] = freq
	d.total += freq

	runes := []rune(text)
	// The loop deliberately includes the full-length prefix. For valid UTF-8
	// that prefix equals text and is already present, but converting invalid
	// UTF-8 through []rune replaces bad bytes with U+FFFD, so the rebuilt
	// string can differ from text and must still be registered.
	for end := 1; end <= len(runes); end++ {
		prefix := string(runes[:end])
		if _, ok := d.freqMap[prefix]; !ok {
			d.freqMap[prefix] = 0
		}
	}
}
// updateLogTotal recomputes the cached natural logarithm of total and stores
// it in logTotal. It performs no locking of its own.
func (d *Dictionary) updateLogTotal() {
	logOfTotal := math.Log(d.total)
	d.logTotal = logOfTotal
}
// Frequency looks up key in the frequency table while holding the read lock.
// It returns the stored frequency and whether key is present at all. An
// entry that exists only as a prefix of a longer word reports 0 and true.
func (d *Dictionary) Frequency(key string) (float64, bool) {
	d.RLock()
	defer d.RUnlock()

	freq, found := d.freqMap[key]
	return freq, found
}
// loadDictionary passes d and the reader file to dictionary.LoadDictionary
// and returns that call's error unchanged.
// NOTE(review): presumably LoadDictionary parses entries from file and adds
// them to d; confirm in package dictionary.
func (d *Dictionary) loadDictionary(file io.Reader) error {
return dictionary.LoadDictionary(d, file)
}
// loadDictionaryAt passes d and the string file to
// dictionary.LoadDictionaryAt and returns that call's error unchanged.
// NOTE(review): file is presumably a filesystem path to a dictionary file
// whose entries are added to d; confirm in package dictionary.
func (d *Dictionary) loadDictionaryAt(file string) error {
return dictionary.LoadDictionaryAt(d, file)
}