1
0
Mirror of https://github.com/fumiama/jieba.git, synced 2026-06-12 21:20:26 +08:00

Small refactor: replace WordTagFreq with Entry

This commit is contained in:
Wang Bin
2015-03-25 17:53:25 +08:00
parent 800ecaa8c9
commit 7fe5e7d4c4
4 changed files with 41 additions and 38 deletions

27
dict.go
View File

@@ -7,18 +7,13 @@ import (
"strings" "strings"
) )
type WordTagFreq struct { func ParseDictFile(dictFilePath string) ([]*Entry, error) {
Word, Tag string dictFile, err := os.Open(dictFilePath)
Freq float64
}
func ParseDictFile(dictFilePath string) (wtfs []*WordTagFreq, err error) {
var dictFile *os.File
dictFile, err = os.Open(dictFilePath)
if err != nil { if err != nil {
return return nil, err
} }
defer dictFile.Close() defer dictFile.Close()
entries := make([]*Entry, 0)
scanner := bufio.NewScanner(dictFile) scanner := bufio.NewScanner(dictFile)
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
@@ -26,18 +21,18 @@ func ParseDictFile(dictFilePath string) (wtfs []*WordTagFreq, err error) {
length := len(fields) length := len(fields)
word := fields[0] word := fields[0]
word = strings.Replace(word, "\ufeff", "", 1) word = strings.Replace(word, "\ufeff", "", 1)
wtf := &WordTagFreq{Word: word} entry := NewEntry()
entry.Word = word
if length > 1 { if length > 1 {
wtf.Freq, err = strconv.ParseFloat(fields[1], 64) entry.Freq, err = strconv.ParseFloat(fields[1], 64)
if err != nil { if err != nil {
return return nil, err
} }
} }
if length > 2 { if length > 2 {
wtf.Tag = fields[2] entry.Flag = fields[2]
} }
wtfs = append(wtfs, wtf) entries = append(entries, entry)
} }
err = scanner.Err() return entries, scanner.Err()
return
} }

View File

@@ -5,11 +5,16 @@ type Pair struct {
Flag string Flag string
} }
type Token struct { type Entry struct {
*Pair *Pair
Freq float64 Freq float64
} }
type DictLoader interface { func NewEntry() *Entry {
Add(*Token) return &Entry{new(Pair), 0.0}
}
type Loader interface {
AddEntry(Entry)
CachePath(string) string
} }

View File

@@ -25,11 +25,11 @@ type Posseg struct {
Flag map[string]string Flag map[string]string
} }
func (p *Posseg) Add(wtf *jiebago.WordTagFreq) { func (p *Posseg) AddEntry(entry *jiebago.Entry) {
if len(wtf.Tag) > 0 { if len(entry.Tag) > 0 {
p.Flag[wtf.Word] = strings.TrimSpace(wtf.Tag) p.Flag[Entry.Word] = strings.TrimSpace(Entry.Flag)
} }
p.AddWord(wtf) p.Add(entry.Word, entry.Freq)
} }
// Set dictionary, it could be absolute path of dictionary file, or dictionary // Set dictionary, it could be absolute path of dictionary file, or dictionary

33
trie.go
View File

@@ -60,13 +60,13 @@ func (j *Jieba) load(dictFileName string) error {
} }
if !isDictCached { if !isDictCached {
wtfs, err := ParseDictFile(dictFilePath) entries, err := ParseDictFile(dictFilePath)
if err != nil { if err != nil {
return err return err
} }
for _, wtf := range wtfs { for _, entry := range entries {
j.AddWord(wtf) j.AddEntry(entry)
} }
// dump trie // dump trie
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
@@ -85,27 +85,30 @@ func (j *Jieba) load(dictFileName string) error {
return nil return nil
} }
func (j *Jieba) AddWord(wtf *WordTagFreq) { func (j *Jieba) AddEntry(entry *Entry) {
j.Freq[wtf.Word] = wtf.Freq j.Add(entry.Word, entry.Freq)
j.Total += wtf.Freq }
runes := []rune(wtf.Word)
count := len(runes) func (j *Jieba) Add(word string, freq float64) {
for i := 0; i < count; i++ { j.Freq[word] = freq
wfrag := string(runes[0 : i+1]) j.Total += freq
if _, ok := j.Freq[wfrag]; !ok { runes := []rune(word)
j.Freq[wfrag] = 0.0 for i := 0; i < len(runes); i++ {
frag := string(runes[0 : i+1])
if _, ok := j.Freq[frag]; !ok {
j.Freq[frag] = 0.0
} }
} }
} }
// Load user specified dictionary file. // Load user specified dictionary file.
func (j *Jieba) LoadUserDict(dictFilePath string) error { func (j *Jieba) LoadUserDict(dictFilePath string) error {
wtfs, err := ParseDictFile(dictFilePath) entries, err := ParseDictFile(dictFilePath)
if err != nil { if err != nil {
return err return err
} }
for _, wtf := range wtfs { for _, entry := range entries {
j.AddWord(wtf) j.Add(entry.Word, entry.Freq)
} }
return nil return nil
} }