diff --git a/dict.go b/dict.go index 4a5d3db..5dd5fe9 100644 --- a/dict.go +++ b/dict.go @@ -7,18 +7,13 @@ import ( "strings" ) -type WordTagFreq struct { - Word, Tag string - Freq float64 -} - -func ParseDictFile(dictFilePath string) (wtfs []*WordTagFreq, err error) { - var dictFile *os.File - dictFile, err = os.Open(dictFilePath) +func ParseDictFile(dictFilePath string) ([]*Entry, error) { + dictFile, err := os.Open(dictFilePath) if err != nil { - return + return nil, err } defer dictFile.Close() + entries := make([]*Entry, 0) scanner := bufio.NewScanner(dictFile) for scanner.Scan() { line := scanner.Text() @@ -26,18 +21,18 @@ func ParseDictFile(dictFilePath string) (wtfs []*WordTagFreq, err error) { length := len(fields) word := fields[0] word = strings.Replace(word, "\ufeff", "", 1) - wtf := &WordTagFreq{Word: word} + entry := NewEntry() + entry.Word = word if length > 1 { - wtf.Freq, err = strconv.ParseFloat(fields[1], 64) + entry.Freq, err = strconv.ParseFloat(fields[1], 64) if err != nil { - return + return nil, err } } if length > 2 { - wtf.Tag = fields[2] + entry.Flag = fields[2] } - wtfs = append(wtfs, wtf) + entries = append(entries, entry) } - err = scanner.Err() - return + return entries, scanner.Err() } diff --git a/dictionary.go b/dictionary.go index 53e0b78..97bb9ac 100644 --- a/dictionary.go +++ b/dictionary.go @@ -5,11 +5,16 @@ type Pair struct { Flag string } -type Token struct { +type Entry struct { *Pair Freq float64 } -type DictLoader interface { - Add(*Token) +func NewEntry() *Entry { + return &Entry{new(Pair), 0.0} +} + +type Loader interface { + AddEntry(*Entry) + CachePath(string) string } diff --git a/posseg/posseg.go b/posseg/posseg.go index d7996bd..f5b281e 100644 --- a/posseg/posseg.go +++ b/posseg/posseg.go @@ -25,11 +25,11 @@ type Posseg struct { Flag map[string]string } -func (p *Posseg) Add(wtf *jiebago.WordTagFreq) { - if len(wtf.Tag) > 0 { - p.Flag[wtf.Word] = strings.TrimSpace(wtf.Tag) +func (p *Posseg) AddEntry(entry 
*jiebago.Entry) { + if len(entry.Flag) > 0 { + p.Flag[entry.Word] = strings.TrimSpace(entry.Flag) } - p.AddWord(wtf) + p.Add(entry.Word, entry.Freq) } // Set dictionary, it could be absolute path of dictionary file, or dictionary diff --git a/trie.go b/trie.go index bf8920f..5351024 100644 --- a/trie.go +++ b/trie.go @@ -60,13 +60,13 @@ func (j *Jieba) load(dictFileName string) error { } if !isDictCached { - wtfs, err := ParseDictFile(dictFilePath) + entries, err := ParseDictFile(dictFilePath) if err != nil { return err } - for _, wtf := range wtfs { - j.AddWord(wtf) + for _, entry := range entries { + j.AddEntry(entry) } // dump trie cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) @@ -85,27 +85,30 @@ func (j *Jieba) load(dictFileName string) error { return nil } -func (j *Jieba) AddWord(wtf *WordTagFreq) { - j.Freq[wtf.Word] = wtf.Freq - j.Total += wtf.Freq - runes := []rune(wtf.Word) - count := len(runes) - for i := 0; i < count; i++ { - wfrag := string(runes[0 : i+1]) - if _, ok := j.Freq[wfrag]; !ok { - j.Freq[wfrag] = 0.0 +func (j *Jieba) AddEntry(entry *Entry) { + j.Add(entry.Word, entry.Freq) +} + +func (j *Jieba) Add(word string, freq float64) { + j.Freq[word] = freq + j.Total += freq + runes := []rune(word) + for i := 0; i < len(runes); i++ { + frag := string(runes[0 : i+1]) + if _, ok := j.Freq[frag]; !ok { + j.Freq[frag] = 0.0 } } } // Load user specified dictionary file. func (j *Jieba) LoadUserDict(dictFilePath string) error { - wtfs, err := ParseDictFile(dictFilePath) + entries, err := ParseDictFile(dictFilePath) if err != nil { return err } - for _, wtf := range wtfs { - j.AddWord(wtf) + for _, entry := range entries { + j.Add(entry.Word, entry.Freq) } return nil }