mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-12 13:10:25 +08:00
small refactor, replace WordTagFreq with Entry
This commit is contained in:
27
dict.go
27
dict.go
@@ -7,18 +7,13 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type WordTagFreq struct {
|
func ParseDictFile(dictFilePath string) ([]*Entry, error) {
|
||||||
Word, Tag string
|
dictFile, err := os.Open(dictFilePath)
|
||||||
Freq float64
|
|
||||||
}
|
|
||||||
|
|
||||||
func ParseDictFile(dictFilePath string) (wtfs []*WordTagFreq, err error) {
|
|
||||||
var dictFile *os.File
|
|
||||||
dictFile, err = os.Open(dictFilePath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return nil, err
|
||||||
}
|
}
|
||||||
defer dictFile.Close()
|
defer dictFile.Close()
|
||||||
|
entries := make([]*Entry, 0)
|
||||||
scanner := bufio.NewScanner(dictFile)
|
scanner := bufio.NewScanner(dictFile)
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
@@ -26,18 +21,18 @@ func ParseDictFile(dictFilePath string) (wtfs []*WordTagFreq, err error) {
|
|||||||
length := len(fields)
|
length := len(fields)
|
||||||
word := fields[0]
|
word := fields[0]
|
||||||
word = strings.Replace(word, "\ufeff", "", 1)
|
word = strings.Replace(word, "\ufeff", "", 1)
|
||||||
wtf := &WordTagFreq{Word: word}
|
entry := NewEntry()
|
||||||
|
entry.Word = word
|
||||||
if length > 1 {
|
if length > 1 {
|
||||||
wtf.Freq, err = strconv.ParseFloat(fields[1], 64)
|
entry.Freq, err = strconv.ParseFloat(fields[1], 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if length > 2 {
|
if length > 2 {
|
||||||
wtf.Tag = fields[2]
|
entry.Flag = fields[2]
|
||||||
}
|
}
|
||||||
wtfs = append(wtfs, wtf)
|
entries = append(entries, entry)
|
||||||
}
|
}
|
||||||
err = scanner.Err()
|
return entries, scanner.Err()
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,11 +5,16 @@ type Pair struct {
|
|||||||
Flag string
|
Flag string
|
||||||
}
|
}
|
||||||
|
|
||||||
type Token struct {
|
type Entry struct {
|
||||||
*Pair
|
*Pair
|
||||||
Freq float64
|
Freq float64
|
||||||
}
|
}
|
||||||
|
|
||||||
type DictLoader interface {
|
func NewEntry() *Entry {
|
||||||
Add(*Token)
|
return &Entry{new(Pair), 0.0}
|
||||||
|
}
|
||||||
|
|
||||||
|
type Loader interface {
|
||||||
|
AddEntry(Entry)
|
||||||
|
CachePath(string) string
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,11 +25,11 @@ type Posseg struct {
|
|||||||
Flag map[string]string
|
Flag map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Posseg) Add(wtf *jiebago.WordTagFreq) {
|
func (p *Posseg) AddEntry(entry *jiebago.Entry) {
|
||||||
if len(wtf.Tag) > 0 {
|
if len(entry.Tag) > 0 {
|
||||||
p.Flag[wtf.Word] = strings.TrimSpace(wtf.Tag)
|
p.Flag[Entry.Word] = strings.TrimSpace(Entry.Flag)
|
||||||
}
|
}
|
||||||
p.AddWord(wtf)
|
p.Add(entry.Word, entry.Freq)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set dictionary, it could be absolute path of dictionary file, or dictionary
|
// Set dictionary, it could be absolute path of dictionary file, or dictionary
|
||||||
|
|||||||
33
trie.go
33
trie.go
@@ -60,13 +60,13 @@ func (j *Jieba) load(dictFileName string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !isDictCached {
|
if !isDictCached {
|
||||||
wtfs, err := ParseDictFile(dictFilePath)
|
entries, err := ParseDictFile(dictFilePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, wtf := range wtfs {
|
for _, entry := range entries {
|
||||||
j.AddWord(wtf)
|
j.AddEntry(entry)
|
||||||
}
|
}
|
||||||
// dump trie
|
// dump trie
|
||||||
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
||||||
@@ -85,27 +85,30 @@ func (j *Jieba) load(dictFileName string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (j *Jieba) AddWord(wtf *WordTagFreq) {
|
func (j *Jieba) AddEntry(entry *Entry) {
|
||||||
j.Freq[wtf.Word] = wtf.Freq
|
j.Add(entry.Word, entry.Freq)
|
||||||
j.Total += wtf.Freq
|
}
|
||||||
runes := []rune(wtf.Word)
|
|
||||||
count := len(runes)
|
func (j *Jieba) Add(word string, freq float64) {
|
||||||
for i := 0; i < count; i++ {
|
j.Freq[word] = freq
|
||||||
wfrag := string(runes[0 : i+1])
|
j.Total += freq
|
||||||
if _, ok := j.Freq[wfrag]; !ok {
|
runes := []rune(word)
|
||||||
j.Freq[wfrag] = 0.0
|
for i := 0; i < len(runes); i++ {
|
||||||
|
frag := string(runes[0 : i+1])
|
||||||
|
if _, ok := j.Freq[frag]; !ok {
|
||||||
|
j.Freq[frag] = 0.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load user specified dictionary file.
|
// Load user specified dictionary file.
|
||||||
func (j *Jieba) LoadUserDict(dictFilePath string) error {
|
func (j *Jieba) LoadUserDict(dictFilePath string) error {
|
||||||
wtfs, err := ParseDictFile(dictFilePath)
|
entries, err := ParseDictFile(dictFilePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, wtf := range wtfs {
|
for _, entry := range entries {
|
||||||
j.AddWord(wtf)
|
j.Add(entry.Word, entry.Freq)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user