1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-23 20:50:27 +08:00

Refactor to generalize the set-dictionary function (not finished yet)

This commit is contained in:
Wang Bin
2015-03-25 18:46:14 +08:00
parent 59da5b5e3a
commit e155fe5467
2 changed files with 28 additions and 35 deletions

View File

@@ -107,6 +107,6 @@ func (j *Jieba) LoadUserDict(dictFilePath string) error {
// sentence. // sentence.
func NewJieba(dictFileName string) (*Jieba, error) { func NewJieba(dictFileName string) (*Jieba, error) {
j := &Jieba{Total: 0.0, Freq: make(map[string]float64)} j := &Jieba{Total: 0.0, Freq: make(map[string]float64)}
err := j.load(dictFileName) err := SetDict(j, dictFileName, false)
return j, err return j, err
} }

61
util.go
View File

@@ -2,9 +2,9 @@ package jiebago
import ( import (
"bufio" "bufio"
// "crypto/md5" "crypto/md5"
// "encoding/gob" "encoding/gob"
// "fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
@@ -54,21 +54,20 @@ func LoadDict(l DictLoader, dictFilePath string, usingFlag bool) error {
return scanner.Err() return scanner.Err()
} }
/*
func cachePath(dictPath string) string { func cachePath(dictPath string) string {
return filepath.Join(os.TempDir(), return filepath.Join(os.TempDir(),
fmt.Sprintf("jieba.%x.cache", md5.Sum([]byte(f.dictFilePath)))) fmt.Sprintf("jieba.%x.cache", md5.Sum([]byte(dictPath))))
} }
func fileInfo(filePath string, missingOk bool) (*os.FileInfo, err) { func fileInfo(filePath string, missingOk bool) (os.FileInfo, error) {
fileInfo, err := os.Stat(filePath) fi, err := os.Stat(filePath)
if missingOk && err.Err == os.ErrNotExist { if missingOk && err == os.ErrNotExist {
return fileInfo, nil return fi, nil
} }
return fileInfo, err return fi, err
} }
func isCached(dictPath, cachePath string) (bool, error) { func cached(dictPath, cachePath string) (bool, error) {
dictFileInfo, err := fileInfo(dictPath, false) dictFileInfo, err := fileInfo(dictPath, false)
if err != nil { if err != nil {
return false, err return false, err
@@ -80,60 +79,54 @@ func isCached(dictPath, cachePath string) (bool, error) {
return cacheFileInfo.ModTime().After(dictFileInfo.ModTime()), nil return cacheFileInfo.ModTime().After(dictFileInfo.ModTime()), nil
} }
func load(cachePath string, d DictLoader) error { func load(l DictLoader, cachePath string) error {
cacheFile, err := os.Open(cachePath)
if err != nil {
return err
}
defer cacheFile.Close()
dec := gob.NewDecoder(cacheFile) dec := gob.NewDecoder(cacheFile)
return dec.Decode(&d) return dec.Decode(&l)
} }
func dump(cachePath string, d DictLoader) error { func dump(l DictLoader, cachePath string) error {
cacheFile, err = os.OpenFile(cachePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) cacheFile, err := os.OpenFile(cachePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil { if err != nil {
return err return err
} }
defer cacheFile.Close() defer cacheFile.Close()
enc := gob.NewEncoder(cacheFile) enc := gob.NewEncoder(cacheFile)
return enc.Encode(d) return enc.Encode(l)
} }
func SetDict(s Segmenter, dictName string, pos bool) error { func SetDict(l DictLoader, dictName string, pos bool) error {
dictPath, err := DictPath(dictName) dictPath, err := DictPath(dictName)
if err != nil { if err != nil {
return err return err
} }
cachePath = cachePath(dictPath) cachePath := cachePath(dictPath)
cached, err := isCached(dictPath, cachePath) cached, err := cached(dictPath, cachePath)
if err != nil { if err != nil {
return err return err
} }
if cached { if cached {
err = load(cachePath, s) err = load(l, cachePath)
if err == nil { if err == nil {
return nil return nil
} }
cached = false cached = false
} }
err = read(dictPath, s, pos) err = LoadDict(l, dictPath, pos)
if err != nil { if err != nil {
return err return err
} }
err = dump(cachePath, s) return dump(l, cachePath)
if err != nil {
return err
}
} }
func LoadUserDict(dictName string, s Segmenter, pos bool) error {
dictPath, err := DictPath(dictName)
if err != nil {
return err
}
return read(dictPath, s, pos)
}
*/
// Split sentence using regular expression. // Split sentence using regular expression.
func RegexpSplit(r *regexp.Regexp, sentence string) chan string { func RegexpSplit(r *regexp.Regexp, sentence string) chan string {
result := make(chan string) result := make(chan string)