1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

Refactor to generalize the dictionary-setting function (SetDict) so it accepts any DictLoader; work in progress, not finished yet

This commit is contained in:
Wang Bin
2015-03-25 18:46:14 +08:00
parent 59da5b5e3a
commit e155fe5467
2 changed files with 28 additions and 35 deletions

View File

@@ -107,6 +107,6 @@ func (j *Jieba) LoadUserDict(dictFilePath string) error {
// sentence.
func NewJieba(dictFileName string) (*Jieba, error) {
j := &Jieba{Total: 0.0, Freq: make(map[string]float64)}
err := j.load(dictFileName)
err := SetDict(j, dictFileName, false)
return j, err
}

61
util.go
View File

@@ -2,9 +2,9 @@ package jiebago
import (
"bufio"
// "crypto/md5"
// "encoding/gob"
// "fmt"
"crypto/md5"
"encoding/gob"
"fmt"
"os"
"path/filepath"
"regexp"
@@ -54,21 +54,20 @@ func LoadDict(l DictLoader, dictFilePath string, usingFlag bool) error {
return scanner.Err()
}
/*
func cachePath(dictPath string) string {
return filepath.Join(os.TempDir(),
fmt.Sprintf("jieba.%x.cache", md5.Sum([]byte(f.dictFilePath))))
fmt.Sprintf("jieba.%x.cache", md5.Sum([]byte(dictPath))))
}
func fileInfo(filePath string, missingOk bool) (*os.FileInfo, err) {
fileInfo, err := os.Stat(filePath)
if missingOk && err.Err == os.ErrNotExist {
return fileInfo, nil
func fileInfo(filePath string, missingOk bool) (os.FileInfo, error) {
fi, err := os.Stat(filePath)
if missingOk && err == os.ErrNotExist {
return fi, nil
}
return fileInfo, err
return fi, err
}
func isCached(dictPath, cachePath string) (bool, error) {
func cached(dictPath, cachePath string) (bool, error) {
dictFileInfo, err := fileInfo(dictPath, false)
if err != nil {
return false, err
@@ -80,60 +79,54 @@ func isCached(dictPath, cachePath string) (bool, error) {
return cacheFileInfo.ModTime().After(dictFileInfo.ModTime()), nil
}
func load(cachePath string, d DictLoader) error {
func load(l DictLoader, cachePath string) error {
cacheFile, err := os.Open(cachePath)
if err != nil {
return err
}
defer cacheFile.Close()
dec := gob.NewDecoder(cacheFile)
return dec.Decode(&d)
return dec.Decode(&l)
}
func dump(cachePath string, d DictLoader) error {
cacheFile, err = os.OpenFile(cachePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
func dump(l DictLoader, cachePath string) error {
cacheFile, err := os.OpenFile(cachePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return err
}
defer cacheFile.Close()
enc := gob.NewEncoder(cacheFile)
return enc.Encode(d)
return enc.Encode(l)
}
func SetDict(s Segmenter, dictName string, pos bool) error {
func SetDict(l DictLoader, dictName string, pos bool) error {
dictPath, err := DictPath(dictName)
if err != nil {
return err
}
cachePath = cachePath(dictPath)
cached, err := isCached(dictPath, cachePath)
cachePath := cachePath(dictPath)
cached, err := cached(dictPath, cachePath)
if err != nil {
return err
}
if cached {
err = load(cachePath, s)
err = load(l, cachePath)
if err == nil {
return nil
}
cached = false
}
err = read(dictPath, s, pos)
err = LoadDict(l, dictPath, pos)
if err != nil {
return err
}
err = dump(cachePath, s)
if err != nil {
return err
}
return dump(l, cachePath)
}
func LoadUserDict(dictName string, s Segmenter, pos bool) error {
dictPath, err := DictPath(dictName)
if err != nil {
return err
}
return read(dictPath, s, pos)
}
*/
// Split sentence using regular expression.
func RegexpSplit(r *regexp.Regexp, sentence string) chan string {
result := make(chan string)