diff --git a/trie.go b/trie.go index 8aa2f64..0477455 100644 --- a/trie.go +++ b/trie.go @@ -1,86 +1,10 @@ package jiebago -import ( - "crypto/md5" - "encoding/gob" - "fmt" - "log" - "os" - "path/filepath" -) - type Jieba struct { Total float64 Freq map[string]float64 } -func (j *Jieba) load(dictFileName string) error { - dictFilePath, err := DictPath(dictFileName) - if err != nil { - return err - } - - dictFileInfo, err := os.Stat(dictFilePath) - if err != nil { - return err - } - - log.Printf("Building Trie..., from %s\n", dictFilePath) - h := fmt.Sprintf("%x", md5.Sum([]byte(dictFilePath))) - cacheFileName := fmt.Sprintf("jieba.%s.cache", h) - cacheFilePath := filepath.Join(os.TempDir(), cacheFileName) - isDictCached := true - - cacheFileInfo, err := os.Stat(cacheFilePath) - if err != nil { - isDictCached = false - } - - if isDictCached { - isDictCached = cacheFileInfo.ModTime().After(dictFileInfo.ModTime()) - } - - var cacheFile *os.File - if isDictCached { - cacheFile, err = os.Open(cacheFilePath) - if err != nil { - isDictCached = false - } - defer cacheFile.Close() - } - - if isDictCached { - dec := gob.NewDecoder(cacheFile) - err = dec.Decode(&j) - if err != nil { - isDictCached = false - } else { - log.Printf("loaded model from cache %s\n", cacheFilePath) - } - } - - if !isDictCached { - err = LoadDict(j, dictFilePath, false) - if err != nil { - return err - } - // dump trie - cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) - if err != nil { - return err - } - defer cacheFile.Close() - enc := gob.NewEncoder(cacheFile) - err = enc.Encode(j) - if err != nil { - return err - } else { - log.Printf("dumped model from cache %s\n", cacheFilePath) - } - } - return nil -} - func (j *Jieba) AddEntry(entry *Entry) { j.Add(entry.Word, entry.Freq) } diff --git a/util.go b/util.go index ecb8949..decfb99 100644 --- a/util.go +++ b/util.go @@ -5,6 +5,7 @@ import ( "crypto/md5" "encoding/gob" "fmt" + "log" "os" "path/filepath" "regexp" @@ -26,6 +27,8 @@ func DictPath(dictFileName string) (string, error) { } func LoadDict(l DictLoader, dictFilePath string, usingFlag bool) error { + log.Printf("Building Trie..., from %s\n", dictFilePath) + dictFile, err := os.Open(dictFilePath) if err != nil { return err @@ -59,22 +62,14 @@ func cachePath(dictPath string) string { fmt.Sprintf("jieba.%x.cache", md5.Sum([]byte(dictPath)))) } -func fileInfo(filePath string, missingOk bool) (os.FileInfo, error) { - fi, err := os.Stat(filePath) - if missingOk && err == os.ErrNotExist { - return fi, nil - } - return fi, err -} - func cached(dictPath, cachePath string) (bool, error) { - dictFileInfo, err := fileInfo(dictPath, false) + dictFileInfo, err := os.Stat(dictPath) if err != nil { return false, err } - cacheFileInfo, err := fileInfo(cachePath, true) + cacheFileInfo, err := os.Stat(cachePath) if err != nil { - return false, err + return false, nil } return cacheFileInfo.ModTime().After(dictFileInfo.ModTime()), nil } @@ -87,7 +82,7 @@ func load(l DictLoader, cachePath string) error { defer cacheFile.Close() dec := gob.NewDecoder(cacheFile) - return dec.Decode(&l) + return dec.Decode(l) } func dump(l DictLoader, cachePath string) error { @@ -114,6 +109,7 @@ func SetDict(l DictLoader, dictName string, pos bool) error { if cached { err = load(l, cachePath) if err == nil { + log.Printf("loaded model from cache %s\n", cachePath) return nil } cached = false @@ -124,7 +120,12 @@ func SetDict(l DictLoader, dictName string, pos bool) error { return err } - return dump(l, cachePath) + err = dump(l, cachePath) + if err == nil { + log.Printf("dumped model from cache %s\n", cachePath) + return nil + } + return err } // Split sentence using regular expression.