From d257da40a7ccee137c724cfa65faa2d87f6d3fc6 Mon Sep 17 00:00:00 2001
From: Wang Bin
Date: Fri, 20 Mar 2015 18:38:08 +0800
Subject: [PATCH] try to refactor, not finished yet

---
Review notes (text in this section is ignored when the patch is applied):
- cache/filebased.go: the map key is now string (a []rune cannot be a map
  key), field names are consistent, SetDict always returns a value, and the
  gob snapshot uses exported fields.
- Still open: DictPath lives outside package cache and is not imported yet;
  ParseDictFile and addWord are not declared in this patch.
- The blob id on the cache/filebased.go "index" line predates these fixes.

 cache/cache.go     |   9 +++
 cache/filebased.go | 164 +++++++++++++++++++++++++++++++++++++++++++++
 dict.go            |   4 +-
 3 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 cache/cache.go
 create mode 100644 cache/filebased.go

diff --git a/cache/cache.go b/cache/cache.go
new file mode 100644
index 0000000..8eb6e08
--- /dev/null
+++ b/cache/cache.go
@@ -0,0 +1,9 @@
+package cache
+
+type Cacher interface {
+	SetDict(string) error
+	LoadUserDict(string) error
+	AddWord(string, string, float64)
+	Get(string) (float64, bool)
+	Total() float64
+}
diff --git a/cache/filebased.go b/cache/filebased.go
new file mode 100644
index 0000000..8286226
--- /dev/null
+++ b/cache/filebased.go
@@ -0,0 +1,164 @@
+package cache
+
+import (
+	"crypto/md5"
+	"encoding/gob"
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// FileBasedCache holds word frequencies in memory and mirrors them to a gob
+// file under os.TempDir() whose name is derived from the dictionary path.
+type FileBasedCache struct {
+	freq          map[string]float64
+	total         float64
+	dictFilePath  string
+	cacheFilePath string
+}
+
+// cacheSnapshot is the on-disk form of a FileBasedCache. encoding/gob only
+// encodes exported fields, so the private state is copied into this struct
+// before encoding and copied back after decoding.
+type cacheSnapshot struct {
+	Freq  map[string]float64
+	Total float64
+}
+
+// SetDict discards the current contents and loads dictFileName. The cache
+// file is used when it is newer than the dictionary; otherwise the dictionary
+// is read again and the cache file is rewritten.
+func (f *FileBasedCache) SetDict(dictFileName string) error {
+	// Always start from an empty, non-nil map: writing to a nil map panics.
+	f.freq = make(map[string]float64)
+	f.total = 0
+
+	if err := f.setFilePath(dictFileName); err != nil {
+		return err
+	}
+
+	cached, err := f.cached()
+	if err != nil {
+		return err
+	}
+
+	if cached {
+		if err := f.load(f.cacheFilePath); err == nil {
+			return nil
+		} // TODO: logging?
+	}
+
+	if err := f.read(f.dictFilePath); err != nil {
+		return err
+	}
+
+	return f.dump(f.cacheFilePath)
+}
+
+// LoadUserDict is not implemented yet; it always returns nil.
+func (f *FileBasedCache) LoadUserDict(userDictFileName string) error {
+	return nil
+}
+
+// AddWord is not implemented yet; it does nothing.
+func (f *FileBasedCache) AddWord(word, tag string, freq float64) {
+
+}
+
+// Get returns the frequency stored for key and whether key is present.
+func (f *FileBasedCache) Get(key string) (float64, bool) {
+	val, ok := f.freq[key]
+	return val, ok
+}
+
+// Total returns the stored total.
+func (f *FileBasedCache) Total() float64 {
+	return f.total
+}
+
+// setFilePath resolves dictFileName through DictPath and derives the cache
+// file path from the MD5 of the resolved path.
+func (f *FileBasedCache) setFilePath(dictFileName string) error { // TODO: specify the temp dir
+	// TODO: DictPath is declared in dict.go, outside this package; it still
+	// has to be imported here.
+	dictFilePath, err := DictPath(dictFileName)
+	if err != nil {
+		return err
+	}
+	f.dictFilePath = dictFilePath
+	f.cacheFilePath = filepath.Join(os.TempDir(),
+		fmt.Sprintf("jieba.%x.cache", md5.Sum([]byte(f.dictFilePath))))
+	return nil
+}
+
+// cached reports whether a cache file exists that was modified after the
+// dictionary file. A dictionary that cannot be stat'ed is an error; a cache
+// file that cannot be stat'ed just means "not cached".
+func (f *FileBasedCache) cached() (bool, error) {
+	dictFileInfo, err := os.Stat(f.dictFilePath) // TODO: logging
+	if err != nil {
+		return false, err
+	}
+
+	cacheFileInfo, err := os.Stat(f.cacheFilePath)
+	if err != nil { // TODO: logging
+		return false, nil
+	}
+
+	return cacheFileInfo.ModTime().After(dictFileInfo.ModTime()), nil
+}
+
+// load replaces the in-memory state with the snapshot stored in
+// cacheFilePath. The receiver is not modified when opening or decoding fails.
+func (f *FileBasedCache) load(cacheFilePath string) error {
+	cacheFile, err := os.Open(cacheFilePath)
+	if err != nil {
+		return err
+	}
+	defer cacheFile.Close()
+
+	var snap cacheSnapshot
+	if err := gob.NewDecoder(cacheFile).Decode(&snap); err != nil {
+		return err
+	}
+	if snap.Freq == nil {
+		// Keep the map usable even when the snapshot carried no entries.
+		snap.Freq = make(map[string]float64)
+	}
+	f.freq, f.total = snap.Freq, snap.Total
+	return nil
+}
+
+// read parses dictFilePath and hands every entry to addWord.
+func (f *FileBasedCache) read(dictFilePath string) error {
+	// TODO: ParseDictFile is not declared in this package.
+	wtfs, err := ParseDictFile(dictFilePath)
+	if err != nil {
+		return err
+	}
+
+	for _, wtf := range wtfs {
+		// TODO: addWord does not exist yet.
+		f.addWord(wtf) // TODO: add word, ignore frequency
+	}
+
+	return nil
+}
+
+// dump writes the current state to cacheFilePath, truncating any existing
+// file.
+func (f *FileBasedCache) dump(cacheFilePath string) error {
+	cacheFile, err := os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		return err
+	}
+
+	snap := cacheSnapshot{Freq: f.freq, Total: f.total}
+	if err := gob.NewEncoder(cacheFile).Encode(snap); err != nil {
+		cacheFile.Close()
+		return err
+	}
+	// Close explicitly: for a written file the close error may be the only
+	// report of a failed write.
+	return cacheFile.Close()
+}
diff --git a/dict.go b/dict.go
index 50f7e0e..9d4704a 100644
--- a/dict.go
+++ b/dict.go
@@ -18,11 +18,11 @@ func DictPath(dictFileName string) (string, error) {
 		return dictFileName, nil
 	}
 	var dictFilePath string
-	pwd, err := os.Getwd()
+	cwd, err := os.Getwd()
 	if err != nil {
 		return dictFilePath, err
 	}
-	dictFilePath = filepath.Clean(filepath.Join(pwd, dictFileName))
+	dictFilePath = filepath.Clean(filepath.Join(cwd, dictFileName))
 	return dictFilePath, nil
 }
 