1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-22 04:02:52 +08:00

small refactor, put parse dictionary function in separate file

This commit is contained in:
Wang Bin
2015-02-26 14:22:29 +08:00
parent 95a27da5cf
commit 60b2c9f763
2 changed files with 99 additions and 68 deletions

53
dict.go Normal file
View File

@@ -0,0 +1,53 @@
package jiebago
import (
"bufio"
"os"
"path/filepath"
"strconv"
"strings"
)
// WordTagFreq is a single dictionary entry: a word together with its
// optional tag and frequency.
type WordTagFreq struct {
	// Word is the dictionary word itself.
	Word string
	// Tag is the label attached to the word; empty when the entry has none.
	Tag string
	// Freq is the frequency recorded for the word; zero when the entry has none.
	Freq float64
}
// DictPath resolves dictFileName to an absolute path.
//
// An absolute dictFileName is returned exactly as given. A relative one is
// joined onto the current working directory and the result is cleaned. If the
// working directory cannot be determined, an empty path is returned together
// with the error from os.Getwd.
func DictPath(dictFileName string) (string, error) {
	if !filepath.IsAbs(dictFileName) {
		cwd, err := os.Getwd()
		if err != nil {
			return "", err
		}
		return filepath.Clean(filepath.Join(cwd, dictFileName)), nil
	}
	return dictFileName, nil
}
// ParseDictFile reads dictionary entries from dictFile, one per line, and
// returns them in file order.
//
// Each line has the form "word [freq [tag]]" with fields separated by spaces.
// Any run of spaces, including leading and trailing ones, counts as a single
// separator, and lines that contain no fields are skipped. A missing freq
// leaves Freq at zero and a missing tag leaves Tag empty; fields after the
// third are ignored. The first U+FEFF (byte order mark) on a line is removed
// before the line is split.
//
// If freq is not a valid floating-point number, or reading dictFile fails,
// the entries parsed so far are discarded and nil is returned with the error.
func ParseDictFile(dictFile *os.File) (wtfs []*WordTagFreq, err error) {
	isSpace := func(r rune) bool { return r == ' ' }

	scanner := bufio.NewScanner(dictFile)
	for scanner.Scan() {
		// Drop the BOM before splitting so that it can never end up as a
		// field of its own or glued to the first word.
		line := strings.Replace(scanner.Text(), "\ufeff", "", 1)

		// FieldsFunc never yields empty fields, so doubled, leading or
		// trailing spaces cannot shift the columns or feed "" to ParseFloat.
		fields := strings.FieldsFunc(line, isSpace)
		if len(fields) == 0 {
			continue // blank line: nothing to record
		}

		entry := &WordTagFreq{Word: fields[0]}
		if len(fields) > 1 {
			freq, perr := strconv.ParseFloat(fields[1], 64)
			if perr != nil {
				return nil, perr
			}
			entry.Freq = freq
		}
		if len(fields) > 2 {
			entry.Tag = fields[2]
		}
		wtfs = append(wtfs, entry)
	}
	if err = scanner.Err(); err != nil {
		return nil, err
	}
	return wtfs, nil
}