mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-05 00:32:51 +08:00
优化 dict, add fs.File 支持
This commit is contained in:
@@ -4,8 +4,8 @@ package dictionary
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
@@ -17,7 +17,7 @@ type DictLoader interface {
|
||||
AddToken(Token)
|
||||
}
|
||||
|
||||
func loadDictionary(file *os.File) (tokens []Token, err error) {
|
||||
func loadDictionary(file fs.File) (tokens []Token, err error) {
|
||||
scanner := bufio.NewScanner(file)
|
||||
var token Token
|
||||
var line string
|
||||
@@ -45,12 +45,18 @@ func loadDictionary(file *os.File) (tokens []Token, err error) {
|
||||
}
|
||||
|
||||
// LoadDictionary reads the given file and passes all tokens to a DictLoader.
|
||||
func LoadDictionary(dl DictLoader, fileName string) error {
|
||||
filePath, err := dictPath(fileName)
|
||||
func LoadDictionary(dl DictLoader, file fs.File) error {
|
||||
tokens, err := loadDictionary(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dictFile, err := os.Open(filePath)
|
||||
dl.Load(tokens...)
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadDictionaryAt opens the file at the given path, reads it, and passes all tokens to a DictLoader.
|
||||
func LoadDictionaryAt(dl DictLoader, file string) error {
|
||||
dictFile, err := os.Open(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -62,16 +68,3 @@ func LoadDictionary(dl DictLoader, fileName string) error {
|
||||
dl.Load(tokens...)
|
||||
return nil
|
||||
}
|
||||
|
||||
// dictPath resolves a dictionary file name to an absolute path.
//
// An absolute dictFileName is returned exactly as given (it is not cleaned).
// A relative dictFileName is resolved against the current working directory
// and cleaned. If the working directory cannot be determined, an empty path
// and the underlying error are returned.
func dictPath(dictFileName string) (string, error) {
	if filepath.IsAbs(dictFileName) {
		return dictFileName, nil
	}
	// filepath.Abs is the standard-library form of "join with the working
	// directory, then clean", so there is no need to call os.Getwd by hand.
	return filepath.Abs(dictFileName)
}
|
||||
|
||||
@@ -33,7 +33,7 @@ func (d *Dict) AddToken(token Token) {
|
||||
|
||||
func TestLoadDictionary(t *testing.T) {
|
||||
d := &Dict{freqMap: make(map[string]float64), posMap: make(map[string]string)}
|
||||
err := LoadDictionary(d, "../userdict.txt")
|
||||
err := LoadDictionaryAt(d, "../userdict.txt")
|
||||
if err != nil {
|
||||
t.Fatalf(err.Error())
|
||||
}
|
||||
@@ -48,8 +48,8 @@ func TestLoadDictionary(t *testing.T) {
|
||||
|
||||
func TestAddToken(t *testing.T) {
|
||||
d := &Dict{freqMap: make(map[string]float64), posMap: make(map[string]string)}
|
||||
LoadDictionary(d, "../userdict.txt")
|
||||
d.AddToken(Token{"好用", 99, "a"})
|
||||
LoadDictionaryAt(d, "../userdict.txt")
|
||||
d.AddToken(Token{99, "好用", "a"})
|
||||
if d.freqMap["好用"] != 99 {
|
||||
t.Fatalf("Failed to add token, got frequency %f, expected 99", d.freqMap["好用"])
|
||||
}
|
||||
|
||||
@@ -2,12 +2,12 @@ package dictionary
|
||||
|
||||
// Token represents a Chinese word with (optional) frequency and POS.
|
||||
type Token struct {
|
||||
text string
|
||||
frequency float64
|
||||
text string
|
||||
pos string
|
||||
}
|
||||
|
||||
//Text returns token's text.
|
||||
// Text returns token's text.
|
||||
func (t Token) Text() string {
|
||||
return t.text
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user