1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

优化 dict, add fs.File 支持

This commit is contained in:
源文雨
2022-11-30 14:14:48 +08:00
parent c8785c7994
commit f3da9e6420
22 changed files with 190 additions and 91 deletions

View File

@@ -4,8 +4,8 @@ package dictionary
import (
"bufio"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
)
@@ -17,7 +17,7 @@ type DictLoader interface {
AddToken(Token)
}
func loadDictionary(file *os.File) (tokens []Token, err error) {
func loadDictionary(file fs.File) (tokens []Token, err error) {
scanner := bufio.NewScanner(file)
var token Token
var line string
@@ -45,12 +45,18 @@ func loadDictionary(file *os.File) (tokens []Token, err error) {
}
// LoadDictionary reads the given file and passes all tokens to a DictLoader.
func LoadDictionary(dl DictLoader, fileName string) error {
filePath, err := dictPath(fileName)
func LoadDictionary(dl DictLoader, file fs.File) error {
tokens, err := loadDictionary(file)
if err != nil {
return err
}
dictFile, err := os.Open(filePath)
dl.Load(tokens...)
return nil
}
// LoadDictionaryAt opens the file at the given path and passes all of its tokens to a DictLoader.
func LoadDictionaryAt(dl DictLoader, file string) error {
dictFile, err := os.Open(file)
if err != nil {
return err
}
@@ -62,16 +68,3 @@ func LoadDictionary(dl DictLoader, fileName string) error {
dl.Load(tokens...)
return nil
}
// dictPath resolves dictFileName to an absolute path.
//
// An absolute name is returned unchanged. A relative name is joined onto the
// current working directory and cleaned. If the working directory cannot be
// determined, the empty string and the error from os.Getwd are returned.
func dictPath(dictFileName string) (string, error) {
	if !filepath.IsAbs(dictFileName) {
		workDir, err := os.Getwd()
		if err != nil {
			return "", err
		}
		return filepath.Clean(filepath.Join(workDir, dictFileName)), nil
	}
	return dictFileName, nil
}

View File

@@ -33,7 +33,7 @@ func (d *Dict) AddToken(token Token) {
func TestLoadDictionary(t *testing.T) {
d := &Dict{freqMap: make(map[string]float64), posMap: make(map[string]string)}
err := LoadDictionary(d, "../userdict.txt")
err := LoadDictionaryAt(d, "../userdict.txt")
if err != nil {
t.Fatalf(err.Error())
}
@@ -48,8 +48,8 @@ func TestLoadDictionary(t *testing.T) {
func TestAddToken(t *testing.T) {
d := &Dict{freqMap: make(map[string]float64), posMap: make(map[string]string)}
LoadDictionary(d, "../userdict.txt")
d.AddToken(Token{"好用", 99, "a"})
LoadDictionaryAt(d, "../userdict.txt")
d.AddToken(Token{99, "好用", "a"})
if d.freqMap["好用"] != 99 {
t.Fatalf("Failed to add token, got frequency %f, expected 99", d.freqMap["好用"])
}

View File

@@ -2,12 +2,12 @@ package dictionary
// Token represents a Chinese word with (optional) frequency and POS.
type Token struct {
text string
frequency float64
text string
pos string
}
//Text returns token's text.
// Text returns the token's text.
func (t Token) Text() string {
return t.text
}