mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-05 00:32:51 +08:00
70 lines
1.5 KiB
Go
Executable File
70 lines
1.5 KiB
Go
Executable File
// Package dictionary contains a interface and wraps all io related work.
|
|
// It is used by jieba module to read/write files.
|
|
package dictionary
|
|
|
|
import (
|
|
"bufio"
|
|
"io"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// DictLoader is the interface that could add one token or load tokens
|
|
type DictLoader interface {
|
|
Load(...Token)
|
|
AddToken(Token)
|
|
}
|
|
|
|
func loadDictionary(file io.Reader) (tokens []Token, err error) {
|
|
scanner := bufio.NewScanner(file)
|
|
var token Token
|
|
var line string
|
|
var fields []string
|
|
for scanner.Scan() {
|
|
line = scanner.Text()
|
|
fields = strings.Split(line, " ")
|
|
token.text = strings.TrimSpace(strings.Replace(fields[0], "\ufeff", "", 1))
|
|
if length := len(fields); length > 1 {
|
|
token.frequency, err = strconv.ParseFloat(fields[1], 64)
|
|
if err != nil {
|
|
return
|
|
}
|
|
if length > 2 {
|
|
token.pos = strings.TrimSpace(fields[2])
|
|
}
|
|
}
|
|
tokens = append(tokens, token)
|
|
}
|
|
|
|
if err = scanner.Err(); err != nil {
|
|
return
|
|
}
|
|
return tokens, nil
|
|
}
|
|
|
|
// LoadDictionary reads the given file and passes all tokens to a DictLoader.
|
|
func LoadDictionary(dl DictLoader, file io.Reader) error {
|
|
tokens, err := loadDictionary(file)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dl.Load(tokens...)
|
|
return nil
|
|
}
|
|
|
|
// LoadDictionaryAt reads the given file and passes all tokens to a DictLoader.
|
|
func LoadDictionaryAt(dl DictLoader, file string) error {
|
|
dictFile, err := os.Open(file)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
tokens, err := loadDictionary(dictFile)
|
|
dictFile.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dl.Load(tokens...)
|
|
return nil
|
|
}
|