优化 dict, add fs.File 支持

2026-06-30 09:00:30 +08:00 · 2022-11-30 14:14:48 +08:00
parent c8785c7994
commit f3da9e6420
22 changed files with 190 additions and 91 deletions
--- a/analyse/example_test.go
+++ b/analyse/example_test.go
@@ -6,8 +6,8 @@ import (
 func Example_extractTags() {
 	var t TagExtracter
-	t.LoadDictionary("../dict.txt")
+	t.LoadDictionaryAt("../dict.txt")
-	t.LoadIdf("idf.txt")
+	t.LoadIdfAt("idf.txt")
 	sentence := "这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。"
 	segments := t.ExtractTags(sentence, 5)
@@ -20,7 +20,7 @@ func Example_extractTags() {
 }
 func Example_textRank() {
-	t, err := NewTextRanker("../dict.txt")
+	t, err := NewTextRankerAt("../dict.txt")
 	if err != nil {
 		panic(err)
 	}
--- a/analyse/idf.go
+++ b/analyse/idf.go
@@ -1,6 +1,7 @@
 package analyse
 import (
 	"io/fs"
 	"sort"
 	"sync"
@@ -38,8 +39,12 @@ func (i *Idf) Load(tokens ...dictionary.Token) {
 	i.Unlock()
 }
-func (i *Idf) loadDictionary(fileName string) error {
+func (i *Idf) loadDictionary(file fs.File) error {
-	return dictionary.LoadDictionary(i, fileName)
+	return dictionary.LoadDictionary(i, file)
 }
 func (i *Idf) loadDictionaryAt(fileName string) error {
 	return dictionary.LoadDictionaryAt(i, fileName)
 }
 // Frequency returns the IDF of given word.
--- a/analyse/stopwords.go
+++ b/analyse/stopwords.go
@@ -1,6 +1,7 @@
 package analyse
 import (
 	"io/fs"
 	"sync"
 	"github.com/fumiama/jieba/dictionary"
@@ -82,6 +83,10 @@ func (s *StopWord) Load(tokens ...dictionary.Token) {
 	s.Unlock()
 }
-func (s *StopWord) loadDictionary(fileName string) error {
+func (s *StopWord) loadDictionary(file fs.File) error {
-	return dictionary.LoadDictionary(s, fileName)
+	return dictionary.LoadDictionary(s, file)
 }
 func (s *StopWord) loadDictionaryAt(file string) error {
 	return dictionary.LoadDictionaryAt(s, file)
 }
--- a/analyse/tag_extracker.go
+++ b/analyse/tag_extracker.go
@@ -2,6 +2,7 @@
 package analyse
 import (
 	"io/fs"
 	"sort"
 	"strings"
 	"unicode/utf8"
@@ -52,22 +53,41 @@ type TagExtracter struct {
 }
 // LoadDictionary reads the given file and creates a new dictionary.
-func (t *TagExtracter) LoadDictionary(fileName string) error {
+func (t *TagExtracter) LoadDictionary(file fs.File) error {
 	t.stopWord = NewStopWord()
 	t.seg = new(jieba.Segmenter)
-	return t.seg.LoadDictionary(fileName)
+	return t.seg.LoadDictionary(file)
 }
 // LoadDictionaryAt reads the file at the given path and creates a new dictionary.
 func (t *TagExtracter) LoadDictionaryAt(fileName string) error {
 	t.stopWord = NewStopWord()
 	t.seg = new(jieba.Segmenter)
 	return t.seg.LoadDictionaryAt(fileName)
 }
 // LoadIdf reads the given file and create a new Idf dictionary.
-func (t *TagExtracter) LoadIdf(fileName string) error {
+func (t *TagExtracter) LoadIdf(file fs.File) error {
 	t.idf = NewIdf()
-	return t.idf.loadDictionary(fileName)
+	return t.idf.loadDictionary(file)
 }
 // LoadIdfAt reads the file at the given path and creates a new Idf dictionary.
 func (t *TagExtracter) LoadIdfAt(fileName string) error {
 	t.idf = NewIdf()
 	return t.idf.loadDictionaryAt(fileName)
 }
 // LoadStopWords reads the given file and create a new StopWord dictionary.
-func (t *TagExtracter) LoadStopWords(fileName string) error {
+func (t *TagExtracter) LoadStopWords(file fs.File) error {
 	t.stopWord = NewStopWord()
-	return t.stopWord.loadDictionary(fileName)
+	return t.stopWord.loadDictionary(file)
 }
 // LoadStopWordsAt reads the file at the given path and creates a new StopWord dictionary.
 func (t *TagExtracter) LoadStopWordsAt(file string) error {
 	t.stopWord = NewStopWord()
 	return t.stopWord.loadDictionaryAt(file)
 }
 // ExtractTags extracts the topK key words from sentence.
--- a/analyse/tag_extracker_test.go
+++ b/analyse/tag_extracker_test.go
@@ -256,8 +256,8 @@ var (
 func TestExtractTags(t *testing.T) {
 	var te TagExtracter
-	te.LoadDictionary("../dict.txt")
+	te.LoadDictionaryAt("../dict.txt")
-	te.LoadIdf("idf.txt")
+	te.LoadIdfAt("idf.txt")
 	for index, sentence := range testContents {
 		result := te.ExtractTags(sentence, 20)
@@ -274,8 +274,8 @@ func TestExtractTags(t *testing.T) {
 func TestExtratTagsWithWeight(t *testing.T) {
 	var te TagExtracter
-	te.LoadDictionary("../dict.txt")
+	te.LoadDictionaryAt("../dict.txt")
-	te.LoadIdf("idf.txt")
+	te.LoadIdfAt("idf.txt")
 	result := te.ExtractTags(Lyric, 10)
 	for index, tag := range result {
 		if LyciWeight[index].text != tag.text ||
@@ -287,9 +287,9 @@ func TestExtratTagsWithWeight(t *testing.T) {
 func TestExtractTagsWithStopWordsFile(t *testing.T) {
 	var te TagExtracter
-	te.LoadDictionary("../dict.txt")
+	te.LoadDictionaryAt("../dict.txt")
-	te.LoadIdf("idf.txt")
+	te.LoadIdfAt("idf.txt")
-	te.LoadStopWords("stop_words.txt")
+	te.LoadStopWordsAt("stop_words.txt")
 	result := te.ExtractTags(Lyric, 7)
 	for index, tag := range result {
 		if LyciWeight2[index].text != tag.text ||
--- a/analyse/textrank.go
+++ b/analyse/textrank.go
@@ -2,6 +2,7 @@ package analyse
 import (
 	"hash/crc64"
 	"io/fs"
 	"math"
 	"sort"
@@ -173,7 +174,13 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments {
 type TextRanker posseg.Segmenter
 // NewTextRanker reads a given file and create a new dictionary file for Textranker.
-func NewTextRanker(fileName string) (TextRanker, error) {
+func NewTextRanker(file fs.File) (TextRanker, error) {
 	seg := posseg.Segmenter{}
-	return TextRanker(seg), seg.LoadDictionary(fileName)
+	return TextRanker(seg), seg.LoadDictionary(file)
 }
 // NewTextRankerAt reads the dictionary file at the given path and creates a new TextRanker.
 func NewTextRankerAt(fileName string) (TextRanker, error) {
 	seg := posseg.Segmenter{}
 	return TextRanker(seg), seg.LoadDictionaryAt(fileName)
 }
--- a/analyse/textrank_test.go
+++ b/analyse/textrank_test.go
@@ -23,7 +23,7 @@ var (
 )
 func TestTextRank(t *testing.T) {
-	tr, err := NewTextRanker("../dict.txt")
+	tr, err := NewTextRankerAt("../dict.txt")
 	if err != nil {
 		t.Fatal(err)
 	}
--- a/dictionary.go
+++ b/dictionary.go
@@ -1,6 +1,7 @@
 package jieba
 import (
 	"io/fs"
 	"math"
 	"sync"
@@ -57,6 +58,10 @@ func (d *Dictionary) Frequency(key string) (float64, bool) {
 	return freq, ok
 }
-func (d *Dictionary) loadDictionary(fileName string) error {
+func (d *Dictionary) loadDictionary(file fs.File) error {
-	return dictionary.LoadDictionary(d, fileName)
+	return dictionary.LoadDictionary(d, file)
 }
 func (d *Dictionary) loadDictionaryAt(file string) error {
 	return dictionary.LoadDictionaryAt(d, file)
 }
--- a/dictionary/dictionary.go
+++ b/dictionary/dictionary.go
@@ -4,8 +4,8 @@ package dictionary
 import (
 	"bufio"
 	"io/fs"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 )
@@ -17,7 +17,7 @@ type DictLoader interface {
 	AddToken(Token)
 }
-func loadDictionary(file *os.File) (tokens []Token, err error) {
+func loadDictionary(file fs.File) (tokens []Token, err error) {
 	scanner := bufio.NewScanner(file)
 	var token Token
 	var line string
@@ -45,12 +45,18 @@ func loadDictionary(file *os.File) (tokens []Token, err error) {
 }
 // LoadDictionary reads the given file and passes all tokens to a DictLoader.
-func LoadDictionary(dl DictLoader, fileName string) error {
+func LoadDictionary(dl DictLoader, file fs.File) error {
-	filePath, err := dictPath(fileName)
+	tokens, err := loadDictionary(file)
 	if err != nil {
 		return err
 	}
-	dictFile, err := os.Open(filePath)
+	dl.Load(tokens...)
 	return nil
 }
 // LoadDictionaryAt reads the given file and passes all tokens to a DictLoader.
 func LoadDictionaryAt(dl DictLoader, file string) error {
 	dictFile, err := os.Open(file)
 	if err != nil {
 		return err
 	}
@@ -62,16 +68,3 @@ func LoadDictionary(dl DictLoader, fileName string) error {
 	dl.Load(tokens...)
 	return nil
 }
 func dictPath(dictFileName string) (string, error) {
 	if filepath.IsAbs(dictFileName) {
 		return dictFileName, nil
 	}
 	var dictFilePath string
 	cwd, err := os.Getwd()
 	if err != nil {
 		return dictFilePath, err
 	}
 	dictFilePath = filepath.Clean(filepath.Join(cwd, dictFileName))
 	return dictFilePath, nil
 }
--- a/dictionary/dictionary_test.go
+++ b/dictionary/dictionary_test.go
@@ -33,7 +33,7 @@ func (d *Dict) AddToken(token Token) {
 func TestLoadDictionary(t *testing.T) {
 	d := &Dict{freqMap: make(map[string]float64), posMap: make(map[string]string)}
-	err := LoadDictionary(d, "../userdict.txt")
+	err := LoadDictionaryAt(d, "../userdict.txt")
 	if err != nil {
 		t.Fatalf(err.Error())
 	}
@@ -48,8 +48,8 @@ func TestLoadDictionary(t *testing.T) {
 func TestAddToken(t *testing.T) {
 	d := &Dict{freqMap: make(map[string]float64), posMap: make(map[string]string)}
-	LoadDictionary(d, "../userdict.txt")
+	LoadDictionaryAt(d, "../userdict.txt")
-	d.AddToken(Token{"好用", 99, "a"})
+	d.AddToken(Token{99, "好用", "a"})
 	if d.freqMap["好用"] != 99 {
 		t.Fatalf("Failed to add token, got frequency %f, expected 99", d.freqMap["好用"])
 	}
--- a/dictionary/token.go
+++ b/dictionary/token.go
@@ -2,8 +2,8 @@ package dictionary
 // Token represents a Chinese word with (optional) frequency and POS.
 type Token struct {
 	text      string
 	frequency float64
 	text      string
 	pos       string
 }
--- a/example_parallel_cut_test.go
+++ b/example_parallel_cut_test.go
@@ -36,7 +36,7 @@ func Example_parallelCut() {
 	runtime.GOMAXPROCS(numThreads)
 	// Load dictionary
-	segmenter.LoadDictionary("dict.txt")
+	segmenter.LoadDictionaryAt("dict.txt")
 	// open file for segmentation
 	file, err := os.Open("README.md")
--- a/example_test.go
+++ b/example_test.go
@@ -6,7 +6,7 @@ import (
 func Example() {
 	var seg Segmenter
-	seg.LoadDictionary("dict.txt")
+	seg.LoadDictionaryAt("dict.txt")
 	fmt.Print("【全模式】：")
 	fmt.Println(seg.CutAll("我来到北京清华大学"))
@@ -28,7 +28,7 @@ func Example() {
 func Example_suggestFrequency() {
 	var seg Segmenter
-	seg.LoadDictionary("dict.txt")
+	seg.LoadDictionaryAt("dict.txt")
 	sentence := "超敏C反应蛋白是什么？"
 	fmt.Print("Before:")
@@ -76,13 +76,13 @@ func Example_suggestFrequency() {
 func Example_loadUserDictionary() {
 	var seg Segmenter
-	seg.LoadDictionary("dict.txt")
+	seg.LoadDictionaryAt("dict.txt")
 	sentence := "李小福是创新办主任也是云计算方面的专家"
 	fmt.Print("Before:")
 	fmt.Println(seg.Cut(sentence, true))
-	seg.LoadUserDictionary("userdict.txt")
+	seg.LoadUserDictionaryAt("userdict.txt")
 	fmt.Print("After:")
 	fmt.Println(seg.Cut(sentence, true))
--- a/jieba.go
+++ b/jieba.go
@@ -2,6 +2,7 @@
 package jieba
 import (
 	"io/fs"
 	"math"
 	"regexp"
 	"strings"
@@ -93,16 +94,30 @@ func (seg *Segmenter) SuggestFrequency(words ...string) float64 {
 // LoadDictionary loads dictionary from the given file. Every time
 // LoadDictionary is called, the previously loaded dictionary will be cleared.
-func (seg *Segmenter) LoadDictionary(fileName string) error {
+func (seg *Segmenter) LoadDictionary(file fs.File) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64)}
-	return seg.dict.loadDictionary(fileName)
+	return seg.dict.loadDictionary(file)
 }
 // LoadDictionaryAt loads dictionary from given file name. Every time
 // LoadDictionaryAt is called, the previously loaded dictionary will be cleared.
 func (seg *Segmenter) LoadDictionaryAt(file string) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64)}
 	return seg.dict.loadDictionaryAt(file)
 }
 // LoadUserDictionary loads a user specified dictionary, it must be called
 // after LoadDictionary, and it will not clear any previous loaded dictionary,
 // instead it will override exist entries.
-func (seg *Segmenter) LoadUserDictionary(fileName string) error {
+func (seg *Segmenter) LoadUserDictionary(file fs.File) error {
-	return seg.dict.loadDictionary(fileName)
+	return seg.dict.loadDictionary(file)
 }
 // LoadUserDictionaryAt loads a user-specified dictionary from the given file
 // path. It must be called after LoadDictionary or LoadDictionaryAt; it does not
 // clear the previously loaded dictionary, but overrides existing entries.
 func (seg *Segmenter) LoadUserDictionaryAt(file string) error {
 	return seg.dict.loadDictionaryAt(file)
 }
 func (seg *Segmenter) dag(runes []rune) map[int][]int {
--- a/jieba_test.go
+++ b/jieba_test.go
@@ -616,7 +616,7 @@ var (
 )
 func init() {
-	seg.LoadDictionary("dict.txt")
+	seg.LoadDictionaryAt("dict.txt")
 }
 func TestCutDAG(t *testing.T) {
@@ -715,7 +715,7 @@ func TestCutForSearch(t *testing.T) {
 func TestLoadDictionary(t *testing.T) {
 	var result []string
-	seg.LoadDictionary("foobar.txt")
+	seg.LoadDictionaryAt("foobar.txt")
 	for index, content := range testContents {
 		result = seg.Cut(content, true)
 		if len(result) != len(userDictCutResult[index]) {
@@ -728,11 +728,11 @@ func TestLoadDictionary(t *testing.T) {
 			}
 		}
 	}
-	seg.LoadDictionary("dict.txt")
+	seg.LoadDictionaryAt("dict.txt")
 }
 func TestLoadUserDictionary(t *testing.T) {
-	seg.LoadUserDictionary("userdict.txt")
+	seg.LoadUserDictionaryAt("userdict.txt")
 	sentence := "李小福是创新办主任也是云计算方面的专家; 什么是八一双鹿例如我输入一个带“韩玉赏鉴”的标题，在自定义词库中也增加了此词为N类型"
 	result := []string{"李小福", "是", "创新办", "主任", "也", "是", "云计算", "方面", "的", "专家", ";", " ", "什么", "是", "八一双鹿", "例如", "我", "输入", "一个", "带", "“", "韩玉赏鉴", "”", "的", "标题", "，", "在", "自定义词", "库中", "也", "增加", "了", "此", "词为", "N", "类型"}
@@ -771,7 +771,7 @@ func TestLoadUserDictionary(t *testing.T) {
 			t.Fatal(word)
 		}
 	}
-	seg.LoadDictionary("dict.txt")
+	seg.LoadDictionaryAt("dict.txt")
 }
 func BenchmarkCutNoHMM(b *testing.B) {
--- a/posseg/dictionary.go
+++ b/posseg/dictionary.go
@@ -1,6 +1,7 @@
 package posseg
 import (
 	"io/fs"
 	"math"
 	"sync"
@@ -69,6 +70,10 @@ func (d *Dictionary) Pos(key string) (string, bool) {
 	return pos, ok
 }
-func (d *Dictionary) loadDictionary(fileName string) error {
+func (d *Dictionary) loadDictionary(file fs.File) error {
-	return dictionary.LoadDictionary(d, fileName)
+	return dictionary.LoadDictionary(d, file)
 }
 func (d *Dictionary) loadDictionaryAt(file string) error {
 	return dictionary.LoadDictionaryAt(d, file)
 }
--- a/posseg/example_test.go
+++ b/posseg/example_test.go
@@ -8,7 +8,7 @@ import (
 func Example() {
 	var seg posseg.Segmenter
-	seg.LoadDictionary("../dict.txt")
+	seg.LoadDictionaryAt("../dict.txt")
 	for segment := range seg.Cut("我爱北京天安门", true) {
 		fmt.Printf("%s %s\n", segment.Text(), segment.Pos())
--- a/posseg/posseg.go
+++ b/posseg/posseg.go
@@ -2,6 +2,7 @@
 package posseg
 import (
 	"io/fs"
 	"math"
 	"regexp"
@@ -39,17 +40,31 @@ type Segmenter struct {
 }
 // LoadDictionary loads dictionary from the given file.
-// Everytime LoadDictionary is called, previously loaded dictionary will be cleard.
+// Every time LoadDictionary is called, the previously loaded dictionary will be cleared.
-func (seg *Segmenter) LoadDictionary(fileName string) error {
+func (seg *Segmenter) LoadDictionary(file fs.File) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
-	return seg.dict.loadDictionary(fileName)
+	return seg.dict.loadDictionary(file)
 }
 // LoadDictionaryAt loads dictionary from given file name.
 // Every time LoadDictionaryAt is called, the previously loaded dictionary will be cleared.
 func (seg *Segmenter) LoadDictionaryAt(fileName string) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
 	return seg.dict.loadDictionaryAt(fileName)
 }
 // LoadUserDictionary loads a user specified dictionary, it must be called
 // after LoadDictionary, and it will not clear any previous loaded dictionary,
 // instead it will override exist entries.
-func (seg *Segmenter) LoadUserDictionary(fileName string) error {
+func (seg *Segmenter) LoadUserDictionary(file fs.File) error {
-	return seg.dict.loadDictionary(fileName)
+	return seg.dict.loadDictionary(file)
 }
 // LoadUserDictionaryAt loads a user-specified dictionary from the given file
 // path. It must be called after LoadDictionary or LoadDictionaryAt; it does not
 // clear the previously loaded dictionary, but overrides existing entries.
 func (seg *Segmenter) LoadUserDictionaryAt(fileName string) error {
 	return seg.dict.loadDictionaryAt(fileName)
 }
 func (seg *Segmenter) cutDetailInternal(sentence string) <-chan Segment {
--- a/posseg/posseg_test.go
+++ b/posseg/posseg_test.go
@@ -269,7 +269,7 @@ var (
 )
 func init() {
-	seg.LoadDictionary("../dict.txt")
+	seg.LoadDictionaryAt("../dict.txt")
 }
 func chanToArray(ch <-chan Segment) []Segment {
@@ -357,8 +357,8 @@ func TestBug137(t *testing.T) {
 }
 func TestUserDict(t *testing.T) {
-	seg.LoadUserDictionary("../userdict.txt")
+	seg.LoadUserDictionaryAt("../userdict.txt")
-	defer seg.LoadDictionary("../dict.txt")
+	defer seg.LoadDictionaryAt("../dict.txt")
 	sentence := "李小福是创新办主任也是云计算方面的专家; 什么是八一双鹿例如我输入一个带“韩玉赏鉴”的标题，在自定义词库中也增加了此词为N类型"
 	cutResult := []Segment{
--- a/tokenizers/example_test.go
+++ b/tokenizers/example_test.go
@@ -10,7 +10,7 @@ func Example() {
 	sentence := []byte("永和服装饰品有限公司")
 	// default mode
-	tokenizer, _ := tokenizers.NewJiebaTokenizer("../dict.txt", true, false)
+	tokenizer, _ := tokenizers.NewJiebaTokenizerAt("../dict.txt", true, false)
 	fmt.Println("Default Mode:")
 	for _, token := range tokenizer.Tokenize(sentence) {
 		fmt.Printf(
@@ -19,7 +19,7 @@ func Example() {
 	}
 	//search mode
-	tokenizer, _ = tokenizers.NewJiebaTokenizer("../dict.txt", true, true)
+	tokenizer, _ = tokenizers.NewJiebaTokenizerAt("../dict.txt", true, true)
 	fmt.Println("Search Mode:")
 	for _, token := range tokenizer.Tokenize(sentence) {
 		fmt.Printf(
--- a/tokenizers/tokenizer.go
+++ b/tokenizers/tokenizer.go
@@ -1,7 +1,7 @@
 package tokenizers
 import (
-	"fmt"
+	"io/fs"
 	"regexp"
 	"strconv"
@@ -24,6 +24,36 @@ type JiebaTokenizer struct {
 /*
 NewJiebaTokenizer creates a new JiebaTokenizer.
 Parameters:
 	dictFile: the dictionary file.
 	hmm: whether to use the Hidden Markov Model to cut unknown words,
 	i.e. words not found in the dictionary. For example, the word "安卓"
 	("Android" in English) is not in the dictionary file. If hmm is false, it
 	will be cut into two single characters, "安" and "卓"; if hmm is true, it
 	will be treated as one single word, because Jieba uses a Hidden Markov
 	Model with the Viterbi algorithm to guess the most likely segmentation.
 	searchMode: whether to further cut long words into several short words.
 	In Chinese, some long words contain other words. For example, "交换机"
 	is the Chinese word for "switch"; if searchMode is false, "交换机" is
 	treated as a single word. If searchMode is true, it is further split
 	into "交换" and "换机", which are valid Chinese words.
 */
 func NewJiebaTokenizer(dictFile fs.File, hmm, searchMode bool) (analysis.Tokenizer, error) {
 	var seg jieba.Segmenter
 	err := seg.LoadDictionary(dictFile)
 	return &JiebaTokenizer{
 		seg:        seg,
 		hmm:        hmm,
 		searchMode: searchMode,
 	}, err
 }
 /*
 NewJiebaTokenizerAt creates a new JiebaTokenizer.
 Parameters:
 	dictFilePath: path of the dictioanry file.
@@ -41,9 +71,9 @@ Parameters:
 	"交换机" as a single word. If searchMode is true, it will further split
 	this word into "交换", "换机", which are valid Chinese words.
 */
-func NewJiebaTokenizer(dictFilePath string, hmm, searchMode bool) (analysis.Tokenizer, error) {
+func NewJiebaTokenizerAt(dictFilePath string, hmm, searchMode bool) (analysis.Tokenizer, error) {
 	var seg jieba.Segmenter
-	err := seg.LoadDictionary(dictFilePath)
+	err := seg.LoadDictionaryAt(dictFilePath)
 	return &JiebaTokenizer{
 		seg:        seg,
 		hmm:        hmm,
@@ -107,18 +137,13 @@ JiebaTokenizerConstructor creates a JiebaTokenizer.
 Parameter config should contains at least one parameter:
-	file: the path of the dictionary file.
+	file: the path of the dictionary file or fs.File.
 	hmm: optional, specify whether to use Hidden Markov Model, see NewJiebaTokenizer for details.
 	search: optional, speficy whether to use search mode, see NewJiebaTokenizer for details.
 */
-func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (
+func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
 	analysis.Tokenizer, error) {
 	dictFilePath, ok := config["file"].(string)
 	if !ok {
 		return nil, fmt.Errorf("must specify dictionary file path")
 	}
 	hmm, ok := config["hmm"].(bool)
 	if !ok {
 		hmm = true
@@ -127,8 +152,12 @@ func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Ca
 	if !ok {
 		searchMode = true
 	}
-
+	dictFilePath, ok := config["file"].(string)
-	return NewJiebaTokenizer(dictFilePath, hmm, searchMode)
+	if ok {
 		return NewJiebaTokenizerAt(dictFilePath, hmm, searchMode)
 	}
 	dictFile := config["file"].(fs.File)
 	return NewJiebaTokenizer(dictFile, hmm, searchMode)
 }
 func detectTokenType(term string) analysis.TokenType {
--- a/tokenizers/tokenizer_test.go
+++ b/tokenizers/tokenizer_test.go
@@ -5219,7 +5219,7 @@ func TestJiebaTokenizerDefaultModeWithHMM(t *testing.T) {
 		},
 	}
-	tokenizer, _ := NewJiebaTokenizer("../dict.txt", true, false)
+	tokenizer, _ := NewJiebaTokenizerAt("../dict.txt", true, false)
 	for _, test := range tests {
 		actual := tokenizer.Tokenize(test.input)
 		if !reflect.DeepEqual(actual, test.output) {
@@ -11057,7 +11057,7 @@ func TestJiebaTokenizerSearchModeWithHMM(t *testing.T) {
 		},
 	}
-	tokenizer, _ := NewJiebaTokenizer("../dict.txt", true, true)
+	tokenizer, _ := NewJiebaTokenizerAt("../dict.txt", true, true)
 	for _, test := range tests {
 		actual := tokenizer.Tokenize(test.input)
 		if !reflect.DeepEqual(actual, test.output) {
@@ -16474,7 +16474,7 @@ func TestJiebaTokenizerDefaultModeWithoutHMM(t *testing.T) {
 		},
 	}
-	tokenizer, _ := NewJiebaTokenizer("../dict.txt", false, false)
+	tokenizer, _ := NewJiebaTokenizerAt("../dict.txt", false, false)
 	for _, test := range tests {
 		actual := tokenizer.Tokenize(test.input)
 		if !reflect.DeepEqual(actual, test.output) {
@@ -22506,7 +22506,7 @@ func TestJiebaTokenizerSearchModeWithoutHMM(t *testing.T) {
 		},
 	}
-	tokenizer, _ := NewJiebaTokenizer("../dict.txt", false, true)
+	tokenizer, _ := NewJiebaTokenizerAt("../dict.txt", false, true)
 	for _, test := range tests {
 		actual := tokenizer.Tokenize(test.input)
 		if !reflect.DeepEqual(actual, test.output) {