1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

优化 jieba

This commit is contained in:
源文雨
2022-11-30 16:00:56 +08:00
parent 4d76899e79
commit 6982ead703
8 changed files with 114 additions and 109 deletions

View File

@@ -18,7 +18,7 @@ var ideographRegexp = regexp.MustCompile(`\p{Han}+`)
// JiebaTokenizer is the bleve tokenizer for jieba.
type JiebaTokenizer struct {
seg jieba.Segmenter
seg *jieba.Segmenter
hmm, searchMode bool
}
@@ -43,8 +43,7 @@ Parameters:
this word into "交换", "换机", which are valid Chinese words.
*/
func NewJiebaTokenizer(dictFile fs.File, hmm, searchMode bool) (analysis.Tokenizer, error) {
var seg jieba.Segmenter
err := seg.LoadDictionary(dictFile)
seg, err := jieba.LoadDictionary(dictFile)
return &JiebaTokenizer{
seg: seg,
hmm: hmm,
@@ -73,8 +72,7 @@ Parameters:
this word into "交换", "换机", which are valid Chinese words.
*/
func NewJiebaTokenizerAt(dictFilePath string, hmm, searchMode bool) (analysis.Tokenizer, error) {
var seg jieba.Segmenter
err := seg.LoadDictionaryAt(dictFilePath)
seg, err := jieba.LoadDictionaryAt(dictFilePath)
return &JiebaTokenizer{
seg: seg,
hmm: hmm,