1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

优化 tag_extracker

This commit is contained in:
源文雨
2022-11-30 13:35:21 +08:00
parent ae85ccb20a
commit d487545eb5
14 changed files with 60 additions and 62 deletions

View File

@@ -7,7 +7,7 @@ import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
jiebago "github.com/fumiama/jieba"
jieba "github.com/fumiama/jieba"
)
// Name is the jieba tokenizer name.
@@ -15,9 +15,9 @@ const Name = "jieba"
var ideographRegexp = regexp.MustCompile(`\p{Han}+`)
// JiebaTokenizer is the bleve tokenizer for jiebago.
// JiebaTokenizer is the bleve tokenizer for jieba.
type JiebaTokenizer struct {
seg jiebago.Segmenter
seg jieba.Segmenter
hmm, searchMode bool
}
@@ -42,7 +42,7 @@ Parameters:
this word into "交换", "换机", which are valid Chinese words.
*/
func NewJiebaTokenizer(dictFilePath string, hmm, searchMode bool) (analysis.Tokenizer, error) {
var seg jiebago.Segmenter
var seg jieba.Segmenter
err := seg.LoadDictionary(dictFilePath)
return &JiebaTokenizer{
seg: seg,