1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-12 13:10:25 +08:00

finished all OOP refactor

This commit is contained in:
Wang Bin
2015-03-24 18:34:07 +08:00
parent 73d87e4ed6
commit 1c378c28a7
7 changed files with 116 additions and 127 deletions

View File

@@ -14,16 +14,16 @@ const Name = "jieba"
// IdeographRegexp matches a run of one or more consecutive characters in the
// Unicode Han script.
var IdeographRegexp = regexp.MustCompile(`\p{Han}+`)
// JiebaTokenizer carries the state that Tokenize uses to split input text
// into tokens with jieba.
type JiebaTokenizer struct {
// dictFileName is the dictionary path that was handed to NewJiebaTokenizer.
// NOTE(review): this listing appears to show pre- and post-refactor lines
// side by side; this field looks like the pre-refactor one that j
// supersedes — confirm before relying on it.
dictFileName string
// j is the segmenter instance returned by jiebago.NewJieba. Tokenize calls
// j.Cut on the input and looks up candidate grams in j.Freq.
j *jiebago.Jieba
// hmm is passed through as the last argument to Cut. searchMode, when true,
// makes Tokenize additionally examine sub-grams of each word.
hmm, searchMode bool
}
// NewJiebaTokenizer builds a JiebaTokenizer backed by its own jiebago.Jieba
// instance created from dictFileName.
//
// hmm is stored and later forwarded to Cut by Tokenize; searchMode is stored
// and consulted by Tokenize for every word it receives.
//
// When jiebago.NewJieba fails, its error is returned together with a nil
// Tokenizer, so a caller never receives a tokenizer whose segmenter could not
// be created.
func NewJiebaTokenizer(dictFileName string, hmm, searchMode bool) (analysis.Tokenizer, error) {
	j, err := jiebago.NewJieba(dictFileName)
	if err != nil {
		// Return a literal nil so the interface value itself compares equal
		// to nil on the caller's side.
		return nil, err
	}
	// Each field is set exactly once; repeating a key in a composite literal
	// is a compile-time error.
	return &JiebaTokenizer{
		j:          j,
		hmm:        hmm,
		searchMode: searchMode,
	}, nil
}
@@ -35,7 +35,7 @@ func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
pos := 1
var width int
var gram string
for word := range jiebago.Cut(string(input), false, jt.hmm) {
for word := range jt.j.Cut(string(input), false, jt.hmm) {
if jt.searchMode {
runes := []rune(word)
width = len(runes)
@@ -44,7 +44,7 @@ func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
for i := 0; i < width-step+1; i++ {
gram = string(runes[i : i+step])
gramLen := len(gram)
if value, ok := jiebago.Trie.Freq[gram]; ok && value > 0 {
if value, ok := jt.j.Freq[gram]; ok && value > 0 {
gramStart := start + len(string(runes[:i]))
token := analysis.Token{
Term: []byte(gram),