mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-05 00:32:51 +08:00
uniform the api
This commit is contained in:
@@ -161,7 +161,7 @@ func (t *TextRanker) TextRank(sentence string, topK int) wordWeights {
|
||||
// name in current directory. This function must be called before cutting any
|
||||
// sentence.
|
||||
func NewTextRanker(dictFileName string) (*TextRanker, error) {
|
||||
p, err := posseg.NewPosseg(dictFileName)
|
||||
p, err := posseg.Open(dictFileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
23
jieba.go
23
jieba.go
@@ -2,17 +2,19 @@
|
||||
package jiebago
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/wangbin/jiebago/finalseg"
|
||||
"regexp"
|
||||
"sort"
|
||||
)
|
||||
|
||||
var (
|
||||
reEng = regexp.MustCompile(`[[:alnum:]]`)
|
||||
reHanCutAll = regexp.MustCompile(`\p{Han}+`)
|
||||
reSkipCutAll = regexp.MustCompile(`[^[:alnum:]+#\n]`)
|
||||
reHanDefault = regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
|
||||
reSkipDefault = regexp.MustCompile(`(\r\n|\s)`)
|
||||
ErrInitialized = errors.New("already initialized")
|
||||
reEng = regexp.MustCompile(`[[:alnum:]]`)
|
||||
reHanCutAll = regexp.MustCompile(`\p{Han}+`)
|
||||
reSkipCutAll = regexp.MustCompile(`[^[:alnum:]+#\n]`)
|
||||
reHanDefault = regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
|
||||
reSkipDefault = regexp.MustCompile(`(\r\n|\s)`)
|
||||
)
|
||||
|
||||
type Segmenter interface {
|
||||
@@ -51,8 +53,15 @@ func (j *Jieba) Add(word string, freq float64) {
|
||||
}
|
||||
|
||||
// Load user specified dictionary file.
|
||||
func (j *Jieba) LoadUserDict(dictFilePath string) error {
|
||||
return LoadDict(j, dictFilePath, false)
|
||||
func (j *Jieba) LoadUserDict(dictFileName string) error {
|
||||
return LoadDict(j, dictFileName, false)
|
||||
}
|
||||
|
||||
func (j *Jieba) SetDict(dictFileName string) error {
|
||||
if len(j.freqMap) > 0 || j.total > 0.0 {
|
||||
return ErrInitialized
|
||||
}
|
||||
return LoadDict(j, dictFileName, false)
|
||||
}
|
||||
|
||||
func New() *Jieba {
|
||||
|
||||
@@ -22,30 +22,43 @@ type Pair struct {
|
||||
|
||||
type Posseg struct {
|
||||
*jiebago.Jieba
|
||||
Flag map[string]string
|
||||
flagMap map[string]string
|
||||
}
|
||||
|
||||
func (p *Posseg) AddEntry(entry jiebago.Entry) {
|
||||
if len(entry.Flag) > 0 {
|
||||
p.Flag[entry.Word] = strings.TrimSpace(entry.Flag)
|
||||
p.flagMap[entry.Word] = strings.TrimSpace(entry.Flag)
|
||||
}
|
||||
p.Add(entry.Word, entry.Freq)
|
||||
}
|
||||
|
||||
func (p Posseg) Flag(word string) (string, bool) {
|
||||
flag, ok := p.flagMap[word]
|
||||
return flag, ok
|
||||
}
|
||||
|
||||
// Set dictionary, it could be absolute path of dictionary file, or dictionary
|
||||
// name in current directory.
|
||||
func NewPosseg(dictFileName string) (*Posseg, error) {
|
||||
p := &Posseg{jiebago.New(), make(map[string]string)}
|
||||
func Open(dictFileName string) (*Posseg, error) {
|
||||
p := New()
|
||||
err := jiebago.LoadDict(p, dictFileName, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return p, nil
|
||||
return p, err
|
||||
}
|
||||
|
||||
// Load user specified dictionary file.
|
||||
func (p *Posseg) LoadUserDict(dictFilePath string) error {
|
||||
return jiebago.LoadDict(p, dictFilePath, true)
|
||||
func (p *Posseg) LoadUserDict(dictFileName string) error {
|
||||
return jiebago.LoadDict(p, dictFileName, true)
|
||||
}
|
||||
|
||||
func (p *Posseg) SetDict(dictFileName string) error {
|
||||
if len(p.flagMap) > 0 || p.Total() > 0.0 {
|
||||
return jiebago.ErrInitialized
|
||||
}
|
||||
return jiebago.LoadDict(p, dictFileName, false)
|
||||
}
|
||||
|
||||
func New() *Posseg {
|
||||
return &Posseg{jiebago.New(), make(map[string]string)}
|
||||
}
|
||||
|
||||
func (p *Posseg) cutDetailInternal(sentence string) chan Pair {
|
||||
@@ -128,7 +141,7 @@ func (p *Posseg) cutDAG(sentence string) chan Pair {
|
||||
if len(buf) > 0 {
|
||||
if len(buf) == 1 {
|
||||
sbuf := string(buf)
|
||||
if tag, ok := p.Flag[sbuf]; ok {
|
||||
if tag, ok := p.Flag(sbuf); ok {
|
||||
result <- Pair{sbuf, tag}
|
||||
} else {
|
||||
result <- Pair{sbuf, "x"}
|
||||
@@ -143,7 +156,7 @@ func (p *Posseg) cutDAG(sentence string) chan Pair {
|
||||
} else {
|
||||
for _, elem := range buf {
|
||||
selem := string(elem)
|
||||
if tag, ok := p.Flag[selem]; ok {
|
||||
if tag, ok := p.Flag(selem); ok {
|
||||
result <- Pair{string(elem), tag}
|
||||
} else {
|
||||
result <- Pair{string(elem), "x"}
|
||||
@@ -155,7 +168,7 @@ func (p *Posseg) cutDAG(sentence string) chan Pair {
|
||||
}
|
||||
}
|
||||
sl_word := string(l_word)
|
||||
if tag, ok := p.Flag[sl_word]; ok {
|
||||
if tag, ok := p.Flag(sl_word); ok {
|
||||
result <- Pair{sl_word, tag}
|
||||
} else {
|
||||
result <- Pair{sl_word, "x"}
|
||||
@@ -167,7 +180,7 @@ func (p *Posseg) cutDAG(sentence string) chan Pair {
|
||||
if len(buf) > 0 {
|
||||
if len(buf) == 1 {
|
||||
sbuf := string(buf)
|
||||
if tag, ok := p.Flag[sbuf]; ok {
|
||||
if tag, ok := p.Flag(sbuf); ok {
|
||||
result <- Pair{sbuf, tag}
|
||||
} else {
|
||||
result <- Pair{sbuf, "x"}
|
||||
@@ -181,7 +194,7 @@ func (p *Posseg) cutDAG(sentence string) chan Pair {
|
||||
} else {
|
||||
for _, elem := range buf {
|
||||
selem := string(elem)
|
||||
if tag, ok := p.Flag[selem]; ok {
|
||||
if tag, ok := p.Flag(selem); ok {
|
||||
result <- Pair{selem, tag}
|
||||
} else {
|
||||
result <- Pair{selem, "x"}
|
||||
@@ -221,7 +234,7 @@ func (p *Posseg) cutDAGNoHMM(sentence string) chan Pair {
|
||||
buf = make([]rune, 0)
|
||||
}
|
||||
sl_word := string(l_word)
|
||||
if tag, ok := p.Flag[sl_word]; ok {
|
||||
if tag, ok := p.Flag(sl_word); ok {
|
||||
result <- Pair{sl_word, tag}
|
||||
} else {
|
||||
result <- Pair{sl_word, "x"}
|
||||
|
||||
@@ -276,7 +276,7 @@ func chanToArray(ch chan Pair) []Pair {
|
||||
}
|
||||
|
||||
func TestCut(t *testing.T) {
|
||||
p, err := NewPosseg("../dict.txt")
|
||||
p, err := Open("../dict.txt")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -307,7 +307,7 @@ func TestBug132(t *testing.T) {
|
||||
/*
|
||||
https://github.com/fxsjy/jieba/issues/132
|
||||
*/
|
||||
p, _ := NewPosseg("../dict.txt")
|
||||
p, _ := Open("../dict.txt")
|
||||
sentence := "又跛又啞"
|
||||
cutResult := []Pair{
|
||||
Pair{"又", "d"},
|
||||
@@ -330,7 +330,7 @@ func TestBug137(t *testing.T) {
|
||||
/*
|
||||
https://github.com/fxsjy/jieba/issues/137
|
||||
*/
|
||||
p, _ := NewPosseg("../dict.txt")
|
||||
p, _ := Open("../dict.txt")
|
||||
sentence := "前港督衛奕信在八八年十月宣布成立中央政策研究組"
|
||||
cutResult := []Pair{
|
||||
Pair{"前", "f"},
|
||||
@@ -359,7 +359,7 @@ func TestBug137(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestUserDict(t *testing.T) {
|
||||
p, _ := NewPosseg("../dict.txt")
|
||||
p, _ := Open("../dict.txt")
|
||||
p.LoadUserDict("../userdict.txt")
|
||||
sentence := "李小福是创新办主任也是云计算方面的专家; 什么是八一双鹿例如我输入一个带“韩玉赏鉴”的标题,在自定义词库中也增加了此词为N类型"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user