mirror of
https://github.com/fumiama/jieba.git
synced 2026-07-01 01:20:29 +08:00
code refactor, added more documents
This commit is contained in:
@@ -17,27 +17,36 @@ var (
|
||||
reSkipInternal = regexp.MustCompile(`(\r\n|\s)`)
|
||||
)
|
||||
|
||||
// Segment represents a word together with its POS (part-of-speech) tag.
|
||||
type Segment struct {
|
||||
text, pos string
|
||||
}
|
||||
|
||||
// Text returns the Segment's text.
|
||||
func (s Segment) Text() string {
|
||||
return s.text
|
||||
}
|
||||
|
||||
// Pos returns the Segment's POS.
|
||||
func (s Segment) Pos() string {
|
||||
return s.pos
|
||||
}
|
||||
|
||||
// Segmenter is a Chinese word segmentation struct.
|
||||
type Segmenter struct {
|
||||
dict *Dictionary
|
||||
}
|
||||
|
||||
// LoadDictionary loads a dictionary from the given file name.
|
||||
// Every time LoadDictionary is called, the previously loaded dictionary is cleared.
|
||||
func (seg *Segmenter) LoadDictionary(fileName string) error {
|
||||
seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
|
||||
return seg.dict.loadDictionary(fileName)
|
||||
}
|
||||
|
||||
// LoadUserDictionary loads a user specified dictionary, it must be called
|
||||
// after LoadDictionary, and it will not clear any previously loaded dictionary,
|
||||
// instead it will override existing entries.
|
||||
func (seg *Segmenter) LoadUserDictionary(fileName string) error {
|
||||
return seg.dict.loadDictionary(fileName)
|
||||
}
|
||||
@@ -52,19 +61,19 @@ func (seg *Segmenter) cutDetailInternal(sentence string) <-chan Segment {
|
||||
next := 0
|
||||
for i, char := range runes {
|
||||
pos := posList[i]
|
||||
switch pos.Tag() {
|
||||
switch pos.position() {
|
||||
case "B":
|
||||
begin = i
|
||||
case "E":
|
||||
result <- Segment{string(runes[begin : i+1]), pos.POS()}
|
||||
result <- Segment{string(runes[begin : i+1]), pos.pos()}
|
||||
next = i + 1
|
||||
case "S":
|
||||
result <- Segment{string(char), pos.POS()}
|
||||
result <- Segment{string(char), pos.pos()}
|
||||
next = i + 1
|
||||
}
|
||||
}
|
||||
if next < len(runes) {
|
||||
result <- Segment{string(runes[next:]), posList[next].POS()}
|
||||
result <- Segment{string(runes[next:]), posList[next].pos()}
|
||||
}
|
||||
close(result)
|
||||
}()
|
||||
@@ -117,7 +126,7 @@ func (seg *Segmenter) dag(runes []rune) map[int][]int {
|
||||
if freq > 0.0 {
|
||||
dag[k] = append(dag[k], i)
|
||||
}
|
||||
i += 1
|
||||
i++
|
||||
if i >= n {
|
||||
break
|
||||
}
|
||||
@@ -170,7 +179,7 @@ func (seg *Segmenter) cutDAG(sentence string) <-chan Segment {
|
||||
routes := seg.calc(runes)
|
||||
var y int
|
||||
length := len(runes)
|
||||
buf := make([]rune, 0)
|
||||
var buf []rune
|
||||
for x := 0; x < length; {
|
||||
y = routes[x].index + 1
|
||||
frag := runes[x:y]
|
||||
@@ -253,7 +262,7 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
|
||||
routes := seg.calc(runes)
|
||||
var y int
|
||||
length := len(runes)
|
||||
buf := make([]rune, 0)
|
||||
var buf []rune
|
||||
for x := 0; x < length; {
|
||||
y = routes[x].index + 1
|
||||
frag := runes[x:y]
|
||||
@@ -283,6 +292,8 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
|
||||
return result
|
||||
}
|
||||
|
||||
// Cut cuts a sentence into words.
|
||||
// Parameter hmm controls whether to use the Hidden Markov Model.
|
||||
func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan Segment {
|
||||
result := make(chan Segment)
|
||||
var cut cutFunc
|
||||
|
||||
Reference in New Issue
Block a user