1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-27 07:30:32 +08:00

small code refactor

This commit is contained in:
Wang Bin
2015-05-07 10:59:01 +08:00
parent c068670e9b
commit 939e903f31

View File

@@ -186,41 +186,42 @@ func (seg *Segmenter) cutDAG(sentence string) <-chan Segment {
frag := runes[x:y] frag := runes[x:y]
if y-x == 1 { if y-x == 1 {
buf = append(buf, frag...) buf = append(buf, frag...)
} else { x = y
if len(buf) > 0 { continue
bufString := string(buf) }
if len(buf) == 1 { if len(buf) > 0 {
if tag, ok := seg.dict.Pos(bufString); ok { bufString := string(buf)
result <- Segment{bufString, tag} if len(buf) == 1 {
} else { if tag, ok := seg.dict.Pos(bufString); ok {
result <- Segment{bufString, "x"} result <- Segment{bufString, tag}
}
buf = make([]rune, 0)
} else { } else {
if v, ok := seg.dict.Frequency(bufString); !ok || v == 0.0 { result <- Segment{bufString, "x"}
for t := range seg.cutDetail(bufString) { }
result <- t buf = make([]rune, 0)
} continue
}
if v, ok := seg.dict.Frequency(bufString); !ok || v == 0.0 {
for t := range seg.cutDetail(bufString) {
result <- t
}
} else {
for _, elem := range buf {
selem := string(elem)
if tag, ok := seg.dict.Pos(selem); ok {
result <- Segment{selem, tag}
} else { } else {
for _, elem := range buf { result <- Segment{selem, "x"}
selem := string(elem)
if tag, ok := seg.dict.Pos(selem); ok {
result <- Segment{selem, tag}
} else {
result <- Segment{selem, "x"}
}
}
} }
buf = make([]rune, 0)
} }
} }
word := string(frag) buf = make([]rune, 0)
if tag, ok := seg.dict.Pos(word); ok { }
result <- Segment{word, tag} word := string(frag)
} else { if tag, ok := seg.dict.Pos(word); ok {
result <- Segment{word, "x"} result <- Segment{word, tag}
} } else {
result <- Segment{word, "x"}
} }
x = y x = y
} }
@@ -270,19 +271,20 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
if reEng1.MatchString(string(frag)) && len(frag) == 1 { if reEng1.MatchString(string(frag)) && len(frag) == 1 {
buf = append(buf, frag...) buf = append(buf, frag...)
x = y x = y
} else { continue
if len(buf) > 0 {
result <- Segment{string(buf), "eng"}
buf = make([]rune, 0)
}
word := string(frag)
if tag, ok := seg.dict.Pos(word); ok {
result <- Segment{word, tag}
} else {
result <- Segment{word, "x"}
}
x = y
} }
if len(buf) > 0 {
result <- Segment{string(buf), "eng"}
buf = make([]rune, 0)
}
word := string(frag)
if tag, ok := seg.dict.Pos(word); ok {
result <- Segment{word, tag}
} else {
result <- Segment{word, "x"}
}
x = y
} }
if len(buf) > 0 { if len(buf) > 0 {
result <- Segment{string(buf), "eng"} result <- Segment{string(buf), "eng"}
@@ -309,22 +311,22 @@ func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan Segment {
for wordTag := range cut(blk) { for wordTag := range cut(blk) {
result <- wordTag result <- wordTag
} }
} else { continue
for _, x := range util.RegexpSplit(reSkipInternal, blk, -1) { }
if reSkipInternal.MatchString(x) { for _, x := range util.RegexpSplit(reSkipInternal, blk, -1) {
result <- Segment{x, "x"} if reSkipInternal.MatchString(x) {
} else { result <- Segment{x, "x"}
for _, xx := range x { continue
s := string(xx) }
switch { for _, xx := range x {
case reNum.MatchString(s): s := string(xx)
result <- Segment{s, "m"} switch {
case reEng.MatchString(x): case reNum.MatchString(s):
result <- Segment{x, "eng"} result <- Segment{s, "m"}
default: case reEng.MatchString(x):
result <- Segment{s, "x"} result <- Segment{x, "eng"}
} default:
} result <- Segment{s, "x"}
} }
} }
} }