mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-12 21:20:26 +08:00
code refactor, added more documents
This commit is contained in:
@@ -13,10 +13,10 @@ func cutHan(sentence string) chan string {
|
||||
result := make(chan string)
|
||||
go func() {
|
||||
runes := []rune(sentence)
|
||||
_, pos_list := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
|
||||
_, posList := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
|
||||
begin, next := 0, 0
|
||||
for i, char := range runes {
|
||||
pos := pos_list[i]
|
||||
pos := posList[i]
|
||||
switch pos {
|
||||
case 'B':
|
||||
begin = i
|
||||
@@ -36,6 +36,8 @@ func cutHan(sentence string) chan string {
|
||||
return result
|
||||
}
|
||||
|
||||
// Cut cuts sentence into words using Hidden Markov Model with Viterbi
|
||||
// algorithm. It is used by Jiebago for unknown words.
|
||||
func Cut(sentence string) chan string {
|
||||
result := make(chan string)
|
||||
s := sentence
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
)
|
||||
|
||||
func chanToArray(ch chan string) []string {
|
||||
result := make([]string, 0)
|
||||
var result []string
|
||||
for word := range ch {
|
||||
result = append(result, word)
|
||||
}
|
||||
|
||||
@@ -67,11 +67,11 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
|
||||
V[t] = make(map[byte]float64)
|
||||
for _, y := range states {
|
||||
ps0 := make(probStates, 0)
|
||||
var em_p float64
|
||||
var emP float64
|
||||
if val, ok := probEmit[y][obs[t]]; ok {
|
||||
em_p = val
|
||||
emP = val
|
||||
} else {
|
||||
em_p = minFloat
|
||||
emP = minFloat
|
||||
}
|
||||
for _, y0 := range prevStatus[y] {
|
||||
var transP float64
|
||||
@@ -80,7 +80,7 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
|
||||
} else {
|
||||
transP = minFloat
|
||||
}
|
||||
prob0 := V[t-1][y0] + transP + em_p
|
||||
prob0 := V[t-1][y0] + transP + emP
|
||||
ps0 = append(ps0, &probState{prob: prob0, state: y0})
|
||||
}
|
||||
sort.Sort(sort.Reverse(ps0))
|
||||
|
||||
Reference in New Issue
Block a user