1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-12 21:20:26 +08:00

code refactor, added more documents

This commit is contained in:
Wang Bin
2015-05-06 12:55:04 +08:00
parent 87caff09cb
commit 122bad0a8d
23 changed files with 228 additions and 142 deletions

View File

@@ -13,10 +13,10 @@ func cutHan(sentence string) chan string {
result := make(chan string)
go func() {
runes := []rune(sentence)
_, pos_list := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
_, posList := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
begin, next := 0, 0
for i, char := range runes {
pos := pos_list[i]
pos := posList[i]
switch pos {
case 'B':
begin = i
@@ -36,6 +36,8 @@ func cutHan(sentence string) chan string {
return result
}
// Cut cuts sentence into words using a Hidden Markov Model with the Viterbi
// algorithm. It is used by Jiebago for unknown words.
func Cut(sentence string) chan string {
result := make(chan string)
s := sentence

View File

@@ -6,7 +6,7 @@ import (
)
func chanToArray(ch chan string) []string {
result := make([]string, 0)
var result []string
for word := range ch {
result = append(result, word)
}

View File

@@ -67,11 +67,11 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
V[t] = make(map[byte]float64)
for _, y := range states {
ps0 := make(probStates, 0)
var em_p float64
var emP float64
if val, ok := probEmit[y][obs[t]]; ok {
em_p = val
emP = val
} else {
em_p = minFloat
emP = minFloat
}
for _, y0 := range prevStatus[y] {
var transP float64
@@ -80,7 +80,7 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
} else {
transP = minFloat
}
prob0 := V[t-1][y0] + transP + em_p
prob0 := V[t-1][y0] + transP + emP
ps0 = append(ps0, &probState{prob: prob0, state: y0})
}
sort.Sort(sort.Reverse(ps0))