1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-07-01 09:30:29 +08:00

removed unnecessary stateTag struct, using string instead

This commit is contained in:
Wang Bin
2015-03-25 15:13:46 +08:00
parent 1c378c28a7
commit 8687ca58b8
7 changed files with 85513 additions and 85537 deletions

View File

@@ -5,26 +5,13 @@ import (
"sort"
)
// stateTag pairs a single-byte state with a string tag. It is used as the
// key and value type of the maps built in viterbi.
type stateTag struct {
// State is the one-byte state component.
State byte
// Tag is the string tag component.
Tag string
}
// String formats the stateTag as "(<state>, <tag>)". State is a byte printed
// with %q, so it appears as a single-quoted character literal; Tag is printed
// verbatim with %s.
func (st stateTag) String() string {
return fmt.Sprintf("(%q, %s)", st.State, st.Tag)
}
// emptyStateTag returns a placeholder stateTag whose State is a space
// character and whose Tag is the empty string. viterbi stores it as the
// predecessor of every state at the first observation.
func emptyStateTag() stateTag {
return stateTag{' ', ""}
}
type probState struct {
Prob float64
ST stateTag
prob float64
state string
}
func (ps probState) String() string {
return fmt.Sprintf("(%v: %f)", ps.ST, ps.Prob)
return fmt.Sprintf("(%v: %f)", ps.state, ps.prob)
}
type probStates []probState
@@ -34,39 +21,36 @@ func (pss probStates) Len() int {
}
func (pss probStates) Less(i, j int) bool {
if pss[i].Prob == pss[j].Prob {
if pss[i].ST.State == pss[j].ST.State {
return pss[i].ST.Tag < pss[j].ST.Tag
}
return pss[i].ST.State < pss[j].ST.State
if pss[i].prob == pss[j].prob {
return pss[i].state < pss[j].state
}
return pss[i].Prob < pss[j].Prob
return pss[i].prob < pss[j].prob
}
// Swap exchanges the elements at indices i and j in place. Together with Len
// and Less it allows a probStates value to be passed to sort.Sort.
func (pss probStates) Swap(i, j int) {
pss[i], pss[j] = pss[j], pss[i]
}
func viterbi(obs []rune) (float64, []stateTag) {
func viterbi(obs []rune) []string {
obsLength := len(obs)
V := make([]map[stateTag]float64, obsLength)
V[0] = make(map[stateTag]float64)
mem_path := make([]map[stateTag]stateTag, obsLength)
mem_path[0] = make(map[stateTag]stateTag)
V := make([]map[string]float64, obsLength)
V[0] = make(map[string]float64)
mem_path := make([]map[string]string, obsLength)
mem_path[0] = make(map[string]string)
ys := charStateTab.get(obs[0]) // default is all_states
for _, y := range ys {
V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
mem_path[0][y] = emptyStateTag()
mem_path[0][y] = ""
}
for t := 1; t < obsLength; t++ {
prev_states := make([]stateTag, 0)
prev_states := make([]string, 0)
for x, _ := range mem_path[t-1] {
if len(probTrans[x]) > 0 {
prev_states = append(prev_states, x)
}
}
//use Go's map to implement Python's Set()
prev_states_expect_next := make(map[stateTag]stateTag)
prev_states_expect_next := make(map[string]string)
for _, x := range prev_states {
for y, _ := range probTrans[x] {
prev_states_expect_next[y] = y
@@ -74,7 +58,7 @@ func viterbi(obs []rune) (float64, []stateTag) {
}
tmp_obs_states := charStateTab.get(obs[t])
obs_states := make([]stateTag, 0)
obs_states := make([]string, 0)
for index, _ := range tmp_obs_states {
if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
obs_states = append(obs_states, tmp_obs_states[index])
@@ -88,40 +72,35 @@ func viterbi(obs []rune) (float64, []stateTag) {
if len(obs_states) == 0 {
obs_states = probTransKeys
}
mem_path[t] = make(map[stateTag]stateTag) // TODO: value needed or not?
V[t] = make(map[stateTag]float64)
mem_path[t] = make(map[string]string) // TODO: value needed or not?
V[t] = make(map[string]float64)
for _, y := range obs_states {
pss := make(probStates, 0)
for _, y0 := range prev_states {
ps := probState{
Prob: V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]),
ST: y0}
prob: V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]),
state: y0}
pss = append(pss, ps)
}
sort.Sort(sort.Reverse(pss))
V[t][y] = pss[0].Prob
mem_path[t][y] = pss[0].ST
V[t][y] = pss[0].prob
mem_path[t][y] = pss[0].state
}
}
last := make(probStates, 0)
length := len(mem_path)
vlength := len(V)
for y, _ := range mem_path[length-1] {
ps := probState{Prob: V[vlength-1][y], ST: y}
for y := range mem_path[length-1] {
ps := probState{prob: V[vlength-1][y], state: y}
last = append(last, ps)
}
sort.Sort(sort.Reverse(last))
prob := last[0].Prob
state := last[0].ST
route := make([]stateTag, len(obs))
i := obsLength - 1
for {
if i < 0 {
break
}
state := last[0].state
route := make([]string, len(obs))
for i := obsLength - 1; i >= 0; i-- {
route[i] = state
state = mem_path[i][state]
i -= 1
}
return prob, route
return route
}