1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-07-03 10:30:28 +08:00

removed unnecessary stateTag struct, using string instead

This commit is contained in:
Wang Bin
2015-03-25 15:13:46 +08:00
parent 1c378c28a7
commit 8687ca58b8
7 changed files with 85513 additions and 85537 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -66,24 +66,24 @@ func (p *Posseg) cutDetailInternal(sentence string) chan WordTag {
go func() { go func() {
runes := []rune(sentence) runes := []rune(sentence)
_, posList := viterbi(runes) posList := viterbi(runes)
begin := 0 begin := 0
next := 0 next := 0
for i, char := range runes { for i, char := range runes {
pos := posList[i].State pos := posList[i]
switch pos { switch pos[0] {
case 'B': case 'B':
begin = i begin = i
case 'E': case 'E':
result <- WordTag{string(runes[begin : i+1]), posList[i].Tag} result <- WordTag{string(runes[begin : i+1]), string(pos[1:])}
next = i + 1 next = i + 1
case 'S': case 'S':
result <- WordTag{string(char), posList[i].Tag} result <- WordTag{string(char), string(pos[1:])}
next = i + 1 next = i + 1
} }
} }
if next < len(runes) { if next < len(runes) {
result <- WordTag{string(runes[next:]), posList[next].Tag} result <- WordTag{string(runes[next:]), string(posList[next][1:])}
} }
close(result) close(result)
}() }()

File diff suppressed because it is too large Load Diff

View File

@@ -1,264 +1,264 @@
package posseg package posseg
var ( var (
probStart = make(map[stateTag]float64) probStart = make(map[string]float64)
) )
func init() { func init() {
probStart[stateTag{'B', "a"}] = -4.762305214596967 probStart["Ba"] = -4.762305214596967
probStart[stateTag{'B', "ad"}] = -6.680066036784177 probStart["Bad"] = -6.680066036784177
probStart[stateTag{'B', "ag"}] = -3.14e+100 probStart["Bag"] = -3.14e+100
probStart[stateTag{'B', "an"}] = -8.697083223018778 probStart["Ban"] = -8.697083223018778
probStart[stateTag{'B', "b"}] = -5.018374362109218 probStart["Bb"] = -5.018374362109218
probStart[stateTag{'B', "bg"}] = -3.14e+100 probStart["Bbg"] = -3.14e+100
probStart[stateTag{'B', "c"}] = -3.423880184954888 probStart["Bc"] = -3.423880184954888
probStart[stateTag{'B', "d"}] = -3.9750475297585357 probStart["Bd"] = -3.9750475297585357
probStart[stateTag{'B', "df"}] = -8.888974230828882 probStart["Bdf"] = -8.888974230828882
probStart[stateTag{'B', "dg"}] = -3.14e+100 probStart["Bdg"] = -3.14e+100
probStart[stateTag{'B', "e"}] = -8.563551830394255 probStart["Be"] = -8.563551830394255
probStart[stateTag{'B', "en"}] = -3.14e+100 probStart["Ben"] = -3.14e+100
probStart[stateTag{'B', "f"}] = -5.491630418482717 probStart["Bf"] = -5.491630418482717
probStart[stateTag{'B', "g"}] = -3.14e+100 probStart["Bg"] = -3.14e+100
probStart[stateTag{'B', "h"}] = -13.533365129970255 probStart["Bh"] = -13.533365129970255
probStart[stateTag{'B', "i"}] = -6.1157847275557105 probStart["Bi"] = -6.1157847275557105
probStart[stateTag{'B', "in"}] = -3.14e+100 probStart["Bin"] = -3.14e+100
probStart[stateTag{'B', "j"}] = -5.0576191284681915 probStart["Bj"] = -5.0576191284681915
probStart[stateTag{'B', "jn"}] = -3.14e+100 probStart["Bjn"] = -3.14e+100
probStart[stateTag{'B', "k"}] = -3.14e+100 probStart["Bk"] = -3.14e+100
probStart[stateTag{'B', "l"}] = -4.905883584659895 probStart["Bl"] = -4.905883584659895
probStart[stateTag{'B', "ln"}] = -3.14e+100 probStart["Bln"] = -3.14e+100
probStart[stateTag{'B', "m"}] = -3.6524299819046386 probStart["Bm"] = -3.6524299819046386
probStart[stateTag{'B', "mg"}] = -3.14e+100 probStart["Bmg"] = -3.14e+100
probStart[stateTag{'B', "mq"}] = -6.78695300139688 probStart["Bmq"] = -6.78695300139688
probStart[stateTag{'B', "n"}] = -1.6966257797548328 probStart["Bn"] = -1.6966257797548328
probStart[stateTag{'B', "ng"}] = -3.14e+100 probStart["Bng"] = -3.14e+100
probStart[stateTag{'B', "nr"}] = -2.2310495913769506 probStart["Bnr"] = -2.2310495913769506
probStart[stateTag{'B', "nrfg"}] = -5.873722175405573 probStart["Bnrfg"] = -5.873722175405573
probStart[stateTag{'B', "nrt"}] = -4.985642733519195 probStart["Bnrt"] = -4.985642733519195
probStart[stateTag{'B', "ns"}] = -2.8228438314969213 probStart["Bns"] = -2.8228438314969213
probStart[stateTag{'B', "nt"}] = -4.846091668182416 probStart["Bnt"] = -4.846091668182416
probStart[stateTag{'B', "nz"}] = -3.94698846057672 probStart["Bnz"] = -3.94698846057672
probStart[stateTag{'B', "o"}] = -8.433498702146057 probStart["Bo"] = -8.433498702146057
probStart[stateTag{'B', "p"}] = -4.200984132085048 probStart["Bp"] = -4.200984132085048
probStart[stateTag{'B', "q"}] = -6.998123858956596 probStart["Bq"] = -6.998123858956596
probStart[stateTag{'B', "qe"}] = -3.14e+100 probStart["Bqe"] = -3.14e+100
probStart[stateTag{'B', "qg"}] = -3.14e+100 probStart["Bqg"] = -3.14e+100
probStart[stateTag{'B', "r"}] = -3.4098187790818413 probStart["Br"] = -3.4098187790818413
probStart[stateTag{'B', "rg"}] = -3.14e+100 probStart["Brg"] = -3.14e+100
probStart[stateTag{'B', "rr"}] = -12.434752841302146 probStart["Brr"] = -12.434752841302146
probStart[stateTag{'B', "rz"}] = -7.946116471570005 probStart["Brz"] = -7.946116471570005
probStart[stateTag{'B', "s"}] = -5.522673590839954 probStart["Bs"] = -5.522673590839954
probStart[stateTag{'B', "t"}] = -3.3647479094528574 probStart["Bt"] = -3.3647479094528574
probStart[stateTag{'B', "tg"}] = -3.14e+100 probStart["Btg"] = -3.14e+100
probStart[stateTag{'B', "u"}] = -9.163917277503234 probStart["Bu"] = -9.163917277503234
probStart[stateTag{'B', "ud"}] = -3.14e+100 probStart["Bud"] = -3.14e+100
probStart[stateTag{'B', "ug"}] = -3.14e+100 probStart["Bug"] = -3.14e+100
probStart[stateTag{'B', "uj"}] = -3.14e+100 probStart["Buj"] = -3.14e+100
probStart[stateTag{'B', "ul"}] = -3.14e+100 probStart["Bul"] = -3.14e+100
probStart[stateTag{'B', "uv"}] = -3.14e+100 probStart["Buv"] = -3.14e+100
probStart[stateTag{'B', "uz"}] = -3.14e+100 probStart["Buz"] = -3.14e+100
probStart[stateTag{'B', "v"}] = -2.6740584874265685 probStart["Bv"] = -2.6740584874265685
probStart[stateTag{'B', "vd"}] = -9.044728760238115 probStart["Bvd"] = -9.044728760238115
probStart[stateTag{'B', "vg"}] = -3.14e+100 probStart["Bvg"] = -3.14e+100
probStart[stateTag{'B', "vi"}] = -12.434752841302146 probStart["Bvi"] = -12.434752841302146
probStart[stateTag{'B', "vn"}] = -4.3315610890163585 probStart["Bvn"] = -4.3315610890163585
probStart[stateTag{'B', "vq"}] = -12.147070768850364 probStart["Bvq"] = -12.147070768850364
probStart[stateTag{'B', "w"}] = -3.14e+100 probStart["Bw"] = -3.14e+100
probStart[stateTag{'B', "x"}] = -3.14e+100 probStart["Bx"] = -3.14e+100
probStart[stateTag{'B', "y"}] = -9.844485675856319 probStart["By"] = -9.844485675856319
probStart[stateTag{'B', "yg"}] = -3.14e+100 probStart["Byg"] = -3.14e+100
probStart[stateTag{'B', "z"}] = -7.045681111485645 probStart["Bz"] = -7.045681111485645
probStart[stateTag{'B', "zg"}] = -3.14e+100 probStart["Bzg"] = -3.14e+100
probStart[stateTag{'E', "a"}] = -3.14e+100 probStart["Ea"] = -3.14e+100
probStart[stateTag{'E', "ad"}] = -3.14e+100 probStart["Ead"] = -3.14e+100
probStart[stateTag{'E', "ag"}] = -3.14e+100 probStart["Eag"] = -3.14e+100
probStart[stateTag{'E', "an"}] = -3.14e+100 probStart["Ean"] = -3.14e+100
probStart[stateTag{'E', "b"}] = -3.14e+100 probStart["Eb"] = -3.14e+100
probStart[stateTag{'E', "bg"}] = -3.14e+100 probStart["Ebg"] = -3.14e+100
probStart[stateTag{'E', "c"}] = -3.14e+100 probStart["Ec"] = -3.14e+100
probStart[stateTag{'E', "d"}] = -3.14e+100 probStart["Ed"] = -3.14e+100
probStart[stateTag{'E', "df"}] = -3.14e+100 probStart["Edf"] = -3.14e+100
probStart[stateTag{'E', "dg"}] = -3.14e+100 probStart["Edg"] = -3.14e+100
probStart[stateTag{'E', "e"}] = -3.14e+100 probStart["Ee"] = -3.14e+100
probStart[stateTag{'E', "en"}] = -3.14e+100 probStart["Een"] = -3.14e+100
probStart[stateTag{'E', "f"}] = -3.14e+100 probStart["Ef"] = -3.14e+100
probStart[stateTag{'E', "g"}] = -3.14e+100 probStart["Eg"] = -3.14e+100
probStart[stateTag{'E', "h"}] = -3.14e+100 probStart["Eh"] = -3.14e+100
probStart[stateTag{'E', "i"}] = -3.14e+100 probStart["Ei"] = -3.14e+100
probStart[stateTag{'E', "in"}] = -3.14e+100 probStart["Ein"] = -3.14e+100
probStart[stateTag{'E', "j"}] = -3.14e+100 probStart["Ej"] = -3.14e+100
probStart[stateTag{'E', "jn"}] = -3.14e+100 probStart["Ejn"] = -3.14e+100
probStart[stateTag{'E', "k"}] = -3.14e+100 probStart["Ek"] = -3.14e+100
probStart[stateTag{'E', "l"}] = -3.14e+100 probStart["El"] = -3.14e+100
probStart[stateTag{'E', "ln"}] = -3.14e+100 probStart["Eln"] = -3.14e+100
probStart[stateTag{'E', "m"}] = -3.14e+100 probStart["Em"] = -3.14e+100
probStart[stateTag{'E', "mg"}] = -3.14e+100 probStart["Emg"] = -3.14e+100
probStart[stateTag{'E', "mq"}] = -3.14e+100 probStart["Emq"] = -3.14e+100
probStart[stateTag{'E', "n"}] = -3.14e+100 probStart["En"] = -3.14e+100
probStart[stateTag{'E', "ng"}] = -3.14e+100 probStart["Eng"] = -3.14e+100
probStart[stateTag{'E', "nr"}] = -3.14e+100 probStart["Enr"] = -3.14e+100
probStart[stateTag{'E', "nrfg"}] = -3.14e+100 probStart["Enrfg"] = -3.14e+100
probStart[stateTag{'E', "nrt"}] = -3.14e+100 probStart["Enrt"] = -3.14e+100
probStart[stateTag{'E', "ns"}] = -3.14e+100 probStart["Ens"] = -3.14e+100
probStart[stateTag{'E', "nt"}] = -3.14e+100 probStart["Ent"] = -3.14e+100
probStart[stateTag{'E', "nz"}] = -3.14e+100 probStart["Enz"] = -3.14e+100
probStart[stateTag{'E', "o"}] = -3.14e+100 probStart["Eo"] = -3.14e+100
probStart[stateTag{'E', "p"}] = -3.14e+100 probStart["Ep"] = -3.14e+100
probStart[stateTag{'E', "q"}] = -3.14e+100 probStart["Eq"] = -3.14e+100
probStart[stateTag{'E', "qe"}] = -3.14e+100 probStart["Eqe"] = -3.14e+100
probStart[stateTag{'E', "qg"}] = -3.14e+100 probStart["Eqg"] = -3.14e+100
probStart[stateTag{'E', "r"}] = -3.14e+100 probStart["Er"] = -3.14e+100
probStart[stateTag{'E', "rg"}] = -3.14e+100 probStart["Erg"] = -3.14e+100
probStart[stateTag{'E', "rr"}] = -3.14e+100 probStart["Err"] = -3.14e+100
probStart[stateTag{'E', "rz"}] = -3.14e+100 probStart["Erz"] = -3.14e+100
probStart[stateTag{'E', "s"}] = -3.14e+100 probStart["Es"] = -3.14e+100
probStart[stateTag{'E', "t"}] = -3.14e+100 probStart["Et"] = -3.14e+100
probStart[stateTag{'E', "tg"}] = -3.14e+100 probStart["Etg"] = -3.14e+100
probStart[stateTag{'E', "u"}] = -3.14e+100 probStart["Eu"] = -3.14e+100
probStart[stateTag{'E', "ud"}] = -3.14e+100 probStart["Eud"] = -3.14e+100
probStart[stateTag{'E', "ug"}] = -3.14e+100 probStart["Eug"] = -3.14e+100
probStart[stateTag{'E', "uj"}] = -3.14e+100 probStart["Euj"] = -3.14e+100
probStart[stateTag{'E', "ul"}] = -3.14e+100 probStart["Eul"] = -3.14e+100
probStart[stateTag{'E', "uv"}] = -3.14e+100 probStart["Euv"] = -3.14e+100
probStart[stateTag{'E', "uz"}] = -3.14e+100 probStart["Euz"] = -3.14e+100
probStart[stateTag{'E', "v"}] = -3.14e+100 probStart["Ev"] = -3.14e+100
probStart[stateTag{'E', "vd"}] = -3.14e+100 probStart["Evd"] = -3.14e+100
probStart[stateTag{'E', "vg"}] = -3.14e+100 probStart["Evg"] = -3.14e+100
probStart[stateTag{'E', "vi"}] = -3.14e+100 probStart["Evi"] = -3.14e+100
probStart[stateTag{'E', "vn"}] = -3.14e+100 probStart["Evn"] = -3.14e+100
probStart[stateTag{'E', "vq"}] = -3.14e+100 probStart["Evq"] = -3.14e+100
probStart[stateTag{'E', "w"}] = -3.14e+100 probStart["Ew"] = -3.14e+100
probStart[stateTag{'E', "x"}] = -3.14e+100 probStart["Ex"] = -3.14e+100
probStart[stateTag{'E', "y"}] = -3.14e+100 probStart["Ey"] = -3.14e+100
probStart[stateTag{'E', "yg"}] = -3.14e+100 probStart["Eyg"] = -3.14e+100
probStart[stateTag{'E', "z"}] = -3.14e+100 probStart["Ez"] = -3.14e+100
probStart[stateTag{'E', "zg"}] = -3.14e+100 probStart["Ezg"] = -3.14e+100
probStart[stateTag{'M', "a"}] = -3.14e+100 probStart["Ma"] = -3.14e+100
probStart[stateTag{'M', "ad"}] = -3.14e+100 probStart["Mad"] = -3.14e+100
probStart[stateTag{'M', "ag"}] = -3.14e+100 probStart["Mag"] = -3.14e+100
probStart[stateTag{'M', "an"}] = -3.14e+100 probStart["Man"] = -3.14e+100
probStart[stateTag{'M', "b"}] = -3.14e+100 probStart["Mb"] = -3.14e+100
probStart[stateTag{'M', "bg"}] = -3.14e+100 probStart["Mbg"] = -3.14e+100
probStart[stateTag{'M', "c"}] = -3.14e+100 probStart["Mc"] = -3.14e+100
probStart[stateTag{'M', "d"}] = -3.14e+100 probStart["Md"] = -3.14e+100
probStart[stateTag{'M', "df"}] = -3.14e+100 probStart["Mdf"] = -3.14e+100
probStart[stateTag{'M', "dg"}] = -3.14e+100 probStart["Mdg"] = -3.14e+100
probStart[stateTag{'M', "e"}] = -3.14e+100 probStart["Me"] = -3.14e+100
probStart[stateTag{'M', "en"}] = -3.14e+100 probStart["Men"] = -3.14e+100
probStart[stateTag{'M', "f"}] = -3.14e+100 probStart["Mf"] = -3.14e+100
probStart[stateTag{'M', "g"}] = -3.14e+100 probStart["Mg"] = -3.14e+100
probStart[stateTag{'M', "h"}] = -3.14e+100 probStart["Mh"] = -3.14e+100
probStart[stateTag{'M', "i"}] = -3.14e+100 probStart["Mi"] = -3.14e+100
probStart[stateTag{'M', "in"}] = -3.14e+100 probStart["Min"] = -3.14e+100
probStart[stateTag{'M', "j"}] = -3.14e+100 probStart["Mj"] = -3.14e+100
probStart[stateTag{'M', "jn"}] = -3.14e+100 probStart["Mjn"] = -3.14e+100
probStart[stateTag{'M', "k"}] = -3.14e+100 probStart["Mk"] = -3.14e+100
probStart[stateTag{'M', "l"}] = -3.14e+100 probStart["Ml"] = -3.14e+100
probStart[stateTag{'M', "ln"}] = -3.14e+100 probStart["Mln"] = -3.14e+100
probStart[stateTag{'M', "m"}] = -3.14e+100 probStart["Mm"] = -3.14e+100
probStart[stateTag{'M', "mg"}] = -3.14e+100 probStart["Mmg"] = -3.14e+100
probStart[stateTag{'M', "mq"}] = -3.14e+100 probStart["Mmq"] = -3.14e+100
probStart[stateTag{'M', "n"}] = -3.14e+100 probStart["Mn"] = -3.14e+100
probStart[stateTag{'M', "ng"}] = -3.14e+100 probStart["Mng"] = -3.14e+100
probStart[stateTag{'M', "nr"}] = -3.14e+100 probStart["Mnr"] = -3.14e+100
probStart[stateTag{'M', "nrfg"}] = -3.14e+100 probStart["Mnrfg"] = -3.14e+100
probStart[stateTag{'M', "nrt"}] = -3.14e+100 probStart["Mnrt"] = -3.14e+100
probStart[stateTag{'M', "ns"}] = -3.14e+100 probStart["Mns"] = -3.14e+100
probStart[stateTag{'M', "nt"}] = -3.14e+100 probStart["Mnt"] = -3.14e+100
probStart[stateTag{'M', "nz"}] = -3.14e+100 probStart["Mnz"] = -3.14e+100
probStart[stateTag{'M', "o"}] = -3.14e+100 probStart["Mo"] = -3.14e+100
probStart[stateTag{'M', "p"}] = -3.14e+100 probStart["Mp"] = -3.14e+100
probStart[stateTag{'M', "q"}] = -3.14e+100 probStart["Mq"] = -3.14e+100
probStart[stateTag{'M', "qe"}] = -3.14e+100 probStart["Mqe"] = -3.14e+100
probStart[stateTag{'M', "qg"}] = -3.14e+100 probStart["Mqg"] = -3.14e+100
probStart[stateTag{'M', "r"}] = -3.14e+100 probStart["Mr"] = -3.14e+100
probStart[stateTag{'M', "rg"}] = -3.14e+100 probStart["Mrg"] = -3.14e+100
probStart[stateTag{'M', "rr"}] = -3.14e+100 probStart["Mrr"] = -3.14e+100
probStart[stateTag{'M', "rz"}] = -3.14e+100 probStart["Mrz"] = -3.14e+100
probStart[stateTag{'M', "s"}] = -3.14e+100 probStart["Ms"] = -3.14e+100
probStart[stateTag{'M', "t"}] = -3.14e+100 probStart["Mt"] = -3.14e+100
probStart[stateTag{'M', "tg"}] = -3.14e+100 probStart["Mtg"] = -3.14e+100
probStart[stateTag{'M', "u"}] = -3.14e+100 probStart["Mu"] = -3.14e+100
probStart[stateTag{'M', "ud"}] = -3.14e+100 probStart["Mud"] = -3.14e+100
probStart[stateTag{'M', "ug"}] = -3.14e+100 probStart["Mug"] = -3.14e+100
probStart[stateTag{'M', "uj"}] = -3.14e+100 probStart["Muj"] = -3.14e+100
probStart[stateTag{'M', "ul"}] = -3.14e+100 probStart["Mul"] = -3.14e+100
probStart[stateTag{'M', "uv"}] = -3.14e+100 probStart["Muv"] = -3.14e+100
probStart[stateTag{'M', "uz"}] = -3.14e+100 probStart["Muz"] = -3.14e+100
probStart[stateTag{'M', "v"}] = -3.14e+100 probStart["Mv"] = -3.14e+100
probStart[stateTag{'M', "vd"}] = -3.14e+100 probStart["Mvd"] = -3.14e+100
probStart[stateTag{'M', "vg"}] = -3.14e+100 probStart["Mvg"] = -3.14e+100
probStart[stateTag{'M', "vi"}] = -3.14e+100 probStart["Mvi"] = -3.14e+100
probStart[stateTag{'M', "vn"}] = -3.14e+100 probStart["Mvn"] = -3.14e+100
probStart[stateTag{'M', "vq"}] = -3.14e+100 probStart["Mvq"] = -3.14e+100
probStart[stateTag{'M', "w"}] = -3.14e+100 probStart["Mw"] = -3.14e+100
probStart[stateTag{'M', "x"}] = -3.14e+100 probStart["Mx"] = -3.14e+100
probStart[stateTag{'M', "y"}] = -3.14e+100 probStart["My"] = -3.14e+100
probStart[stateTag{'M', "yg"}] = -3.14e+100 probStart["Myg"] = -3.14e+100
probStart[stateTag{'M', "z"}] = -3.14e+100 probStart["Mz"] = -3.14e+100
probStart[stateTag{'M', "zg"}] = -3.14e+100 probStart["Mzg"] = -3.14e+100
probStart[stateTag{'S', "a"}] = -3.9025396831295227 probStart["Sa"] = -3.9025396831295227
probStart[stateTag{'S', "ad"}] = -11.048458480182255 probStart["Sad"] = -11.048458480182255
probStart[stateTag{'S', "ag"}] = -6.954113917960154 probStart["Sag"] = -6.954113917960154
probStart[stateTag{'S', "an"}] = -12.84021794941031 probStart["San"] = -12.84021794941031
probStart[stateTag{'S', "b"}] = -6.472888763970454 probStart["Sb"] = -6.472888763970454
probStart[stateTag{'S', "bg"}] = -3.14e+100 probStart["Sbg"] = -3.14e+100
probStart[stateTag{'S', "c"}] = -4.786966795861212 probStart["Sc"] = -4.786966795861212
probStart[stateTag{'S', "d"}] = -3.903919764181873 probStart["Sd"] = -3.903919764181873
probStart[stateTag{'S', "df"}] = -3.14e+100 probStart["Sdf"] = -3.14e+100
probStart[stateTag{'S', "dg"}] = -8.948397651299683 probStart["Sdg"] = -8.948397651299683
probStart[stateTag{'S', "e"}] = -5.942513006281674 probStart["Se"] = -5.942513006281674
probStart[stateTag{'S', "en"}] = -3.14e+100 probStart["Sen"] = -3.14e+100
probStart[stateTag{'S', "f"}] = -5.194820249981676 probStart["Sf"] = -5.194820249981676
probStart[stateTag{'S', "g"}] = -6.507826815331734 probStart["Sg"] = -6.507826815331734
probStart[stateTag{'S', "h"}] = -8.650563207383884 probStart["Sh"] = -8.650563207383884
probStart[stateTag{'S', "i"}] = -3.14e+100 probStart["Si"] = -3.14e+100
probStart[stateTag{'S', "in"}] = -3.14e+100 probStart["Sin"] = -3.14e+100
probStart[stateTag{'S', "j"}] = -4.911992119644354 probStart["Sj"] = -4.911992119644354
probStart[stateTag{'S', "jn"}] = -3.14e+100 probStart["Sjn"] = -3.14e+100
probStart[stateTag{'S', "k"}] = -6.940320595827818 probStart["Sk"] = -6.940320595827818
probStart[stateTag{'S', "l"}] = -3.14e+100 probStart["Sl"] = -3.14e+100
probStart[stateTag{'S', "ln"}] = -3.14e+100 probStart["Sln"] = -3.14e+100
probStart[stateTag{'S', "m"}] = -3.269200652116097 probStart["Sm"] = -3.269200652116097
probStart[stateTag{'S', "mg"}] = -10.825314928868044 probStart["Smg"] = -10.825314928868044
probStart[stateTag{'S', "mq"}] = -3.14e+100 probStart["Smq"] = -3.14e+100
probStart[stateTag{'S', "n"}] = -3.8551483897645107 probStart["Sn"] = -3.8551483897645107
probStart[stateTag{'S', "ng"}] = -4.913434861102905 probStart["Sng"] = -4.913434861102905
probStart[stateTag{'S', "nr"}] = -4.483663103956885 probStart["Snr"] = -4.483663103956885
probStart[stateTag{'S', "nrfg"}] = -3.14e+100 probStart["Snrfg"] = -3.14e+100
probStart[stateTag{'S', "nrt"}] = -3.14e+100 probStart["Snrt"] = -3.14e+100
probStart[stateTag{'S', "ns"}] = -3.14e+100 probStart["Sns"] = -3.14e+100
probStart[stateTag{'S', "nt"}] = -12.147070768850364 probStart["Snt"] = -12.147070768850364
probStart[stateTag{'S', "nz"}] = -3.14e+100 probStart["Snz"] = -3.14e+100
probStart[stateTag{'S', "o"}] = -8.464460927750023 probStart["So"] = -8.464460927750023
probStart[stateTag{'S', "p"}] = -2.9868401813596317 probStart["Sp"] = -2.9868401813596317
probStart[stateTag{'S', "q"}] = -4.888658618255058 probStart["Sq"] = -4.888658618255058
probStart[stateTag{'S', "qe"}] = -3.14e+100 probStart["Sqe"] = -3.14e+100
probStart[stateTag{'S', "qg"}] = -3.14e+100 probStart["Sqg"] = -3.14e+100
probStart[stateTag{'S', "r"}] = -2.7635336784127853 probStart["Sr"] = -2.7635336784127853
probStart[stateTag{'S', "rg"}] = -10.275268591948773 probStart["Srg"] = -10.275268591948773
probStart[stateTag{'S', "rr"}] = -3.14e+100 probStart["Srr"] = -3.14e+100
probStart[stateTag{'S', "rz"}] = -3.14e+100 probStart["Srz"] = -3.14e+100
probStart[stateTag{'S', "s"}] = -3.14e+100 probStart["Ss"] = -3.14e+100
probStart[stateTag{'S', "t"}] = -3.14e+100 probStart["St"] = -3.14e+100
probStart[stateTag{'S', "tg"}] = -6.272842531880403 probStart["Stg"] = -6.272842531880403
probStart[stateTag{'S', "u"}] = -6.940320595827818 probStart["Su"] = -6.940320595827818
probStart[stateTag{'S', "ud"}] = -7.728230161053767 probStart["Sud"] = -7.728230161053767
probStart[stateTag{'S', "ug"}] = -7.5394037026636855 probStart["Sug"] = -7.5394037026636855
probStart[stateTag{'S', "uj"}] = -6.85251045118004 probStart["Suj"] = -6.85251045118004
probStart[stateTag{'S', "ul"}] = -8.4153713175535 probStart["Sul"] = -8.4153713175535
probStart[stateTag{'S', "uv"}] = -8.15808672228609 probStart["Suv"] = -8.15808672228609
probStart[stateTag{'S', "uz"}] = -9.299258625372996 probStart["Suz"] = -9.299258625372996
probStart[stateTag{'S', "v"}] = -3.053292303412302 probStart["Sv"] = -3.053292303412302
probStart[stateTag{'S', "vd"}] = -3.14e+100 probStart["Svd"] = -3.14e+100
probStart[stateTag{'S', "vg"}] = -5.9430181843676895 probStart["Svg"] = -5.9430181843676895
probStart[stateTag{'S', "vi"}] = -3.14e+100 probStart["Svi"] = -3.14e+100
probStart[stateTag{'S', "vn"}] = -11.453923588290419 probStart["Svn"] = -11.453923588290419
probStart[stateTag{'S', "vq"}] = -3.14e+100 probStart["Svq"] = -3.14e+100
probStart[stateTag{'S', "w"}] = -3.14e+100 probStart["Sw"] = -3.14e+100
probStart[stateTag{'S', "x"}] = -8.427419656069674 probStart["Sx"] = -8.427419656069674
probStart[stateTag{'S', "y"}] = -6.1970794699489575 probStart["Sy"] = -6.1970794699489575
probStart[stateTag{'S', "yg"}] = -13.533365129970255 probStart["Syg"] = -13.533365129970255
probStart[stateTag{'S', "z"}] = -3.14e+100 probStart["Sz"] = -3.14e+100
probStart[stateTag{'S', "zg"}] = -3.14e+100 probStart["Szg"] = -3.14e+100
} }

File diff suppressed because it is too large Load Diff

View File

@@ -5,26 +5,13 @@ import (
"sort" "sort"
) )
type stateTag struct {
State byte
Tag string
}
func (st stateTag) String() string {
return fmt.Sprintf("(%q, %s)", st.State, st.Tag)
}
func emptyStateTag() stateTag {
return stateTag{' ', ""}
}
type probState struct { type probState struct {
Prob float64 prob float64
ST stateTag state string
} }
func (ps probState) String() string { func (ps probState) String() string {
return fmt.Sprintf("(%v: %f)", ps.ST, ps.Prob) return fmt.Sprintf("(%v: %f)", ps.state, ps.prob)
} }
type probStates []probState type probStates []probState
@@ -34,39 +21,36 @@ func (pss probStates) Len() int {
} }
func (pss probStates) Less(i, j int) bool { func (pss probStates) Less(i, j int) bool {
if pss[i].Prob == pss[j].Prob { if pss[i].prob == pss[j].prob {
if pss[i].ST.State == pss[j].ST.State { return pss[i].state < pss[j].state
return pss[i].ST.Tag < pss[j].ST.Tag
} }
return pss[i].ST.State < pss[j].ST.State return pss[i].prob < pss[j].prob
}
return pss[i].Prob < pss[j].Prob
} }
func (pss probStates) Swap(i, j int) { func (pss probStates) Swap(i, j int) {
pss[i], pss[j] = pss[j], pss[i] pss[i], pss[j] = pss[j], pss[i]
} }
func viterbi(obs []rune) (float64, []stateTag) { func viterbi(obs []rune) []string {
obsLength := len(obs) obsLength := len(obs)
V := make([]map[stateTag]float64, obsLength) V := make([]map[string]float64, obsLength)
V[0] = make(map[stateTag]float64) V[0] = make(map[string]float64)
mem_path := make([]map[stateTag]stateTag, obsLength) mem_path := make([]map[string]string, obsLength)
mem_path[0] = make(map[stateTag]stateTag) mem_path[0] = make(map[string]string)
ys := charStateTab.get(obs[0]) // default is all_states ys := charStateTab.get(obs[0]) // default is all_states
for _, y := range ys { for _, y := range ys {
V[0][y] = probEmit[y].get(obs[0]) + probStart[y] V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
mem_path[0][y] = emptyStateTag() mem_path[0][y] = ""
} }
for t := 1; t < obsLength; t++ { for t := 1; t < obsLength; t++ {
prev_states := make([]stateTag, 0) prev_states := make([]string, 0)
for x, _ := range mem_path[t-1] { for x, _ := range mem_path[t-1] {
if len(probTrans[x]) > 0 { if len(probTrans[x]) > 0 {
prev_states = append(prev_states, x) prev_states = append(prev_states, x)
} }
} }
//use Go's map to implement Python's Set() //use Go's map to implement Python's Set()
prev_states_expect_next := make(map[stateTag]stateTag) prev_states_expect_next := make(map[string]string)
for _, x := range prev_states { for _, x := range prev_states {
for y, _ := range probTrans[x] { for y, _ := range probTrans[x] {
prev_states_expect_next[y] = y prev_states_expect_next[y] = y
@@ -74,7 +58,7 @@ func viterbi(obs []rune) (float64, []stateTag) {
} }
tmp_obs_states := charStateTab.get(obs[t]) tmp_obs_states := charStateTab.get(obs[t])
obs_states := make([]stateTag, 0) obs_states := make([]string, 0)
for index, _ := range tmp_obs_states { for index, _ := range tmp_obs_states {
if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok { if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
obs_states = append(obs_states, tmp_obs_states[index]) obs_states = append(obs_states, tmp_obs_states[index])
@@ -88,40 +72,35 @@ func viterbi(obs []rune) (float64, []stateTag) {
if len(obs_states) == 0 { if len(obs_states) == 0 {
obs_states = probTransKeys obs_states = probTransKeys
} }
mem_path[t] = make(map[stateTag]stateTag) // TODO: value needed or not? mem_path[t] = make(map[string]string) // TODO: value needed or not?
V[t] = make(map[stateTag]float64) V[t] = make(map[string]float64)
for _, y := range obs_states { for _, y := range obs_states {
pss := make(probStates, 0) pss := make(probStates, 0)
for _, y0 := range prev_states { for _, y0 := range prev_states {
ps := probState{ ps := probState{
Prob: V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]), prob: V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]),
ST: y0} state: y0}
pss = append(pss, ps) pss = append(pss, ps)
} }
sort.Sort(sort.Reverse(pss)) sort.Sort(sort.Reverse(pss))
V[t][y] = pss[0].Prob V[t][y] = pss[0].prob
mem_path[t][y] = pss[0].ST mem_path[t][y] = pss[0].state
} }
} }
last := make(probStates, 0) last := make(probStates, 0)
length := len(mem_path) length := len(mem_path)
vlength := len(V) vlength := len(V)
for y, _ := range mem_path[length-1] { for y := range mem_path[length-1] {
ps := probState{Prob: V[vlength-1][y], ST: y} ps := probState{prob: V[vlength-1][y], state: y}
last = append(last, ps) last = append(last, ps)
} }
sort.Sort(sort.Reverse(last)) sort.Sort(sort.Reverse(last))
prob := last[0].Prob state := last[0].state
state := last[0].ST route := make([]string, len(obs))
route := make([]stateTag, len(obs))
i := obsLength - 1 for i := obsLength - 1; i >= 0; i-- {
for {
if i < 0 {
break
}
route[i] = state route[i] = state
state = mem_path[i][state] state = mem_path[i][state]
i -= 1
} }
return prob, route return route
} }

View File

@@ -5,35 +5,32 @@ import (
) )
var ( var (
route1 = []stateTag{ route1 = []string{
stateTag{'B', "nr"}, "Bnr",
stateTag{'M', "nr"}, "Mnr",
stateTag{'E', "nr"}, "Enr",
stateTag{'S', "v"}, "Sv",
stateTag{'B', "v"}, "Bv",
stateTag{'E', "v"}, "Ev",
stateTag{'B', "n"}, "Bn",
stateTag{'M', "n"}, "Mn",
stateTag{'E', "n"}, "En",
stateTag{'S', "d"}, "Sd",
stateTag{'S', "v"}, "Sv",
stateTag{'S', "n"}, "Sn",
stateTag{'B', "v"}, "Bv",
stateTag{'E', "v"}, "Ev",
stateTag{'B', "nr"}, "Bnr",
stateTag{'M', "nr"}, "Mnr",
stateTag{'M', "nr"}, "Mnr",
stateTag{'M', "nr"}, "Mnr",
stateTag{'E', "nr"}, "Enr",
stateTag{'S', "zg"}} "Szg"}
) )
func TestViterbi(t *testing.T) { func TestViterbi(t *testing.T) {
ss := "李小福是创新办主任也是云计算方面的专家;" ss := "李小福是创新办主任也是云计算方面的专家;"
prob, route := viterbi([]rune(ss)) route := viterbi([]rune(ss))
if prob != MinFloat {
t.Error(prob)
}
if len(route) != len(route1) { if len(route) != len(route1) {
t.Error(len(route)) t.Error(len(route))
} }