mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-09 02:50:24 +08:00
removed unnecessary stateTag struct, using string instead
This commit is contained in:
158922
posseg/char_state_tab.go
158922
posseg/char_state_tab.go
File diff suppressed because it is too large
Load Diff
@@ -66,24 +66,24 @@ func (p *Posseg) cutDetailInternal(sentence string) chan WordTag {
|
||||
|
||||
go func() {
|
||||
runes := []rune(sentence)
|
||||
_, posList := viterbi(runes)
|
||||
posList := viterbi(runes)
|
||||
begin := 0
|
||||
next := 0
|
||||
for i, char := range runes {
|
||||
pos := posList[i].State
|
||||
switch pos {
|
||||
pos := posList[i]
|
||||
switch pos[0] {
|
||||
case 'B':
|
||||
begin = i
|
||||
case 'E':
|
||||
result <- WordTag{string(runes[begin : i+1]), posList[i].Tag}
|
||||
result <- WordTag{string(runes[begin : i+1]), string(pos[1:])}
|
||||
next = i + 1
|
||||
case 'S':
|
||||
result <- WordTag{string(char), posList[i].Tag}
|
||||
result <- WordTag{string(char), string(pos[1:])}
|
||||
next = i + 1
|
||||
}
|
||||
}
|
||||
if next < len(runes) {
|
||||
result <- WordTag{string(runes[next:]), posList[next].Tag}
|
||||
result <- WordTag{string(runes[next:]), string(posList[next][1:])}
|
||||
}
|
||||
close(result)
|
||||
}()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,264 +1,264 @@
|
||||
package posseg
|
||||
|
||||
var (
|
||||
probStart = make(map[stateTag]float64)
|
||||
probStart = make(map[string]float64)
|
||||
)
|
||||
|
||||
func init() {
|
||||
probStart[stateTag{'B', "a"}] = -4.762305214596967
|
||||
probStart[stateTag{'B', "ad"}] = -6.680066036784177
|
||||
probStart[stateTag{'B', "ag"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "an"}] = -8.697083223018778
|
||||
probStart[stateTag{'B', "b"}] = -5.018374362109218
|
||||
probStart[stateTag{'B', "bg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "c"}] = -3.423880184954888
|
||||
probStart[stateTag{'B', "d"}] = -3.9750475297585357
|
||||
probStart[stateTag{'B', "df"}] = -8.888974230828882
|
||||
probStart[stateTag{'B', "dg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "e"}] = -8.563551830394255
|
||||
probStart[stateTag{'B', "en"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "f"}] = -5.491630418482717
|
||||
probStart[stateTag{'B', "g"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "h"}] = -13.533365129970255
|
||||
probStart[stateTag{'B', "i"}] = -6.1157847275557105
|
||||
probStart[stateTag{'B', "in"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "j"}] = -5.0576191284681915
|
||||
probStart[stateTag{'B', "jn"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "k"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "l"}] = -4.905883584659895
|
||||
probStart[stateTag{'B', "ln"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "m"}] = -3.6524299819046386
|
||||
probStart[stateTag{'B', "mg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "mq"}] = -6.78695300139688
|
||||
probStart[stateTag{'B', "n"}] = -1.6966257797548328
|
||||
probStart[stateTag{'B', "ng"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "nr"}] = -2.2310495913769506
|
||||
probStart[stateTag{'B', "nrfg"}] = -5.873722175405573
|
||||
probStart[stateTag{'B', "nrt"}] = -4.985642733519195
|
||||
probStart[stateTag{'B', "ns"}] = -2.8228438314969213
|
||||
probStart[stateTag{'B', "nt"}] = -4.846091668182416
|
||||
probStart[stateTag{'B', "nz"}] = -3.94698846057672
|
||||
probStart[stateTag{'B', "o"}] = -8.433498702146057
|
||||
probStart[stateTag{'B', "p"}] = -4.200984132085048
|
||||
probStart[stateTag{'B', "q"}] = -6.998123858956596
|
||||
probStart[stateTag{'B', "qe"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "qg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "r"}] = -3.4098187790818413
|
||||
probStart[stateTag{'B', "rg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "rr"}] = -12.434752841302146
|
||||
probStart[stateTag{'B', "rz"}] = -7.946116471570005
|
||||
probStart[stateTag{'B', "s"}] = -5.522673590839954
|
||||
probStart[stateTag{'B', "t"}] = -3.3647479094528574
|
||||
probStart[stateTag{'B', "tg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "u"}] = -9.163917277503234
|
||||
probStart[stateTag{'B', "ud"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "ug"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "uj"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "ul"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "uv"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "uz"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "v"}] = -2.6740584874265685
|
||||
probStart[stateTag{'B', "vd"}] = -9.044728760238115
|
||||
probStart[stateTag{'B', "vg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "vi"}] = -12.434752841302146
|
||||
probStart[stateTag{'B', "vn"}] = -4.3315610890163585
|
||||
probStart[stateTag{'B', "vq"}] = -12.147070768850364
|
||||
probStart[stateTag{'B', "w"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "x"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "y"}] = -9.844485675856319
|
||||
probStart[stateTag{'B', "yg"}] = -3.14e+100
|
||||
probStart[stateTag{'B', "z"}] = -7.045681111485645
|
||||
probStart[stateTag{'B', "zg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "a"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ad"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ag"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "an"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "b"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "bg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "c"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "d"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "df"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "dg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "e"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "en"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "f"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "g"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "h"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "i"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "in"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "j"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "jn"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "k"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "l"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ln"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "m"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "mg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "mq"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "n"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ng"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "nr"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "nrfg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "nrt"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ns"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "nt"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "nz"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "o"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "p"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "q"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "qe"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "qg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "r"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "rg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "rr"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "rz"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "s"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "t"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "tg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "u"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ud"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ug"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "uj"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "ul"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "uv"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "uz"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "v"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "vd"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "vg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "vi"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "vn"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "vq"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "w"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "x"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "y"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "yg"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "z"}] = -3.14e+100
|
||||
probStart[stateTag{'E', "zg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "a"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ad"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ag"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "an"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "b"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "bg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "c"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "d"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "df"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "dg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "e"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "en"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "f"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "g"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "h"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "i"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "in"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "j"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "jn"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "k"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "l"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ln"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "m"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "mg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "mq"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "n"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ng"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "nr"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "nrfg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "nrt"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ns"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "nt"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "nz"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "o"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "p"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "q"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "qe"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "qg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "r"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "rg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "rr"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "rz"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "s"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "t"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "tg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "u"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ud"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ug"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "uj"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "ul"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "uv"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "uz"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "v"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "vd"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "vg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "vi"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "vn"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "vq"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "w"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "x"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "y"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "yg"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "z"}] = -3.14e+100
|
||||
probStart[stateTag{'M', "zg"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "a"}] = -3.9025396831295227
|
||||
probStart[stateTag{'S', "ad"}] = -11.048458480182255
|
||||
probStart[stateTag{'S', "ag"}] = -6.954113917960154
|
||||
probStart[stateTag{'S', "an"}] = -12.84021794941031
|
||||
probStart[stateTag{'S', "b"}] = -6.472888763970454
|
||||
probStart[stateTag{'S', "bg"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "c"}] = -4.786966795861212
|
||||
probStart[stateTag{'S', "d"}] = -3.903919764181873
|
||||
probStart[stateTag{'S', "df"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "dg"}] = -8.948397651299683
|
||||
probStart[stateTag{'S', "e"}] = -5.942513006281674
|
||||
probStart[stateTag{'S', "en"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "f"}] = -5.194820249981676
|
||||
probStart[stateTag{'S', "g"}] = -6.507826815331734
|
||||
probStart[stateTag{'S', "h"}] = -8.650563207383884
|
||||
probStart[stateTag{'S', "i"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "in"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "j"}] = -4.911992119644354
|
||||
probStart[stateTag{'S', "jn"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "k"}] = -6.940320595827818
|
||||
probStart[stateTag{'S', "l"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "ln"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "m"}] = -3.269200652116097
|
||||
probStart[stateTag{'S', "mg"}] = -10.825314928868044
|
||||
probStart[stateTag{'S', "mq"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "n"}] = -3.8551483897645107
|
||||
probStart[stateTag{'S', "ng"}] = -4.913434861102905
|
||||
probStart[stateTag{'S', "nr"}] = -4.483663103956885
|
||||
probStart[stateTag{'S', "nrfg"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "nrt"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "ns"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "nt"}] = -12.147070768850364
|
||||
probStart[stateTag{'S', "nz"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "o"}] = -8.464460927750023
|
||||
probStart[stateTag{'S', "p"}] = -2.9868401813596317
|
||||
probStart[stateTag{'S', "q"}] = -4.888658618255058
|
||||
probStart[stateTag{'S', "qe"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "qg"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "r"}] = -2.7635336784127853
|
||||
probStart[stateTag{'S', "rg"}] = -10.275268591948773
|
||||
probStart[stateTag{'S', "rr"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "rz"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "s"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "t"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "tg"}] = -6.272842531880403
|
||||
probStart[stateTag{'S', "u"}] = -6.940320595827818
|
||||
probStart[stateTag{'S', "ud"}] = -7.728230161053767
|
||||
probStart[stateTag{'S', "ug"}] = -7.5394037026636855
|
||||
probStart[stateTag{'S', "uj"}] = -6.85251045118004
|
||||
probStart[stateTag{'S', "ul"}] = -8.4153713175535
|
||||
probStart[stateTag{'S', "uv"}] = -8.15808672228609
|
||||
probStart[stateTag{'S', "uz"}] = -9.299258625372996
|
||||
probStart[stateTag{'S', "v"}] = -3.053292303412302
|
||||
probStart[stateTag{'S', "vd"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "vg"}] = -5.9430181843676895
|
||||
probStart[stateTag{'S', "vi"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "vn"}] = -11.453923588290419
|
||||
probStart[stateTag{'S', "vq"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "w"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "x"}] = -8.427419656069674
|
||||
probStart[stateTag{'S', "y"}] = -6.1970794699489575
|
||||
probStart[stateTag{'S', "yg"}] = -13.533365129970255
|
||||
probStart[stateTag{'S', "z"}] = -3.14e+100
|
||||
probStart[stateTag{'S', "zg"}] = -3.14e+100
|
||||
probStart["Ba"] = -4.762305214596967
|
||||
probStart["Bad"] = -6.680066036784177
|
||||
probStart["Bag"] = -3.14e+100
|
||||
probStart["Ban"] = -8.697083223018778
|
||||
probStart["Bb"] = -5.018374362109218
|
||||
probStart["Bbg"] = -3.14e+100
|
||||
probStart["Bc"] = -3.423880184954888
|
||||
probStart["Bd"] = -3.9750475297585357
|
||||
probStart["Bdf"] = -8.888974230828882
|
||||
probStart["Bdg"] = -3.14e+100
|
||||
probStart["Be"] = -8.563551830394255
|
||||
probStart["Ben"] = -3.14e+100
|
||||
probStart["Bf"] = -5.491630418482717
|
||||
probStart["Bg"] = -3.14e+100
|
||||
probStart["Bh"] = -13.533365129970255
|
||||
probStart["Bi"] = -6.1157847275557105
|
||||
probStart["Bin"] = -3.14e+100
|
||||
probStart["Bj"] = -5.0576191284681915
|
||||
probStart["Bjn"] = -3.14e+100
|
||||
probStart["Bk"] = -3.14e+100
|
||||
probStart["Bl"] = -4.905883584659895
|
||||
probStart["Bln"] = -3.14e+100
|
||||
probStart["Bm"] = -3.6524299819046386
|
||||
probStart["Bmg"] = -3.14e+100
|
||||
probStart["Bmq"] = -6.78695300139688
|
||||
probStart["Bn"] = -1.6966257797548328
|
||||
probStart["Bng"] = -3.14e+100
|
||||
probStart["Bnr"] = -2.2310495913769506
|
||||
probStart["Bnrfg"] = -5.873722175405573
|
||||
probStart["Bnrt"] = -4.985642733519195
|
||||
probStart["Bns"] = -2.8228438314969213
|
||||
probStart["Bnt"] = -4.846091668182416
|
||||
probStart["Bnz"] = -3.94698846057672
|
||||
probStart["Bo"] = -8.433498702146057
|
||||
probStart["Bp"] = -4.200984132085048
|
||||
probStart["Bq"] = -6.998123858956596
|
||||
probStart["Bqe"] = -3.14e+100
|
||||
probStart["Bqg"] = -3.14e+100
|
||||
probStart["Br"] = -3.4098187790818413
|
||||
probStart["Brg"] = -3.14e+100
|
||||
probStart["Brr"] = -12.434752841302146
|
||||
probStart["Brz"] = -7.946116471570005
|
||||
probStart["Bs"] = -5.522673590839954
|
||||
probStart["Bt"] = -3.3647479094528574
|
||||
probStart["Btg"] = -3.14e+100
|
||||
probStart["Bu"] = -9.163917277503234
|
||||
probStart["Bud"] = -3.14e+100
|
||||
probStart["Bug"] = -3.14e+100
|
||||
probStart["Buj"] = -3.14e+100
|
||||
probStart["Bul"] = -3.14e+100
|
||||
probStart["Buv"] = -3.14e+100
|
||||
probStart["Buz"] = -3.14e+100
|
||||
probStart["Bv"] = -2.6740584874265685
|
||||
probStart["Bvd"] = -9.044728760238115
|
||||
probStart["Bvg"] = -3.14e+100
|
||||
probStart["Bvi"] = -12.434752841302146
|
||||
probStart["Bvn"] = -4.3315610890163585
|
||||
probStart["Bvq"] = -12.147070768850364
|
||||
probStart["Bw"] = -3.14e+100
|
||||
probStart["Bx"] = -3.14e+100
|
||||
probStart["By"] = -9.844485675856319
|
||||
probStart["Byg"] = -3.14e+100
|
||||
probStart["Bz"] = -7.045681111485645
|
||||
probStart["Bzg"] = -3.14e+100
|
||||
probStart["Ea"] = -3.14e+100
|
||||
probStart["Ead"] = -3.14e+100
|
||||
probStart["Eag"] = -3.14e+100
|
||||
probStart["Ean"] = -3.14e+100
|
||||
probStart["Eb"] = -3.14e+100
|
||||
probStart["Ebg"] = -3.14e+100
|
||||
probStart["Ec"] = -3.14e+100
|
||||
probStart["Ed"] = -3.14e+100
|
||||
probStart["Edf"] = -3.14e+100
|
||||
probStart["Edg"] = -3.14e+100
|
||||
probStart["Ee"] = -3.14e+100
|
||||
probStart["Een"] = -3.14e+100
|
||||
probStart["Ef"] = -3.14e+100
|
||||
probStart["Eg"] = -3.14e+100
|
||||
probStart["Eh"] = -3.14e+100
|
||||
probStart["Ei"] = -3.14e+100
|
||||
probStart["Ein"] = -3.14e+100
|
||||
probStart["Ej"] = -3.14e+100
|
||||
probStart["Ejn"] = -3.14e+100
|
||||
probStart["Ek"] = -3.14e+100
|
||||
probStart["El"] = -3.14e+100
|
||||
probStart["Eln"] = -3.14e+100
|
||||
probStart["Em"] = -3.14e+100
|
||||
probStart["Emg"] = -3.14e+100
|
||||
probStart["Emq"] = -3.14e+100
|
||||
probStart["En"] = -3.14e+100
|
||||
probStart["Eng"] = -3.14e+100
|
||||
probStart["Enr"] = -3.14e+100
|
||||
probStart["Enrfg"] = -3.14e+100
|
||||
probStart["Enrt"] = -3.14e+100
|
||||
probStart["Ens"] = -3.14e+100
|
||||
probStart["Ent"] = -3.14e+100
|
||||
probStart["Enz"] = -3.14e+100
|
||||
probStart["Eo"] = -3.14e+100
|
||||
probStart["Ep"] = -3.14e+100
|
||||
probStart["Eq"] = -3.14e+100
|
||||
probStart["Eqe"] = -3.14e+100
|
||||
probStart["Eqg"] = -3.14e+100
|
||||
probStart["Er"] = -3.14e+100
|
||||
probStart["Erg"] = -3.14e+100
|
||||
probStart["Err"] = -3.14e+100
|
||||
probStart["Erz"] = -3.14e+100
|
||||
probStart["Es"] = -3.14e+100
|
||||
probStart["Et"] = -3.14e+100
|
||||
probStart["Etg"] = -3.14e+100
|
||||
probStart["Eu"] = -3.14e+100
|
||||
probStart["Eud"] = -3.14e+100
|
||||
probStart["Eug"] = -3.14e+100
|
||||
probStart["Euj"] = -3.14e+100
|
||||
probStart["Eul"] = -3.14e+100
|
||||
probStart["Euv"] = -3.14e+100
|
||||
probStart["Euz"] = -3.14e+100
|
||||
probStart["Ev"] = -3.14e+100
|
||||
probStart["Evd"] = -3.14e+100
|
||||
probStart["Evg"] = -3.14e+100
|
||||
probStart["Evi"] = -3.14e+100
|
||||
probStart["Evn"] = -3.14e+100
|
||||
probStart["Evq"] = -3.14e+100
|
||||
probStart["Ew"] = -3.14e+100
|
||||
probStart["Ex"] = -3.14e+100
|
||||
probStart["Ey"] = -3.14e+100
|
||||
probStart["Eyg"] = -3.14e+100
|
||||
probStart["Ez"] = -3.14e+100
|
||||
probStart["Ezg"] = -3.14e+100
|
||||
probStart["Ma"] = -3.14e+100
|
||||
probStart["Mad"] = -3.14e+100
|
||||
probStart["Mag"] = -3.14e+100
|
||||
probStart["Man"] = -3.14e+100
|
||||
probStart["Mb"] = -3.14e+100
|
||||
probStart["Mbg"] = -3.14e+100
|
||||
probStart["Mc"] = -3.14e+100
|
||||
probStart["Md"] = -3.14e+100
|
||||
probStart["Mdf"] = -3.14e+100
|
||||
probStart["Mdg"] = -3.14e+100
|
||||
probStart["Me"] = -3.14e+100
|
||||
probStart["Men"] = -3.14e+100
|
||||
probStart["Mf"] = -3.14e+100
|
||||
probStart["Mg"] = -3.14e+100
|
||||
probStart["Mh"] = -3.14e+100
|
||||
probStart["Mi"] = -3.14e+100
|
||||
probStart["Min"] = -3.14e+100
|
||||
probStart["Mj"] = -3.14e+100
|
||||
probStart["Mjn"] = -3.14e+100
|
||||
probStart["Mk"] = -3.14e+100
|
||||
probStart["Ml"] = -3.14e+100
|
||||
probStart["Mln"] = -3.14e+100
|
||||
probStart["Mm"] = -3.14e+100
|
||||
probStart["Mmg"] = -3.14e+100
|
||||
probStart["Mmq"] = -3.14e+100
|
||||
probStart["Mn"] = -3.14e+100
|
||||
probStart["Mng"] = -3.14e+100
|
||||
probStart["Mnr"] = -3.14e+100
|
||||
probStart["Mnrfg"] = -3.14e+100
|
||||
probStart["Mnrt"] = -3.14e+100
|
||||
probStart["Mns"] = -3.14e+100
|
||||
probStart["Mnt"] = -3.14e+100
|
||||
probStart["Mnz"] = -3.14e+100
|
||||
probStart["Mo"] = -3.14e+100
|
||||
probStart["Mp"] = -3.14e+100
|
||||
probStart["Mq"] = -3.14e+100
|
||||
probStart["Mqe"] = -3.14e+100
|
||||
probStart["Mqg"] = -3.14e+100
|
||||
probStart["Mr"] = -3.14e+100
|
||||
probStart["Mrg"] = -3.14e+100
|
||||
probStart["Mrr"] = -3.14e+100
|
||||
probStart["Mrz"] = -3.14e+100
|
||||
probStart["Ms"] = -3.14e+100
|
||||
probStart["Mt"] = -3.14e+100
|
||||
probStart["Mtg"] = -3.14e+100
|
||||
probStart["Mu"] = -3.14e+100
|
||||
probStart["Mud"] = -3.14e+100
|
||||
probStart["Mug"] = -3.14e+100
|
||||
probStart["Muj"] = -3.14e+100
|
||||
probStart["Mul"] = -3.14e+100
|
||||
probStart["Muv"] = -3.14e+100
|
||||
probStart["Muz"] = -3.14e+100
|
||||
probStart["Mv"] = -3.14e+100
|
||||
probStart["Mvd"] = -3.14e+100
|
||||
probStart["Mvg"] = -3.14e+100
|
||||
probStart["Mvi"] = -3.14e+100
|
||||
probStart["Mvn"] = -3.14e+100
|
||||
probStart["Mvq"] = -3.14e+100
|
||||
probStart["Mw"] = -3.14e+100
|
||||
probStart["Mx"] = -3.14e+100
|
||||
probStart["My"] = -3.14e+100
|
||||
probStart["Myg"] = -3.14e+100
|
||||
probStart["Mz"] = -3.14e+100
|
||||
probStart["Mzg"] = -3.14e+100
|
||||
probStart["Sa"] = -3.9025396831295227
|
||||
probStart["Sad"] = -11.048458480182255
|
||||
probStart["Sag"] = -6.954113917960154
|
||||
probStart["San"] = -12.84021794941031
|
||||
probStart["Sb"] = -6.472888763970454
|
||||
probStart["Sbg"] = -3.14e+100
|
||||
probStart["Sc"] = -4.786966795861212
|
||||
probStart["Sd"] = -3.903919764181873
|
||||
probStart["Sdf"] = -3.14e+100
|
||||
probStart["Sdg"] = -8.948397651299683
|
||||
probStart["Se"] = -5.942513006281674
|
||||
probStart["Sen"] = -3.14e+100
|
||||
probStart["Sf"] = -5.194820249981676
|
||||
probStart["Sg"] = -6.507826815331734
|
||||
probStart["Sh"] = -8.650563207383884
|
||||
probStart["Si"] = -3.14e+100
|
||||
probStart["Sin"] = -3.14e+100
|
||||
probStart["Sj"] = -4.911992119644354
|
||||
probStart["Sjn"] = -3.14e+100
|
||||
probStart["Sk"] = -6.940320595827818
|
||||
probStart["Sl"] = -3.14e+100
|
||||
probStart["Sln"] = -3.14e+100
|
||||
probStart["Sm"] = -3.269200652116097
|
||||
probStart["Smg"] = -10.825314928868044
|
||||
probStart["Smq"] = -3.14e+100
|
||||
probStart["Sn"] = -3.8551483897645107
|
||||
probStart["Sng"] = -4.913434861102905
|
||||
probStart["Snr"] = -4.483663103956885
|
||||
probStart["Snrfg"] = -3.14e+100
|
||||
probStart["Snrt"] = -3.14e+100
|
||||
probStart["Sns"] = -3.14e+100
|
||||
probStart["Snt"] = -12.147070768850364
|
||||
probStart["Snz"] = -3.14e+100
|
||||
probStart["So"] = -8.464460927750023
|
||||
probStart["Sp"] = -2.9868401813596317
|
||||
probStart["Sq"] = -4.888658618255058
|
||||
probStart["Sqe"] = -3.14e+100
|
||||
probStart["Sqg"] = -3.14e+100
|
||||
probStart["Sr"] = -2.7635336784127853
|
||||
probStart["Srg"] = -10.275268591948773
|
||||
probStart["Srr"] = -3.14e+100
|
||||
probStart["Srz"] = -3.14e+100
|
||||
probStart["Ss"] = -3.14e+100
|
||||
probStart["St"] = -3.14e+100
|
||||
probStart["Stg"] = -6.272842531880403
|
||||
probStart["Su"] = -6.940320595827818
|
||||
probStart["Sud"] = -7.728230161053767
|
||||
probStart["Sug"] = -7.5394037026636855
|
||||
probStart["Suj"] = -6.85251045118004
|
||||
probStart["Sul"] = -8.4153713175535
|
||||
probStart["Suv"] = -8.15808672228609
|
||||
probStart["Suz"] = -9.299258625372996
|
||||
probStart["Sv"] = -3.053292303412302
|
||||
probStart["Svd"] = -3.14e+100
|
||||
probStart["Svg"] = -5.9430181843676895
|
||||
probStart["Svi"] = -3.14e+100
|
||||
probStart["Svn"] = -11.453923588290419
|
||||
probStart["Svq"] = -3.14e+100
|
||||
probStart["Sw"] = -3.14e+100
|
||||
probStart["Sx"] = -8.427419656069674
|
||||
probStart["Sy"] = -6.1970794699489575
|
||||
probStart["Syg"] = -13.533365129970255
|
||||
probStart["Sz"] = -3.14e+100
|
||||
probStart["Szg"] = -3.14e+100
|
||||
}
|
||||
|
||||
10960
posseg/prob_trans.go
10960
posseg/prob_trans.go
File diff suppressed because it is too large
Load Diff
@@ -5,26 +5,13 @@ import (
|
||||
"sort"
|
||||
)
|
||||
|
||||
type stateTag struct {
|
||||
State byte
|
||||
Tag string
|
||||
}
|
||||
|
||||
func (st stateTag) String() string {
|
||||
return fmt.Sprintf("(%q, %s)", st.State, st.Tag)
|
||||
}
|
||||
|
||||
func emptyStateTag() stateTag {
|
||||
return stateTag{' ', ""}
|
||||
}
|
||||
|
||||
type probState struct {
|
||||
Prob float64
|
||||
ST stateTag
|
||||
prob float64
|
||||
state string
|
||||
}
|
||||
|
||||
func (ps probState) String() string {
|
||||
return fmt.Sprintf("(%v: %f)", ps.ST, ps.Prob)
|
||||
return fmt.Sprintf("(%v: %f)", ps.state, ps.prob)
|
||||
}
|
||||
|
||||
type probStates []probState
|
||||
@@ -34,39 +21,36 @@ func (pss probStates) Len() int {
|
||||
}
|
||||
|
||||
func (pss probStates) Less(i, j int) bool {
|
||||
if pss[i].Prob == pss[j].Prob {
|
||||
if pss[i].ST.State == pss[j].ST.State {
|
||||
return pss[i].ST.Tag < pss[j].ST.Tag
|
||||
}
|
||||
return pss[i].ST.State < pss[j].ST.State
|
||||
if pss[i].prob == pss[j].prob {
|
||||
return pss[i].state < pss[j].state
|
||||
}
|
||||
return pss[i].Prob < pss[j].Prob
|
||||
return pss[i].prob < pss[j].prob
|
||||
}
|
||||
|
||||
func (pss probStates) Swap(i, j int) {
|
||||
pss[i], pss[j] = pss[j], pss[i]
|
||||
}
|
||||
|
||||
func viterbi(obs []rune) (float64, []stateTag) {
|
||||
func viterbi(obs []rune) []string {
|
||||
obsLength := len(obs)
|
||||
V := make([]map[stateTag]float64, obsLength)
|
||||
V[0] = make(map[stateTag]float64)
|
||||
mem_path := make([]map[stateTag]stateTag, obsLength)
|
||||
mem_path[0] = make(map[stateTag]stateTag)
|
||||
V := make([]map[string]float64, obsLength)
|
||||
V[0] = make(map[string]float64)
|
||||
mem_path := make([]map[string]string, obsLength)
|
||||
mem_path[0] = make(map[string]string)
|
||||
ys := charStateTab.get(obs[0]) // default is all_states
|
||||
for _, y := range ys {
|
||||
V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
|
||||
mem_path[0][y] = emptyStateTag()
|
||||
mem_path[0][y] = ""
|
||||
}
|
||||
for t := 1; t < obsLength; t++ {
|
||||
prev_states := make([]stateTag, 0)
|
||||
prev_states := make([]string, 0)
|
||||
for x, _ := range mem_path[t-1] {
|
||||
if len(probTrans[x]) > 0 {
|
||||
prev_states = append(prev_states, x)
|
||||
}
|
||||
}
|
||||
//use Go's map to implement Python's Set()
|
||||
prev_states_expect_next := make(map[stateTag]stateTag)
|
||||
prev_states_expect_next := make(map[string]string)
|
||||
for _, x := range prev_states {
|
||||
for y, _ := range probTrans[x] {
|
||||
prev_states_expect_next[y] = y
|
||||
@@ -74,7 +58,7 @@ func viterbi(obs []rune) (float64, []stateTag) {
|
||||
}
|
||||
tmp_obs_states := charStateTab.get(obs[t])
|
||||
|
||||
obs_states := make([]stateTag, 0)
|
||||
obs_states := make([]string, 0)
|
||||
for index, _ := range tmp_obs_states {
|
||||
if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
|
||||
obs_states = append(obs_states, tmp_obs_states[index])
|
||||
@@ -88,40 +72,35 @@ func viterbi(obs []rune) (float64, []stateTag) {
|
||||
if len(obs_states) == 0 {
|
||||
obs_states = probTransKeys
|
||||
}
|
||||
mem_path[t] = make(map[stateTag]stateTag) // TODO: value needed or not?
|
||||
V[t] = make(map[stateTag]float64)
|
||||
mem_path[t] = make(map[string]string) // TODO: value needed or not?
|
||||
V[t] = make(map[string]float64)
|
||||
for _, y := range obs_states {
|
||||
pss := make(probStates, 0)
|
||||
for _, y0 := range prev_states {
|
||||
ps := probState{
|
||||
Prob: V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]),
|
||||
ST: y0}
|
||||
prob: V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]),
|
||||
state: y0}
|
||||
pss = append(pss, ps)
|
||||
}
|
||||
sort.Sort(sort.Reverse(pss))
|
||||
V[t][y] = pss[0].Prob
|
||||
mem_path[t][y] = pss[0].ST
|
||||
V[t][y] = pss[0].prob
|
||||
mem_path[t][y] = pss[0].state
|
||||
}
|
||||
}
|
||||
last := make(probStates, 0)
|
||||
length := len(mem_path)
|
||||
vlength := len(V)
|
||||
for y, _ := range mem_path[length-1] {
|
||||
ps := probState{Prob: V[vlength-1][y], ST: y}
|
||||
for y := range mem_path[length-1] {
|
||||
ps := probState{prob: V[vlength-1][y], state: y}
|
||||
last = append(last, ps)
|
||||
}
|
||||
sort.Sort(sort.Reverse(last))
|
||||
prob := last[0].Prob
|
||||
state := last[0].ST
|
||||
route := make([]stateTag, len(obs))
|
||||
i := obsLength - 1
|
||||
for {
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
state := last[0].state
|
||||
route := make([]string, len(obs))
|
||||
|
||||
for i := obsLength - 1; i >= 0; i-- {
|
||||
route[i] = state
|
||||
state = mem_path[i][state]
|
||||
i -= 1
|
||||
}
|
||||
return prob, route
|
||||
return route
|
||||
}
|
||||
|
||||
@@ -5,35 +5,32 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
route1 = []stateTag{
|
||||
stateTag{'B', "nr"},
|
||||
stateTag{'M', "nr"},
|
||||
stateTag{'E', "nr"},
|
||||
stateTag{'S', "v"},
|
||||
stateTag{'B', "v"},
|
||||
stateTag{'E', "v"},
|
||||
stateTag{'B', "n"},
|
||||
stateTag{'M', "n"},
|
||||
stateTag{'E', "n"},
|
||||
stateTag{'S', "d"},
|
||||
stateTag{'S', "v"},
|
||||
stateTag{'S', "n"},
|
||||
stateTag{'B', "v"},
|
||||
stateTag{'E', "v"},
|
||||
stateTag{'B', "nr"},
|
||||
stateTag{'M', "nr"},
|
||||
stateTag{'M', "nr"},
|
||||
stateTag{'M', "nr"},
|
||||
stateTag{'E', "nr"},
|
||||
stateTag{'S', "zg"}}
|
||||
route1 = []string{
|
||||
"Bnr",
|
||||
"Mnr",
|
||||
"Enr",
|
||||
"Sv",
|
||||
"Bv",
|
||||
"Ev",
|
||||
"Bn",
|
||||
"Mn",
|
||||
"En",
|
||||
"Sd",
|
||||
"Sv",
|
||||
"Sn",
|
||||
"Bv",
|
||||
"Ev",
|
||||
"Bnr",
|
||||
"Mnr",
|
||||
"Mnr",
|
||||
"Mnr",
|
||||
"Enr",
|
||||
"Szg"}
|
||||
)
|
||||
|
||||
func TestViterbi(t *testing.T) {
|
||||
ss := "李小福是创新办主任也是云计算方面的专家;"
|
||||
prob, route := viterbi([]rune(ss))
|
||||
if prob != MinFloat {
|
||||
t.Error(prob)
|
||||
}
|
||||
route := viterbi([]rune(ss))
|
||||
if len(route) != len(route1) {
|
||||
t.Error(len(route))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user