mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-05 00:32:51 +08:00
code refactor, use uint for map key to improve performance
This commit is contained in:
86165
posseg/char_state_tab.go
86165
posseg/char_state_tab.go
File diff suppressed because it is too large
Load Diff
@@ -76,19 +76,19 @@ func (p *Posseg) cutDetailInternal(sentence string) chan Pair {
|
||||
next := 0
|
||||
for i, char := range runes {
|
||||
pos := posList[i]
|
||||
switch pos[0] {
|
||||
case 'B':
|
||||
switch pos.Tag() {
|
||||
case "B":
|
||||
begin = i
|
||||
case 'E':
|
||||
result <- Pair{string(runes[begin : i+1]), string(pos[1:])}
|
||||
case "E":
|
||||
result <- Pair{string(runes[begin : i+1]), pos.POS()}
|
||||
next = i + 1
|
||||
case 'S':
|
||||
result <- Pair{string(char), string(pos[1:])}
|
||||
case "S":
|
||||
result <- Pair{string(char), pos.POS()}
|
||||
next = i + 1
|
||||
}
|
||||
}
|
||||
if next < len(runes) {
|
||||
result <- Pair{string(runes[next:]), string(posList[next][1:])}
|
||||
result <- Pair{string(runes[next:]), posList[next].POS()}
|
||||
}
|
||||
close(result)
|
||||
}()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,264 +1,260 @@
|
||||
package posseg
|
||||
|
||||
var (
|
||||
probStart = make(map[string]float64)
|
||||
)
|
||||
|
||||
func init() {
|
||||
probStart["Ba"] = -4.762305214596967
|
||||
probStart["Bad"] = -6.680066036784177
|
||||
probStart["Bag"] = -3.14e+100
|
||||
probStart["Ban"] = -8.697083223018778
|
||||
probStart["Bb"] = -5.018374362109218
|
||||
probStart["Bbg"] = -3.14e+100
|
||||
probStart["Bc"] = -3.423880184954888
|
||||
probStart["Bd"] = -3.9750475297585357
|
||||
probStart["Bdf"] = -8.888974230828882
|
||||
probStart["Bdg"] = -3.14e+100
|
||||
probStart["Be"] = -8.563551830394255
|
||||
probStart["Ben"] = -3.14e+100
|
||||
probStart["Bf"] = -5.491630418482717
|
||||
probStart["Bg"] = -3.14e+100
|
||||
probStart["Bh"] = -13.533365129970255
|
||||
probStart["Bi"] = -6.1157847275557105
|
||||
probStart["Bin"] = -3.14e+100
|
||||
probStart["Bj"] = -5.0576191284681915
|
||||
probStart["Bjn"] = -3.14e+100
|
||||
probStart["Bk"] = -3.14e+100
|
||||
probStart["Bl"] = -4.905883584659895
|
||||
probStart["Bln"] = -3.14e+100
|
||||
probStart["Bm"] = -3.6524299819046386
|
||||
probStart["Bmg"] = -3.14e+100
|
||||
probStart["Bmq"] = -6.78695300139688
|
||||
probStart["Bn"] = -1.6966257797548328
|
||||
probStart["Bng"] = -3.14e+100
|
||||
probStart["Bnr"] = -2.2310495913769506
|
||||
probStart["Bnrfg"] = -5.873722175405573
|
||||
probStart["Bnrt"] = -4.985642733519195
|
||||
probStart["Bns"] = -2.8228438314969213
|
||||
probStart["Bnt"] = -4.846091668182416
|
||||
probStart["Bnz"] = -3.94698846057672
|
||||
probStart["Bo"] = -8.433498702146057
|
||||
probStart["Bp"] = -4.200984132085048
|
||||
probStart["Bq"] = -6.998123858956596
|
||||
probStart["Bqe"] = -3.14e+100
|
||||
probStart["Bqg"] = -3.14e+100
|
||||
probStart["Br"] = -3.4098187790818413
|
||||
probStart["Brg"] = -3.14e+100
|
||||
probStart["Brr"] = -12.434752841302146
|
||||
probStart["Brz"] = -7.946116471570005
|
||||
probStart["Bs"] = -5.522673590839954
|
||||
probStart["Bt"] = -3.3647479094528574
|
||||
probStart["Btg"] = -3.14e+100
|
||||
probStart["Bu"] = -9.163917277503234
|
||||
probStart["Bud"] = -3.14e+100
|
||||
probStart["Bug"] = -3.14e+100
|
||||
probStart["Buj"] = -3.14e+100
|
||||
probStart["Bul"] = -3.14e+100
|
||||
probStart["Buv"] = -3.14e+100
|
||||
probStart["Buz"] = -3.14e+100
|
||||
probStart["Bv"] = -2.6740584874265685
|
||||
probStart["Bvd"] = -9.044728760238115
|
||||
probStart["Bvg"] = -3.14e+100
|
||||
probStart["Bvi"] = -12.434752841302146
|
||||
probStart["Bvn"] = -4.3315610890163585
|
||||
probStart["Bvq"] = -12.147070768850364
|
||||
probStart["Bw"] = -3.14e+100
|
||||
probStart["Bx"] = -3.14e+100
|
||||
probStart["By"] = -9.844485675856319
|
||||
probStart["Byg"] = -3.14e+100
|
||||
probStart["Bz"] = -7.045681111485645
|
||||
probStart["Bzg"] = -3.14e+100
|
||||
probStart["Ea"] = -3.14e+100
|
||||
probStart["Ead"] = -3.14e+100
|
||||
probStart["Eag"] = -3.14e+100
|
||||
probStart["Ean"] = -3.14e+100
|
||||
probStart["Eb"] = -3.14e+100
|
||||
probStart["Ebg"] = -3.14e+100
|
||||
probStart["Ec"] = -3.14e+100
|
||||
probStart["Ed"] = -3.14e+100
|
||||
probStart["Edf"] = -3.14e+100
|
||||
probStart["Edg"] = -3.14e+100
|
||||
probStart["Ee"] = -3.14e+100
|
||||
probStart["Een"] = -3.14e+100
|
||||
probStart["Ef"] = -3.14e+100
|
||||
probStart["Eg"] = -3.14e+100
|
||||
probStart["Eh"] = -3.14e+100
|
||||
probStart["Ei"] = -3.14e+100
|
||||
probStart["Ein"] = -3.14e+100
|
||||
probStart["Ej"] = -3.14e+100
|
||||
probStart["Ejn"] = -3.14e+100
|
||||
probStart["Ek"] = -3.14e+100
|
||||
probStart["El"] = -3.14e+100
|
||||
probStart["Eln"] = -3.14e+100
|
||||
probStart["Em"] = -3.14e+100
|
||||
probStart["Emg"] = -3.14e+100
|
||||
probStart["Emq"] = -3.14e+100
|
||||
probStart["En"] = -3.14e+100
|
||||
probStart["Eng"] = -3.14e+100
|
||||
probStart["Enr"] = -3.14e+100
|
||||
probStart["Enrfg"] = -3.14e+100
|
||||
probStart["Enrt"] = -3.14e+100
|
||||
probStart["Ens"] = -3.14e+100
|
||||
probStart["Ent"] = -3.14e+100
|
||||
probStart["Enz"] = -3.14e+100
|
||||
probStart["Eo"] = -3.14e+100
|
||||
probStart["Ep"] = -3.14e+100
|
||||
probStart["Eq"] = -3.14e+100
|
||||
probStart["Eqe"] = -3.14e+100
|
||||
probStart["Eqg"] = -3.14e+100
|
||||
probStart["Er"] = -3.14e+100
|
||||
probStart["Erg"] = -3.14e+100
|
||||
probStart["Err"] = -3.14e+100
|
||||
probStart["Erz"] = -3.14e+100
|
||||
probStart["Es"] = -3.14e+100
|
||||
probStart["Et"] = -3.14e+100
|
||||
probStart["Etg"] = -3.14e+100
|
||||
probStart["Eu"] = -3.14e+100
|
||||
probStart["Eud"] = -3.14e+100
|
||||
probStart["Eug"] = -3.14e+100
|
||||
probStart["Euj"] = -3.14e+100
|
||||
probStart["Eul"] = -3.14e+100
|
||||
probStart["Euv"] = -3.14e+100
|
||||
probStart["Euz"] = -3.14e+100
|
||||
probStart["Ev"] = -3.14e+100
|
||||
probStart["Evd"] = -3.14e+100
|
||||
probStart["Evg"] = -3.14e+100
|
||||
probStart["Evi"] = -3.14e+100
|
||||
probStart["Evn"] = -3.14e+100
|
||||
probStart["Evq"] = -3.14e+100
|
||||
probStart["Ew"] = -3.14e+100
|
||||
probStart["Ex"] = -3.14e+100
|
||||
probStart["Ey"] = -3.14e+100
|
||||
probStart["Eyg"] = -3.14e+100
|
||||
probStart["Ez"] = -3.14e+100
|
||||
probStart["Ezg"] = -3.14e+100
|
||||
probStart["Ma"] = -3.14e+100
|
||||
probStart["Mad"] = -3.14e+100
|
||||
probStart["Mag"] = -3.14e+100
|
||||
probStart["Man"] = -3.14e+100
|
||||
probStart["Mb"] = -3.14e+100
|
||||
probStart["Mbg"] = -3.14e+100
|
||||
probStart["Mc"] = -3.14e+100
|
||||
probStart["Md"] = -3.14e+100
|
||||
probStart["Mdf"] = -3.14e+100
|
||||
probStart["Mdg"] = -3.14e+100
|
||||
probStart["Me"] = -3.14e+100
|
||||
probStart["Men"] = -3.14e+100
|
||||
probStart["Mf"] = -3.14e+100
|
||||
probStart["Mg"] = -3.14e+100
|
||||
probStart["Mh"] = -3.14e+100
|
||||
probStart["Mi"] = -3.14e+100
|
||||
probStart["Min"] = -3.14e+100
|
||||
probStart["Mj"] = -3.14e+100
|
||||
probStart["Mjn"] = -3.14e+100
|
||||
probStart["Mk"] = -3.14e+100
|
||||
probStart["Ml"] = -3.14e+100
|
||||
probStart["Mln"] = -3.14e+100
|
||||
probStart["Mm"] = -3.14e+100
|
||||
probStart["Mmg"] = -3.14e+100
|
||||
probStart["Mmq"] = -3.14e+100
|
||||
probStart["Mn"] = -3.14e+100
|
||||
probStart["Mng"] = -3.14e+100
|
||||
probStart["Mnr"] = -3.14e+100
|
||||
probStart["Mnrfg"] = -3.14e+100
|
||||
probStart["Mnrt"] = -3.14e+100
|
||||
probStart["Mns"] = -3.14e+100
|
||||
probStart["Mnt"] = -3.14e+100
|
||||
probStart["Mnz"] = -3.14e+100
|
||||
probStart["Mo"] = -3.14e+100
|
||||
probStart["Mp"] = -3.14e+100
|
||||
probStart["Mq"] = -3.14e+100
|
||||
probStart["Mqe"] = -3.14e+100
|
||||
probStart["Mqg"] = -3.14e+100
|
||||
probStart["Mr"] = -3.14e+100
|
||||
probStart["Mrg"] = -3.14e+100
|
||||
probStart["Mrr"] = -3.14e+100
|
||||
probStart["Mrz"] = -3.14e+100
|
||||
probStart["Ms"] = -3.14e+100
|
||||
probStart["Mt"] = -3.14e+100
|
||||
probStart["Mtg"] = -3.14e+100
|
||||
probStart["Mu"] = -3.14e+100
|
||||
probStart["Mud"] = -3.14e+100
|
||||
probStart["Mug"] = -3.14e+100
|
||||
probStart["Muj"] = -3.14e+100
|
||||
probStart["Mul"] = -3.14e+100
|
||||
probStart["Muv"] = -3.14e+100
|
||||
probStart["Muz"] = -3.14e+100
|
||||
probStart["Mv"] = -3.14e+100
|
||||
probStart["Mvd"] = -3.14e+100
|
||||
probStart["Mvg"] = -3.14e+100
|
||||
probStart["Mvi"] = -3.14e+100
|
||||
probStart["Mvn"] = -3.14e+100
|
||||
probStart["Mvq"] = -3.14e+100
|
||||
probStart["Mw"] = -3.14e+100
|
||||
probStart["Mx"] = -3.14e+100
|
||||
probStart["My"] = -3.14e+100
|
||||
probStart["Myg"] = -3.14e+100
|
||||
probStart["Mz"] = -3.14e+100
|
||||
probStart["Mzg"] = -3.14e+100
|
||||
probStart["Sa"] = -3.9025396831295227
|
||||
probStart["Sad"] = -11.048458480182255
|
||||
probStart["Sag"] = -6.954113917960154
|
||||
probStart["San"] = -12.84021794941031
|
||||
probStart["Sb"] = -6.472888763970454
|
||||
probStart["Sbg"] = -3.14e+100
|
||||
probStart["Sc"] = -4.786966795861212
|
||||
probStart["Sd"] = -3.903919764181873
|
||||
probStart["Sdf"] = -3.14e+100
|
||||
probStart["Sdg"] = -8.948397651299683
|
||||
probStart["Se"] = -5.942513006281674
|
||||
probStart["Sen"] = -3.14e+100
|
||||
probStart["Sf"] = -5.194820249981676
|
||||
probStart["Sg"] = -6.507826815331734
|
||||
probStart["Sh"] = -8.650563207383884
|
||||
probStart["Si"] = -3.14e+100
|
||||
probStart["Sin"] = -3.14e+100
|
||||
probStart["Sj"] = -4.911992119644354
|
||||
probStart["Sjn"] = -3.14e+100
|
||||
probStart["Sk"] = -6.940320595827818
|
||||
probStart["Sl"] = -3.14e+100
|
||||
probStart["Sln"] = -3.14e+100
|
||||
probStart["Sm"] = -3.269200652116097
|
||||
probStart["Smg"] = -10.825314928868044
|
||||
probStart["Smq"] = -3.14e+100
|
||||
probStart["Sn"] = -3.8551483897645107
|
||||
probStart["Sng"] = -4.913434861102905
|
||||
probStart["Snr"] = -4.483663103956885
|
||||
probStart["Snrfg"] = -3.14e+100
|
||||
probStart["Snrt"] = -3.14e+100
|
||||
probStart["Sns"] = -3.14e+100
|
||||
probStart["Snt"] = -12.147070768850364
|
||||
probStart["Snz"] = -3.14e+100
|
||||
probStart["So"] = -8.464460927750023
|
||||
probStart["Sp"] = -2.9868401813596317
|
||||
probStart["Sq"] = -4.888658618255058
|
||||
probStart["Sqe"] = -3.14e+100
|
||||
probStart["Sqg"] = -3.14e+100
|
||||
probStart["Sr"] = -2.7635336784127853
|
||||
probStart["Srg"] = -10.275268591948773
|
||||
probStart["Srr"] = -3.14e+100
|
||||
probStart["Srz"] = -3.14e+100
|
||||
probStart["Ss"] = -3.14e+100
|
||||
probStart["St"] = -3.14e+100
|
||||
probStart["Stg"] = -6.272842531880403
|
||||
probStart["Su"] = -6.940320595827818
|
||||
probStart["Sud"] = -7.728230161053767
|
||||
probStart["Sug"] = -7.5394037026636855
|
||||
probStart["Suj"] = -6.85251045118004
|
||||
probStart["Sul"] = -8.4153713175535
|
||||
probStart["Suv"] = -8.15808672228609
|
||||
probStart["Suz"] = -9.299258625372996
|
||||
probStart["Sv"] = -3.053292303412302
|
||||
probStart["Svd"] = -3.14e+100
|
||||
probStart["Svg"] = -5.9430181843676895
|
||||
probStart["Svi"] = -3.14e+100
|
||||
probStart["Svn"] = -11.453923588290419
|
||||
probStart["Svq"] = -3.14e+100
|
||||
probStart["Sw"] = -3.14e+100
|
||||
probStart["Sx"] = -8.427419656069674
|
||||
probStart["Sy"] = -6.1970794699489575
|
||||
probStart["Syg"] = -13.533365129970255
|
||||
probStart["Sz"] = -3.14e+100
|
||||
probStart["Szg"] = -3.14e+100
|
||||
var probStart = map[uint16]float64{
|
||||
100: -4.762305214596967,
|
||||
101: -6.680066036784177,
|
||||
102: -3.14e+100,
|
||||
103: -8.697083223018778,
|
||||
104: -5.018374362109218,
|
||||
105: -3.14e+100,
|
||||
106: -3.423880184954888,
|
||||
107: -3.9750475297585357,
|
||||
108: -8.888974230828882,
|
||||
109: -3.14e+100,
|
||||
110: -8.563551830394255,
|
||||
111: -3.14e+100,
|
||||
112: -5.491630418482717,
|
||||
113: -3.14e+100,
|
||||
114: -13.533365129970255,
|
||||
115: -6.1157847275557105,
|
||||
116: -3.14e+100,
|
||||
117: -5.0576191284681915,
|
||||
118: -3.14e+100,
|
||||
119: -3.14e+100,
|
||||
120: -4.905883584659895,
|
||||
121: -3.14e+100,
|
||||
122: -3.6524299819046386,
|
||||
123: -3.14e+100,
|
||||
124: -6.78695300139688,
|
||||
125: -1.6966257797548328,
|
||||
126: -3.14e+100,
|
||||
127: -2.2310495913769506,
|
||||
128: -5.873722175405573,
|
||||
129: -4.985642733519195,
|
||||
130: -2.8228438314969213,
|
||||
131: -4.846091668182416,
|
||||
132: -3.94698846057672,
|
||||
133: -8.433498702146057,
|
||||
134: -4.200984132085048,
|
||||
135: -6.998123858956596,
|
||||
136: -3.14e+100,
|
||||
137: -3.14e+100,
|
||||
138: -3.4098187790818413,
|
||||
139: -3.14e+100,
|
||||
140: -12.434752841302146,
|
||||
141: -7.946116471570005,
|
||||
142: -5.522673590839954,
|
||||
143: -3.3647479094528574,
|
||||
144: -3.14e+100,
|
||||
145: -9.163917277503234,
|
||||
146: -3.14e+100,
|
||||
147: -3.14e+100,
|
||||
148: -3.14e+100,
|
||||
149: -3.14e+100,
|
||||
150: -3.14e+100,
|
||||
151: -3.14e+100,
|
||||
152: -2.6740584874265685,
|
||||
153: -9.044728760238115,
|
||||
154: -3.14e+100,
|
||||
155: -12.434752841302146,
|
||||
156: -4.3315610890163585,
|
||||
157: -12.147070768850364,
|
||||
158: -3.14e+100,
|
||||
159: -3.14e+100,
|
||||
160: -9.844485675856319,
|
||||
161: -3.14e+100,
|
||||
162: -7.045681111485645,
|
||||
163: -3.14e+100,
|
||||
200: -3.14e+100,
|
||||
201: -3.14e+100,
|
||||
202: -3.14e+100,
|
||||
203: -3.14e+100,
|
||||
204: -3.14e+100,
|
||||
205: -3.14e+100,
|
||||
206: -3.14e+100,
|
||||
207: -3.14e+100,
|
||||
208: -3.14e+100,
|
||||
209: -3.14e+100,
|
||||
210: -3.14e+100,
|
||||
211: -3.14e+100,
|
||||
212: -3.14e+100,
|
||||
213: -3.14e+100,
|
||||
214: -3.14e+100,
|
||||
215: -3.14e+100,
|
||||
216: -3.14e+100,
|
||||
217: -3.14e+100,
|
||||
218: -3.14e+100,
|
||||
219: -3.14e+100,
|
||||
220: -3.14e+100,
|
||||
221: -3.14e+100,
|
||||
222: -3.14e+100,
|
||||
223: -3.14e+100,
|
||||
224: -3.14e+100,
|
||||
225: -3.14e+100,
|
||||
226: -3.14e+100,
|
||||
227: -3.14e+100,
|
||||
228: -3.14e+100,
|
||||
229: -3.14e+100,
|
||||
230: -3.14e+100,
|
||||
231: -3.14e+100,
|
||||
232: -3.14e+100,
|
||||
233: -3.14e+100,
|
||||
234: -3.14e+100,
|
||||
235: -3.14e+100,
|
||||
236: -3.14e+100,
|
||||
237: -3.14e+100,
|
||||
238: -3.14e+100,
|
||||
239: -3.14e+100,
|
||||
240: -3.14e+100,
|
||||
241: -3.14e+100,
|
||||
242: -3.14e+100,
|
||||
243: -3.14e+100,
|
||||
244: -3.14e+100,
|
||||
245: -3.14e+100,
|
||||
246: -3.14e+100,
|
||||
247: -3.14e+100,
|
||||
248: -3.14e+100,
|
||||
249: -3.14e+100,
|
||||
250: -3.14e+100,
|
||||
251: -3.14e+100,
|
||||
252: -3.14e+100,
|
||||
253: -3.14e+100,
|
||||
254: -3.14e+100,
|
||||
255: -3.14e+100,
|
||||
256: -3.14e+100,
|
||||
257: -3.14e+100,
|
||||
258: -3.14e+100,
|
||||
259: -3.14e+100,
|
||||
260: -3.14e+100,
|
||||
261: -3.14e+100,
|
||||
262: -3.14e+100,
|
||||
263: -3.14e+100,
|
||||
300: -3.14e+100,
|
||||
301: -3.14e+100,
|
||||
302: -3.14e+100,
|
||||
303: -3.14e+100,
|
||||
304: -3.14e+100,
|
||||
305: -3.14e+100,
|
||||
306: -3.14e+100,
|
||||
307: -3.14e+100,
|
||||
308: -3.14e+100,
|
||||
309: -3.14e+100,
|
||||
310: -3.14e+100,
|
||||
311: -3.14e+100,
|
||||
312: -3.14e+100,
|
||||
313: -3.14e+100,
|
||||
314: -3.14e+100,
|
||||
315: -3.14e+100,
|
||||
316: -3.14e+100,
|
||||
317: -3.14e+100,
|
||||
318: -3.14e+100,
|
||||
319: -3.14e+100,
|
||||
320: -3.14e+100,
|
||||
321: -3.14e+100,
|
||||
322: -3.14e+100,
|
||||
323: -3.14e+100,
|
||||
324: -3.14e+100,
|
||||
325: -3.14e+100,
|
||||
326: -3.14e+100,
|
||||
327: -3.14e+100,
|
||||
328: -3.14e+100,
|
||||
329: -3.14e+100,
|
||||
330: -3.14e+100,
|
||||
331: -3.14e+100,
|
||||
332: -3.14e+100,
|
||||
333: -3.14e+100,
|
||||
334: -3.14e+100,
|
||||
335: -3.14e+100,
|
||||
336: -3.14e+100,
|
||||
337: -3.14e+100,
|
||||
338: -3.14e+100,
|
||||
339: -3.14e+100,
|
||||
340: -3.14e+100,
|
||||
341: -3.14e+100,
|
||||
342: -3.14e+100,
|
||||
343: -3.14e+100,
|
||||
344: -3.14e+100,
|
||||
345: -3.14e+100,
|
||||
346: -3.14e+100,
|
||||
347: -3.14e+100,
|
||||
348: -3.14e+100,
|
||||
349: -3.14e+100,
|
||||
350: -3.14e+100,
|
||||
351: -3.14e+100,
|
||||
352: -3.14e+100,
|
||||
353: -3.14e+100,
|
||||
354: -3.14e+100,
|
||||
355: -3.14e+100,
|
||||
356: -3.14e+100,
|
||||
357: -3.14e+100,
|
||||
358: -3.14e+100,
|
||||
359: -3.14e+100,
|
||||
360: -3.14e+100,
|
||||
361: -3.14e+100,
|
||||
362: -3.14e+100,
|
||||
363: -3.14e+100,
|
||||
400: -3.9025396831295227,
|
||||
401: -11.048458480182255,
|
||||
402: -6.954113917960154,
|
||||
403: -12.84021794941031,
|
||||
404: -6.472888763970454,
|
||||
405: -3.14e+100,
|
||||
406: -4.786966795861212,
|
||||
407: -3.903919764181873,
|
||||
408: -3.14e+100,
|
||||
409: -8.948397651299683,
|
||||
410: -5.942513006281674,
|
||||
411: -3.14e+100,
|
||||
412: -5.194820249981676,
|
||||
413: -6.507826815331734,
|
||||
414: -8.650563207383884,
|
||||
415: -3.14e+100,
|
||||
416: -3.14e+100,
|
||||
417: -4.911992119644354,
|
||||
418: -3.14e+100,
|
||||
419: -6.940320595827818,
|
||||
420: -3.14e+100,
|
||||
421: -3.14e+100,
|
||||
422: -3.269200652116097,
|
||||
423: -10.825314928868044,
|
||||
424: -3.14e+100,
|
||||
425: -3.8551483897645107,
|
||||
426: -4.913434861102905,
|
||||
427: -4.483663103956885,
|
||||
428: -3.14e+100,
|
||||
429: -3.14e+100,
|
||||
430: -3.14e+100,
|
||||
431: -12.147070768850364,
|
||||
432: -3.14e+100,
|
||||
433: -8.464460927750023,
|
||||
434: -2.9868401813596317,
|
||||
435: -4.888658618255058,
|
||||
436: -3.14e+100,
|
||||
437: -3.14e+100,
|
||||
438: -2.7635336784127853,
|
||||
439: -10.275268591948773,
|
||||
440: -3.14e+100,
|
||||
441: -3.14e+100,
|
||||
442: -3.14e+100,
|
||||
443: -3.14e+100,
|
||||
444: -6.272842531880403,
|
||||
445: -6.940320595827818,
|
||||
446: -7.728230161053767,
|
||||
447: -7.5394037026636855,
|
||||
448: -6.85251045118004,
|
||||
449: -8.4153713175535,
|
||||
450: -8.15808672228609,
|
||||
451: -9.299258625372996,
|
||||
452: -3.053292303412302,
|
||||
453: -3.14e+100,
|
||||
454: -5.9430181843676895,
|
||||
455: -3.14e+100,
|
||||
456: -11.453923588290419,
|
||||
457: -3.14e+100,
|
||||
458: -3.14e+100,
|
||||
459: -8.427419656069674,
|
||||
460: -6.1970794699489575,
|
||||
461: -13.533365129970255,
|
||||
462: -3.14e+100,
|
||||
463: -3.14e+100,
|
||||
}
|
||||
|
||||
5739
posseg/prob_trans.go
5739
posseg/prob_trans.go
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,7 @@ import (
|
||||
|
||||
type probState struct {
|
||||
prob float64
|
||||
state string
|
||||
state uint16
|
||||
}
|
||||
|
||||
func (ps probState) String() string {
|
||||
@@ -31,26 +31,26 @@ func (pss probStates) Swap(i, j int) {
|
||||
pss[i], pss[j] = pss[j], pss[i]
|
||||
}
|
||||
|
||||
func viterbi(obs []rune) []string {
|
||||
func viterbi(obs []rune) []Tag {
|
||||
obsLength := len(obs)
|
||||
V := make([]map[string]float64, obsLength)
|
||||
V[0] = make(map[string]float64)
|
||||
mem_path := make([]map[string]string, obsLength)
|
||||
mem_path[0] = make(map[string]string)
|
||||
V := make([]map[uint16]float64, obsLength)
|
||||
V[0] = make(map[uint16]float64)
|
||||
mem_path := make([]map[uint16]uint16, obsLength)
|
||||
mem_path[0] = make(map[uint16]uint16)
|
||||
ys := charStateTab.get(obs[0]) // default is all_states
|
||||
for _, y := range ys {
|
||||
V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
|
||||
mem_path[0][y] = ""
|
||||
mem_path[0][y] = 0
|
||||
}
|
||||
for t := 1; t < obsLength; t++ {
|
||||
prev_states := make([]string, 0)
|
||||
prev_states := make([]uint16, 0)
|
||||
for x := range mem_path[t-1] {
|
||||
if len(probTrans[x]) > 0 {
|
||||
prev_states = append(prev_states, x)
|
||||
}
|
||||
}
|
||||
//use Go's map to implement Python's Set()
|
||||
prev_states_expect_next := make(map[string]int)
|
||||
prev_states_expect_next := make(map[uint16]int)
|
||||
for _, x := range prev_states {
|
||||
for y := range probTrans[x] {
|
||||
prev_states_expect_next[y] = 1
|
||||
@@ -58,7 +58,7 @@ func viterbi(obs []rune) []string {
|
||||
}
|
||||
tmp_obs_states := charStateTab.get(obs[t])
|
||||
|
||||
obs_states := make([]string, 0)
|
||||
obs_states := make([]uint16, 0)
|
||||
for index := range tmp_obs_states {
|
||||
if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
|
||||
obs_states = append(obs_states, tmp_obs_states[index])
|
||||
@@ -72,8 +72,8 @@ func viterbi(obs []rune) []string {
|
||||
if len(obs_states) == 0 {
|
||||
obs_states = probTransKeys
|
||||
}
|
||||
mem_path[t] = make(map[string]string) // TODO: value needed or not?
|
||||
V[t] = make(map[string]float64)
|
||||
mem_path[t] = make(map[uint16]uint16)
|
||||
V[t] = make(map[uint16]float64)
|
||||
for _, y := range obs_states {
|
||||
var max, ps probState
|
||||
for i, y0 := range prev_states {
|
||||
@@ -97,10 +97,10 @@ func viterbi(obs []rune) []string {
|
||||
}
|
||||
sort.Sort(sort.Reverse(last))
|
||||
state := last[0].state
|
||||
route := make([]string, len(obs))
|
||||
route := make([]Tag, len(obs))
|
||||
|
||||
for i := obsLength - 1; i >= 0; i-- {
|
||||
route[i] = state
|
||||
route[i] = Tag(state)
|
||||
state = mem_path[i][state]
|
||||
}
|
||||
return route
|
||||
|
||||
@@ -4,38 +4,60 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
var (
|
||||
route1 = []string{
|
||||
"Bnr",
|
||||
"Mnr",
|
||||
"Enr",
|
||||
"Sv",
|
||||
"Bv",
|
||||
"Ev",
|
||||
"Bn",
|
||||
"Mn",
|
||||
"En",
|
||||
"Sd",
|
||||
"Sv",
|
||||
"Sn",
|
||||
"Bv",
|
||||
"Ev",
|
||||
"Bnr",
|
||||
"Mnr",
|
||||
"Mnr",
|
||||
"Mnr",
|
||||
"Enr",
|
||||
"Szg"}
|
||||
)
|
||||
var defaultRoute []Tag
|
||||
|
||||
func init() {
|
||||
var t Tag
|
||||
t, _ = NewTag("B", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("M", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("E", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("S", "v")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("B", "v")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("E", "v")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("B", "n")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("M", "n")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("E", "n")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("S", "d")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("S", "v")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("S", "n")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("B", "v")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("E", "v")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("B", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("M", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("M", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("M", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("E", "nr")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
t, _ = NewTag("S", "zg")
|
||||
defaultRoute = append(defaultRoute, t)
|
||||
}
|
||||
|
||||
func TestViterbi(t *testing.T) {
|
||||
ss := "李小福是创新办主任也是云计算方面的专家;"
|
||||
route := viterbi([]rune(ss))
|
||||
if len(route) != len(route1) {
|
||||
if len(route) != len(defaultRoute) {
|
||||
t.Fatal(len(route))
|
||||
}
|
||||
for index, _ := range route {
|
||||
if route[index] != route1[index] {
|
||||
for index := range route {
|
||||
if route[index] != defaultRoute[index] {
|
||||
t.Fatal(route[index])
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user