mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-12 05:00:24 +08:00
code refactor, use uint for map key to improve performance
This commit is contained in:
86165
posseg/char_state_tab.go
86165
posseg/char_state_tab.go
File diff suppressed because it is too large
Load Diff
@@ -76,19 +76,19 @@ func (p *Posseg) cutDetailInternal(sentence string) chan Pair {
|
|||||||
next := 0
|
next := 0
|
||||||
for i, char := range runes {
|
for i, char := range runes {
|
||||||
pos := posList[i]
|
pos := posList[i]
|
||||||
switch pos[0] {
|
switch pos.Tag() {
|
||||||
case 'B':
|
case "B":
|
||||||
begin = i
|
begin = i
|
||||||
case 'E':
|
case "E":
|
||||||
result <- Pair{string(runes[begin : i+1]), string(pos[1:])}
|
result <- Pair{string(runes[begin : i+1]), pos.POS()}
|
||||||
next = i + 1
|
next = i + 1
|
||||||
case 'S':
|
case "S":
|
||||||
result <- Pair{string(char), string(pos[1:])}
|
result <- Pair{string(char), pos.POS()}
|
||||||
next = i + 1
|
next = i + 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if next < len(runes) {
|
if next < len(runes) {
|
||||||
result <- Pair{string(runes[next:]), string(posList[next][1:])}
|
result <- Pair{string(runes[next:]), posList[next].POS()}
|
||||||
}
|
}
|
||||||
close(result)
|
close(result)
|
||||||
}()
|
}()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,264 +1,260 @@
|
|||||||
package posseg
|
package posseg
|
||||||
|
|
||||||
var (
|
var probStart = map[uint16]float64{
|
||||||
probStart = make(map[string]float64)
|
100: -4.762305214596967,
|
||||||
)
|
101: -6.680066036784177,
|
||||||
|
102: -3.14e+100,
|
||||||
func init() {
|
103: -8.697083223018778,
|
||||||
probStart["Ba"] = -4.762305214596967
|
104: -5.018374362109218,
|
||||||
probStart["Bad"] = -6.680066036784177
|
105: -3.14e+100,
|
||||||
probStart["Bag"] = -3.14e+100
|
106: -3.423880184954888,
|
||||||
probStart["Ban"] = -8.697083223018778
|
107: -3.9750475297585357,
|
||||||
probStart["Bb"] = -5.018374362109218
|
108: -8.888974230828882,
|
||||||
probStart["Bbg"] = -3.14e+100
|
109: -3.14e+100,
|
||||||
probStart["Bc"] = -3.423880184954888
|
110: -8.563551830394255,
|
||||||
probStart["Bd"] = -3.9750475297585357
|
111: -3.14e+100,
|
||||||
probStart["Bdf"] = -8.888974230828882
|
112: -5.491630418482717,
|
||||||
probStart["Bdg"] = -3.14e+100
|
113: -3.14e+100,
|
||||||
probStart["Be"] = -8.563551830394255
|
114: -13.533365129970255,
|
||||||
probStart["Ben"] = -3.14e+100
|
115: -6.1157847275557105,
|
||||||
probStart["Bf"] = -5.491630418482717
|
116: -3.14e+100,
|
||||||
probStart["Bg"] = -3.14e+100
|
117: -5.0576191284681915,
|
||||||
probStart["Bh"] = -13.533365129970255
|
118: -3.14e+100,
|
||||||
probStart["Bi"] = -6.1157847275557105
|
119: -3.14e+100,
|
||||||
probStart["Bin"] = -3.14e+100
|
120: -4.905883584659895,
|
||||||
probStart["Bj"] = -5.0576191284681915
|
121: -3.14e+100,
|
||||||
probStart["Bjn"] = -3.14e+100
|
122: -3.6524299819046386,
|
||||||
probStart["Bk"] = -3.14e+100
|
123: -3.14e+100,
|
||||||
probStart["Bl"] = -4.905883584659895
|
124: -6.78695300139688,
|
||||||
probStart["Bln"] = -3.14e+100
|
125: -1.6966257797548328,
|
||||||
probStart["Bm"] = -3.6524299819046386
|
126: -3.14e+100,
|
||||||
probStart["Bmg"] = -3.14e+100
|
127: -2.2310495913769506,
|
||||||
probStart["Bmq"] = -6.78695300139688
|
128: -5.873722175405573,
|
||||||
probStart["Bn"] = -1.6966257797548328
|
129: -4.985642733519195,
|
||||||
probStart["Bng"] = -3.14e+100
|
130: -2.8228438314969213,
|
||||||
probStart["Bnr"] = -2.2310495913769506
|
131: -4.846091668182416,
|
||||||
probStart["Bnrfg"] = -5.873722175405573
|
132: -3.94698846057672,
|
||||||
probStart["Bnrt"] = -4.985642733519195
|
133: -8.433498702146057,
|
||||||
probStart["Bns"] = -2.8228438314969213
|
134: -4.200984132085048,
|
||||||
probStart["Bnt"] = -4.846091668182416
|
135: -6.998123858956596,
|
||||||
probStart["Bnz"] = -3.94698846057672
|
136: -3.14e+100,
|
||||||
probStart["Bo"] = -8.433498702146057
|
137: -3.14e+100,
|
||||||
probStart["Bp"] = -4.200984132085048
|
138: -3.4098187790818413,
|
||||||
probStart["Bq"] = -6.998123858956596
|
139: -3.14e+100,
|
||||||
probStart["Bqe"] = -3.14e+100
|
140: -12.434752841302146,
|
||||||
probStart["Bqg"] = -3.14e+100
|
141: -7.946116471570005,
|
||||||
probStart["Br"] = -3.4098187790818413
|
142: -5.522673590839954,
|
||||||
probStart["Brg"] = -3.14e+100
|
143: -3.3647479094528574,
|
||||||
probStart["Brr"] = -12.434752841302146
|
144: -3.14e+100,
|
||||||
probStart["Brz"] = -7.946116471570005
|
145: -9.163917277503234,
|
||||||
probStart["Bs"] = -5.522673590839954
|
146: -3.14e+100,
|
||||||
probStart["Bt"] = -3.3647479094528574
|
147: -3.14e+100,
|
||||||
probStart["Btg"] = -3.14e+100
|
148: -3.14e+100,
|
||||||
probStart["Bu"] = -9.163917277503234
|
149: -3.14e+100,
|
||||||
probStart["Bud"] = -3.14e+100
|
150: -3.14e+100,
|
||||||
probStart["Bug"] = -3.14e+100
|
151: -3.14e+100,
|
||||||
probStart["Buj"] = -3.14e+100
|
152: -2.6740584874265685,
|
||||||
probStart["Bul"] = -3.14e+100
|
153: -9.044728760238115,
|
||||||
probStart["Buv"] = -3.14e+100
|
154: -3.14e+100,
|
||||||
probStart["Buz"] = -3.14e+100
|
155: -12.434752841302146,
|
||||||
probStart["Bv"] = -2.6740584874265685
|
156: -4.3315610890163585,
|
||||||
probStart["Bvd"] = -9.044728760238115
|
157: -12.147070768850364,
|
||||||
probStart["Bvg"] = -3.14e+100
|
158: -3.14e+100,
|
||||||
probStart["Bvi"] = -12.434752841302146
|
159: -3.14e+100,
|
||||||
probStart["Bvn"] = -4.3315610890163585
|
160: -9.844485675856319,
|
||||||
probStart["Bvq"] = -12.147070768850364
|
161: -3.14e+100,
|
||||||
probStart["Bw"] = -3.14e+100
|
162: -7.045681111485645,
|
||||||
probStart["Bx"] = -3.14e+100
|
163: -3.14e+100,
|
||||||
probStart["By"] = -9.844485675856319
|
200: -3.14e+100,
|
||||||
probStart["Byg"] = -3.14e+100
|
201: -3.14e+100,
|
||||||
probStart["Bz"] = -7.045681111485645
|
202: -3.14e+100,
|
||||||
probStart["Bzg"] = -3.14e+100
|
203: -3.14e+100,
|
||||||
probStart["Ea"] = -3.14e+100
|
204: -3.14e+100,
|
||||||
probStart["Ead"] = -3.14e+100
|
205: -3.14e+100,
|
||||||
probStart["Eag"] = -3.14e+100
|
206: -3.14e+100,
|
||||||
probStart["Ean"] = -3.14e+100
|
207: -3.14e+100,
|
||||||
probStart["Eb"] = -3.14e+100
|
208: -3.14e+100,
|
||||||
probStart["Ebg"] = -3.14e+100
|
209: -3.14e+100,
|
||||||
probStart["Ec"] = -3.14e+100
|
210: -3.14e+100,
|
||||||
probStart["Ed"] = -3.14e+100
|
211: -3.14e+100,
|
||||||
probStart["Edf"] = -3.14e+100
|
212: -3.14e+100,
|
||||||
probStart["Edg"] = -3.14e+100
|
213: -3.14e+100,
|
||||||
probStart["Ee"] = -3.14e+100
|
214: -3.14e+100,
|
||||||
probStart["Een"] = -3.14e+100
|
215: -3.14e+100,
|
||||||
probStart["Ef"] = -3.14e+100
|
216: -3.14e+100,
|
||||||
probStart["Eg"] = -3.14e+100
|
217: -3.14e+100,
|
||||||
probStart["Eh"] = -3.14e+100
|
218: -3.14e+100,
|
||||||
probStart["Ei"] = -3.14e+100
|
219: -3.14e+100,
|
||||||
probStart["Ein"] = -3.14e+100
|
220: -3.14e+100,
|
||||||
probStart["Ej"] = -3.14e+100
|
221: -3.14e+100,
|
||||||
probStart["Ejn"] = -3.14e+100
|
222: -3.14e+100,
|
||||||
probStart["Ek"] = -3.14e+100
|
223: -3.14e+100,
|
||||||
probStart["El"] = -3.14e+100
|
224: -3.14e+100,
|
||||||
probStart["Eln"] = -3.14e+100
|
225: -3.14e+100,
|
||||||
probStart["Em"] = -3.14e+100
|
226: -3.14e+100,
|
||||||
probStart["Emg"] = -3.14e+100
|
227: -3.14e+100,
|
||||||
probStart["Emq"] = -3.14e+100
|
228: -3.14e+100,
|
||||||
probStart["En"] = -3.14e+100
|
229: -3.14e+100,
|
||||||
probStart["Eng"] = -3.14e+100
|
230: -3.14e+100,
|
||||||
probStart["Enr"] = -3.14e+100
|
231: -3.14e+100,
|
||||||
probStart["Enrfg"] = -3.14e+100
|
232: -3.14e+100,
|
||||||
probStart["Enrt"] = -3.14e+100
|
233: -3.14e+100,
|
||||||
probStart["Ens"] = -3.14e+100
|
234: -3.14e+100,
|
||||||
probStart["Ent"] = -3.14e+100
|
235: -3.14e+100,
|
||||||
probStart["Enz"] = -3.14e+100
|
236: -3.14e+100,
|
||||||
probStart["Eo"] = -3.14e+100
|
237: -3.14e+100,
|
||||||
probStart["Ep"] = -3.14e+100
|
238: -3.14e+100,
|
||||||
probStart["Eq"] = -3.14e+100
|
239: -3.14e+100,
|
||||||
probStart["Eqe"] = -3.14e+100
|
240: -3.14e+100,
|
||||||
probStart["Eqg"] = -3.14e+100
|
241: -3.14e+100,
|
||||||
probStart["Er"] = -3.14e+100
|
242: -3.14e+100,
|
||||||
probStart["Erg"] = -3.14e+100
|
243: -3.14e+100,
|
||||||
probStart["Err"] = -3.14e+100
|
244: -3.14e+100,
|
||||||
probStart["Erz"] = -3.14e+100
|
245: -3.14e+100,
|
||||||
probStart["Es"] = -3.14e+100
|
246: -3.14e+100,
|
||||||
probStart["Et"] = -3.14e+100
|
247: -3.14e+100,
|
||||||
probStart["Etg"] = -3.14e+100
|
248: -3.14e+100,
|
||||||
probStart["Eu"] = -3.14e+100
|
249: -3.14e+100,
|
||||||
probStart["Eud"] = -3.14e+100
|
250: -3.14e+100,
|
||||||
probStart["Eug"] = -3.14e+100
|
251: -3.14e+100,
|
||||||
probStart["Euj"] = -3.14e+100
|
252: -3.14e+100,
|
||||||
probStart["Eul"] = -3.14e+100
|
253: -3.14e+100,
|
||||||
probStart["Euv"] = -3.14e+100
|
254: -3.14e+100,
|
||||||
probStart["Euz"] = -3.14e+100
|
255: -3.14e+100,
|
||||||
probStart["Ev"] = -3.14e+100
|
256: -3.14e+100,
|
||||||
probStart["Evd"] = -3.14e+100
|
257: -3.14e+100,
|
||||||
probStart["Evg"] = -3.14e+100
|
258: -3.14e+100,
|
||||||
probStart["Evi"] = -3.14e+100
|
259: -3.14e+100,
|
||||||
probStart["Evn"] = -3.14e+100
|
260: -3.14e+100,
|
||||||
probStart["Evq"] = -3.14e+100
|
261: -3.14e+100,
|
||||||
probStart["Ew"] = -3.14e+100
|
262: -3.14e+100,
|
||||||
probStart["Ex"] = -3.14e+100
|
263: -3.14e+100,
|
||||||
probStart["Ey"] = -3.14e+100
|
300: -3.14e+100,
|
||||||
probStart["Eyg"] = -3.14e+100
|
301: -3.14e+100,
|
||||||
probStart["Ez"] = -3.14e+100
|
302: -3.14e+100,
|
||||||
probStart["Ezg"] = -3.14e+100
|
303: -3.14e+100,
|
||||||
probStart["Ma"] = -3.14e+100
|
304: -3.14e+100,
|
||||||
probStart["Mad"] = -3.14e+100
|
305: -3.14e+100,
|
||||||
probStart["Mag"] = -3.14e+100
|
306: -3.14e+100,
|
||||||
probStart["Man"] = -3.14e+100
|
307: -3.14e+100,
|
||||||
probStart["Mb"] = -3.14e+100
|
308: -3.14e+100,
|
||||||
probStart["Mbg"] = -3.14e+100
|
309: -3.14e+100,
|
||||||
probStart["Mc"] = -3.14e+100
|
310: -3.14e+100,
|
||||||
probStart["Md"] = -3.14e+100
|
311: -3.14e+100,
|
||||||
probStart["Mdf"] = -3.14e+100
|
312: -3.14e+100,
|
||||||
probStart["Mdg"] = -3.14e+100
|
313: -3.14e+100,
|
||||||
probStart["Me"] = -3.14e+100
|
314: -3.14e+100,
|
||||||
probStart["Men"] = -3.14e+100
|
315: -3.14e+100,
|
||||||
probStart["Mf"] = -3.14e+100
|
316: -3.14e+100,
|
||||||
probStart["Mg"] = -3.14e+100
|
317: -3.14e+100,
|
||||||
probStart["Mh"] = -3.14e+100
|
318: -3.14e+100,
|
||||||
probStart["Mi"] = -3.14e+100
|
319: -3.14e+100,
|
||||||
probStart["Min"] = -3.14e+100
|
320: -3.14e+100,
|
||||||
probStart["Mj"] = -3.14e+100
|
321: -3.14e+100,
|
||||||
probStart["Mjn"] = -3.14e+100
|
322: -3.14e+100,
|
||||||
probStart["Mk"] = -3.14e+100
|
323: -3.14e+100,
|
||||||
probStart["Ml"] = -3.14e+100
|
324: -3.14e+100,
|
||||||
probStart["Mln"] = -3.14e+100
|
325: -3.14e+100,
|
||||||
probStart["Mm"] = -3.14e+100
|
326: -3.14e+100,
|
||||||
probStart["Mmg"] = -3.14e+100
|
327: -3.14e+100,
|
||||||
probStart["Mmq"] = -3.14e+100
|
328: -3.14e+100,
|
||||||
probStart["Mn"] = -3.14e+100
|
329: -3.14e+100,
|
||||||
probStart["Mng"] = -3.14e+100
|
330: -3.14e+100,
|
||||||
probStart["Mnr"] = -3.14e+100
|
331: -3.14e+100,
|
||||||
probStart["Mnrfg"] = -3.14e+100
|
332: -3.14e+100,
|
||||||
probStart["Mnrt"] = -3.14e+100
|
333: -3.14e+100,
|
||||||
probStart["Mns"] = -3.14e+100
|
334: -3.14e+100,
|
||||||
probStart["Mnt"] = -3.14e+100
|
335: -3.14e+100,
|
||||||
probStart["Mnz"] = -3.14e+100
|
336: -3.14e+100,
|
||||||
probStart["Mo"] = -3.14e+100
|
337: -3.14e+100,
|
||||||
probStart["Mp"] = -3.14e+100
|
338: -3.14e+100,
|
||||||
probStart["Mq"] = -3.14e+100
|
339: -3.14e+100,
|
||||||
probStart["Mqe"] = -3.14e+100
|
340: -3.14e+100,
|
||||||
probStart["Mqg"] = -3.14e+100
|
341: -3.14e+100,
|
||||||
probStart["Mr"] = -3.14e+100
|
342: -3.14e+100,
|
||||||
probStart["Mrg"] = -3.14e+100
|
343: -3.14e+100,
|
||||||
probStart["Mrr"] = -3.14e+100
|
344: -3.14e+100,
|
||||||
probStart["Mrz"] = -3.14e+100
|
345: -3.14e+100,
|
||||||
probStart["Ms"] = -3.14e+100
|
346: -3.14e+100,
|
||||||
probStart["Mt"] = -3.14e+100
|
347: -3.14e+100,
|
||||||
probStart["Mtg"] = -3.14e+100
|
348: -3.14e+100,
|
||||||
probStart["Mu"] = -3.14e+100
|
349: -3.14e+100,
|
||||||
probStart["Mud"] = -3.14e+100
|
350: -3.14e+100,
|
||||||
probStart["Mug"] = -3.14e+100
|
351: -3.14e+100,
|
||||||
probStart["Muj"] = -3.14e+100
|
352: -3.14e+100,
|
||||||
probStart["Mul"] = -3.14e+100
|
353: -3.14e+100,
|
||||||
probStart["Muv"] = -3.14e+100
|
354: -3.14e+100,
|
||||||
probStart["Muz"] = -3.14e+100
|
355: -3.14e+100,
|
||||||
probStart["Mv"] = -3.14e+100
|
356: -3.14e+100,
|
||||||
probStart["Mvd"] = -3.14e+100
|
357: -3.14e+100,
|
||||||
probStart["Mvg"] = -3.14e+100
|
358: -3.14e+100,
|
||||||
probStart["Mvi"] = -3.14e+100
|
359: -3.14e+100,
|
||||||
probStart["Mvn"] = -3.14e+100
|
360: -3.14e+100,
|
||||||
probStart["Mvq"] = -3.14e+100
|
361: -3.14e+100,
|
||||||
probStart["Mw"] = -3.14e+100
|
362: -3.14e+100,
|
||||||
probStart["Mx"] = -3.14e+100
|
363: -3.14e+100,
|
||||||
probStart["My"] = -3.14e+100
|
400: -3.9025396831295227,
|
||||||
probStart["Myg"] = -3.14e+100
|
401: -11.048458480182255,
|
||||||
probStart["Mz"] = -3.14e+100
|
402: -6.954113917960154,
|
||||||
probStart["Mzg"] = -3.14e+100
|
403: -12.84021794941031,
|
||||||
probStart["Sa"] = -3.9025396831295227
|
404: -6.472888763970454,
|
||||||
probStart["Sad"] = -11.048458480182255
|
405: -3.14e+100,
|
||||||
probStart["Sag"] = -6.954113917960154
|
406: -4.786966795861212,
|
||||||
probStart["San"] = -12.84021794941031
|
407: -3.903919764181873,
|
||||||
probStart["Sb"] = -6.472888763970454
|
408: -3.14e+100,
|
||||||
probStart["Sbg"] = -3.14e+100
|
409: -8.948397651299683,
|
||||||
probStart["Sc"] = -4.786966795861212
|
410: -5.942513006281674,
|
||||||
probStart["Sd"] = -3.903919764181873
|
411: -3.14e+100,
|
||||||
probStart["Sdf"] = -3.14e+100
|
412: -5.194820249981676,
|
||||||
probStart["Sdg"] = -8.948397651299683
|
413: -6.507826815331734,
|
||||||
probStart["Se"] = -5.942513006281674
|
414: -8.650563207383884,
|
||||||
probStart["Sen"] = -3.14e+100
|
415: -3.14e+100,
|
||||||
probStart["Sf"] = -5.194820249981676
|
416: -3.14e+100,
|
||||||
probStart["Sg"] = -6.507826815331734
|
417: -4.911992119644354,
|
||||||
probStart["Sh"] = -8.650563207383884
|
418: -3.14e+100,
|
||||||
probStart["Si"] = -3.14e+100
|
419: -6.940320595827818,
|
||||||
probStart["Sin"] = -3.14e+100
|
420: -3.14e+100,
|
||||||
probStart["Sj"] = -4.911992119644354
|
421: -3.14e+100,
|
||||||
probStart["Sjn"] = -3.14e+100
|
422: -3.269200652116097,
|
||||||
probStart["Sk"] = -6.940320595827818
|
423: -10.825314928868044,
|
||||||
probStart["Sl"] = -3.14e+100
|
424: -3.14e+100,
|
||||||
probStart["Sln"] = -3.14e+100
|
425: -3.8551483897645107,
|
||||||
probStart["Sm"] = -3.269200652116097
|
426: -4.913434861102905,
|
||||||
probStart["Smg"] = -10.825314928868044
|
427: -4.483663103956885,
|
||||||
probStart["Smq"] = -3.14e+100
|
428: -3.14e+100,
|
||||||
probStart["Sn"] = -3.8551483897645107
|
429: -3.14e+100,
|
||||||
probStart["Sng"] = -4.913434861102905
|
430: -3.14e+100,
|
||||||
probStart["Snr"] = -4.483663103956885
|
431: -12.147070768850364,
|
||||||
probStart["Snrfg"] = -3.14e+100
|
432: -3.14e+100,
|
||||||
probStart["Snrt"] = -3.14e+100
|
433: -8.464460927750023,
|
||||||
probStart["Sns"] = -3.14e+100
|
434: -2.9868401813596317,
|
||||||
probStart["Snt"] = -12.147070768850364
|
435: -4.888658618255058,
|
||||||
probStart["Snz"] = -3.14e+100
|
436: -3.14e+100,
|
||||||
probStart["So"] = -8.464460927750023
|
437: -3.14e+100,
|
||||||
probStart["Sp"] = -2.9868401813596317
|
438: -2.7635336784127853,
|
||||||
probStart["Sq"] = -4.888658618255058
|
439: -10.275268591948773,
|
||||||
probStart["Sqe"] = -3.14e+100
|
440: -3.14e+100,
|
||||||
probStart["Sqg"] = -3.14e+100
|
441: -3.14e+100,
|
||||||
probStart["Sr"] = -2.7635336784127853
|
442: -3.14e+100,
|
||||||
probStart["Srg"] = -10.275268591948773
|
443: -3.14e+100,
|
||||||
probStart["Srr"] = -3.14e+100
|
444: -6.272842531880403,
|
||||||
probStart["Srz"] = -3.14e+100
|
445: -6.940320595827818,
|
||||||
probStart["Ss"] = -3.14e+100
|
446: -7.728230161053767,
|
||||||
probStart["St"] = -3.14e+100
|
447: -7.5394037026636855,
|
||||||
probStart["Stg"] = -6.272842531880403
|
448: -6.85251045118004,
|
||||||
probStart["Su"] = -6.940320595827818
|
449: -8.4153713175535,
|
||||||
probStart["Sud"] = -7.728230161053767
|
450: -8.15808672228609,
|
||||||
probStart["Sug"] = -7.5394037026636855
|
451: -9.299258625372996,
|
||||||
probStart["Suj"] = -6.85251045118004
|
452: -3.053292303412302,
|
||||||
probStart["Sul"] = -8.4153713175535
|
453: -3.14e+100,
|
||||||
probStart["Suv"] = -8.15808672228609
|
454: -5.9430181843676895,
|
||||||
probStart["Suz"] = -9.299258625372996
|
455: -3.14e+100,
|
||||||
probStart["Sv"] = -3.053292303412302
|
456: -11.453923588290419,
|
||||||
probStart["Svd"] = -3.14e+100
|
457: -3.14e+100,
|
||||||
probStart["Svg"] = -5.9430181843676895
|
458: -3.14e+100,
|
||||||
probStart["Svi"] = -3.14e+100
|
459: -8.427419656069674,
|
||||||
probStart["Svn"] = -11.453923588290419
|
460: -6.1970794699489575,
|
||||||
probStart["Svq"] = -3.14e+100
|
461: -13.533365129970255,
|
||||||
probStart["Sw"] = -3.14e+100
|
462: -3.14e+100,
|
||||||
probStart["Sx"] = -8.427419656069674
|
463: -3.14e+100,
|
||||||
probStart["Sy"] = -6.1970794699489575
|
|
||||||
probStart["Syg"] = -13.533365129970255
|
|
||||||
probStart["Sz"] = -3.14e+100
|
|
||||||
probStart["Szg"] = -3.14e+100
|
|
||||||
}
|
}
|
||||||
|
|||||||
5739
posseg/prob_trans.go
5739
posseg/prob_trans.go
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,7 @@ import (
|
|||||||
|
|
||||||
type probState struct {
|
type probState struct {
|
||||||
prob float64
|
prob float64
|
||||||
state string
|
state uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ps probState) String() string {
|
func (ps probState) String() string {
|
||||||
@@ -31,26 +31,26 @@ func (pss probStates) Swap(i, j int) {
|
|||||||
pss[i], pss[j] = pss[j], pss[i]
|
pss[i], pss[j] = pss[j], pss[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
func viterbi(obs []rune) []string {
|
func viterbi(obs []rune) []Tag {
|
||||||
obsLength := len(obs)
|
obsLength := len(obs)
|
||||||
V := make([]map[string]float64, obsLength)
|
V := make([]map[uint16]float64, obsLength)
|
||||||
V[0] = make(map[string]float64)
|
V[0] = make(map[uint16]float64)
|
||||||
mem_path := make([]map[string]string, obsLength)
|
mem_path := make([]map[uint16]uint16, obsLength)
|
||||||
mem_path[0] = make(map[string]string)
|
mem_path[0] = make(map[uint16]uint16)
|
||||||
ys := charStateTab.get(obs[0]) // default is all_states
|
ys := charStateTab.get(obs[0]) // default is all_states
|
||||||
for _, y := range ys {
|
for _, y := range ys {
|
||||||
V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
|
V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
|
||||||
mem_path[0][y] = ""
|
mem_path[0][y] = 0
|
||||||
}
|
}
|
||||||
for t := 1; t < obsLength; t++ {
|
for t := 1; t < obsLength; t++ {
|
||||||
prev_states := make([]string, 0)
|
prev_states := make([]uint16, 0)
|
||||||
for x := range mem_path[t-1] {
|
for x := range mem_path[t-1] {
|
||||||
if len(probTrans[x]) > 0 {
|
if len(probTrans[x]) > 0 {
|
||||||
prev_states = append(prev_states, x)
|
prev_states = append(prev_states, x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//use Go's map to implement Python's Set()
|
//use Go's map to implement Python's Set()
|
||||||
prev_states_expect_next := make(map[string]int)
|
prev_states_expect_next := make(map[uint16]int)
|
||||||
for _, x := range prev_states {
|
for _, x := range prev_states {
|
||||||
for y := range probTrans[x] {
|
for y := range probTrans[x] {
|
||||||
prev_states_expect_next[y] = 1
|
prev_states_expect_next[y] = 1
|
||||||
@@ -58,7 +58,7 @@ func viterbi(obs []rune) []string {
|
|||||||
}
|
}
|
||||||
tmp_obs_states := charStateTab.get(obs[t])
|
tmp_obs_states := charStateTab.get(obs[t])
|
||||||
|
|
||||||
obs_states := make([]string, 0)
|
obs_states := make([]uint16, 0)
|
||||||
for index := range tmp_obs_states {
|
for index := range tmp_obs_states {
|
||||||
if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
|
if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
|
||||||
obs_states = append(obs_states, tmp_obs_states[index])
|
obs_states = append(obs_states, tmp_obs_states[index])
|
||||||
@@ -72,8 +72,8 @@ func viterbi(obs []rune) []string {
|
|||||||
if len(obs_states) == 0 {
|
if len(obs_states) == 0 {
|
||||||
obs_states = probTransKeys
|
obs_states = probTransKeys
|
||||||
}
|
}
|
||||||
mem_path[t] = make(map[string]string) // TODO: value needed or not?
|
mem_path[t] = make(map[uint16]uint16)
|
||||||
V[t] = make(map[string]float64)
|
V[t] = make(map[uint16]float64)
|
||||||
for _, y := range obs_states {
|
for _, y := range obs_states {
|
||||||
var max, ps probState
|
var max, ps probState
|
||||||
for i, y0 := range prev_states {
|
for i, y0 := range prev_states {
|
||||||
@@ -97,10 +97,10 @@ func viterbi(obs []rune) []string {
|
|||||||
}
|
}
|
||||||
sort.Sort(sort.Reverse(last))
|
sort.Sort(sort.Reverse(last))
|
||||||
state := last[0].state
|
state := last[0].state
|
||||||
route := make([]string, len(obs))
|
route := make([]Tag, len(obs))
|
||||||
|
|
||||||
for i := obsLength - 1; i >= 0; i-- {
|
for i := obsLength - 1; i >= 0; i-- {
|
||||||
route[i] = state
|
route[i] = Tag(state)
|
||||||
state = mem_path[i][state]
|
state = mem_path[i][state]
|
||||||
}
|
}
|
||||||
return route
|
return route
|
||||||
|
|||||||
@@ -4,38 +4,60 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var defaultRoute []Tag
|
||||||
route1 = []string{
|
|
||||||
"Bnr",
|
func init() {
|
||||||
"Mnr",
|
var t Tag
|
||||||
"Enr",
|
t, _ = NewTag("B", "nr")
|
||||||
"Sv",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"Bv",
|
t, _ = NewTag("M", "nr")
|
||||||
"Ev",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"Bn",
|
t, _ = NewTag("E", "nr")
|
||||||
"Mn",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"En",
|
t, _ = NewTag("S", "v")
|
||||||
"Sd",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"Sv",
|
t, _ = NewTag("B", "v")
|
||||||
"Sn",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"Bv",
|
t, _ = NewTag("E", "v")
|
||||||
"Ev",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"Bnr",
|
t, _ = NewTag("B", "n")
|
||||||
"Mnr",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"Mnr",
|
t, _ = NewTag("M", "n")
|
||||||
"Mnr",
|
defaultRoute = append(defaultRoute, t)
|
||||||
"Enr",
|
t, _ = NewTag("E", "n")
|
||||||
"Szg"}
|
defaultRoute = append(defaultRoute, t)
|
||||||
)
|
t, _ = NewTag("S", "d")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("S", "v")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("S", "n")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("B", "v")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("E", "v")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("B", "nr")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("M", "nr")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("M", "nr")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("M", "nr")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("E", "nr")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
t, _ = NewTag("S", "zg")
|
||||||
|
defaultRoute = append(defaultRoute, t)
|
||||||
|
}
|
||||||
|
|
||||||
func TestViterbi(t *testing.T) {
|
func TestViterbi(t *testing.T) {
|
||||||
ss := "李小福是创新办主任也是云计算方面的专家;"
|
ss := "李小福是创新办主任也是云计算方面的专家;"
|
||||||
route := viterbi([]rune(ss))
|
route := viterbi([]rune(ss))
|
||||||
if len(route) != len(route1) {
|
if len(route) != len(defaultRoute) {
|
||||||
t.Fatal(len(route))
|
t.Fatal(len(route))
|
||||||
}
|
}
|
||||||
for index, _ := range route {
|
for index := range route {
|
||||||
if route[index] != route1[index] {
|
if route[index] != defaultRoute[index] {
|
||||||
t.Fatal(route[index])
|
t.Fatal(route[index])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user