1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-13 05:31:02 +08:00

Make some public variables/functions private

This commit is contained in:
Wang Bin
2015-02-28 18:17:48 +08:00
parent d06ba85b0b
commit 1c8d4fbf23
4 changed files with 65 additions and 65 deletions

View File

@@ -8,34 +8,34 @@ import (
"unicode/utf8" "unicode/utf8"
) )
type TfIdf struct { type wordWeight struct {
Word string Word string
Freq float64 Weight float64
} }
func (t TfIdf) String() string { func (w wordWeight) String() string {
return fmt.Sprintf("{%s: %f}", t.Word, t.Freq) return fmt.Sprintf("{%s: %f}", w.Word, w.Weight)
} }
type TfIdfs []TfIdf type wordWeights []wordWeight
func (tis TfIdfs) Len() int { func (ws wordWeights) Len() int {
return len(tis) return len(ws)
} }
func (tis TfIdfs) Less(i, j int) bool { func (ws wordWeights) Less(i, j int) bool {
if tis[i].Freq == tis[j].Freq { if ws[i].Weight == ws[j].Weight {
return tis[i].Word < tis[j].Word return ws[i].Word < ws[j].Word
} }
return tis[i].Freq < tis[j].Freq return ws[i].Weight < ws[j].Weight
} }
func (tis TfIdfs) Swap(i, j int) { func (ws wordWeights) Swap(i, j int) {
tis[i], tis[j] = tis[j], tis[i] ws[i], ws[j] = ws[j], ws[i]
} }
func ExtractTags(sentence string, topK int) (tags TfIdfs) { func ExtractTags(sentence string, topK int) (tags wordWeights) {
freq := make(map[string]float64) freq := make(map[string]float64)
for w := range jiebago.Cut(sentence, false, true) { for w := range jiebago.Cut(sentence, false, true) {
@@ -59,21 +59,21 @@ func ExtractTags(sentence string, topK int) (tags TfIdfs) {
for k, v := range freq { for k, v := range freq {
freq[k] = v / total freq[k] = v / total
} }
tis := make(TfIdfs, 0) ws := make(wordWeights, 0)
for k, v := range freq { for k, v := range freq {
var ti TfIdf var ti wordWeight
if freq_, ok := loader.Freq[k]; ok { if freq_, ok := loader.Freq[k]; ok {
ti = TfIdf{Word: k, Freq: freq_ * v} ti = wordWeight{Word: k, Weight: freq_ * v}
} else { } else {
ti = TfIdf{Word: k, Freq: loader.Median * v} ti = wordWeight{Word: k, Weight: loader.Median * v}
} }
tis = append(tis, ti) ws = append(ws, ti)
} }
sort.Sort(sort.Reverse(tis)) sort.Sort(sort.Reverse(ws))
if len(tis) > topK { if len(ws) > topK {
tags = tis[:topK] tags = ws[:topK]
} else { } else {
tags = tis tags = ws
} }
return tags return tags
} }

View File

@@ -228,30 +228,30 @@ var (
只是逼不得已 只是逼不得已
雖然沒有藉口 雖然沒有藉口
` `
LyciWeight = []TfIdf{ LyciWeight = []wordWeight{
TfIdf{Word: "所謂", Freq: 1.010262}, wordWeight{Word: "所謂", Weight: 1.010262},
TfIdf{Word: "是否", Freq: 0.738650}, wordWeight{Word: "是否", Weight: 0.738650},
TfIdf{Word: "一般", Freq: 0.607600}, wordWeight{Word: "一般", Weight: 0.607600},
TfIdf{Word: "雖然", Freq: 0.336754}, wordWeight{Word: "雖然", Weight: 0.336754},
TfIdf{Word: "退縮", Freq: 0.336754}, wordWeight{Word: "退縮", Weight: 0.336754},
TfIdf{Word: "肌迫", Freq: 0.336754}, wordWeight{Word: "肌迫", Weight: 0.336754},
TfIdf{Word: "矯作", Freq: 0.336754}, wordWeight{Word: "矯作", Weight: 0.336754},
TfIdf{Word: "沒有", Freq: 0.336754}, wordWeight{Word: "沒有", Weight: 0.336754},
TfIdf{Word: "怯懦", Freq: 0.271099}, wordWeight{Word: "怯懦", Weight: 0.271099},
TfIdf{Word: "隨便", Freq: 0.168377}, wordWeight{Word: "隨便", Weight: 0.168377},
} }
LyciWeight2 = []TfIdf{ LyciWeight2 = []wordWeight{
TfIdf{Word: "所謂", Freq: 1.215739}, wordWeight{Word: "所謂", Weight: 1.215739},
TfIdf{Word: "一般", Freq: 0.731179}, wordWeight{Word: "一般", Weight: 0.731179},
TfIdf{Word: "雖然", Freq: 0.405246}, wordWeight{Word: "雖然", Weight: 0.405246},
TfIdf{Word: "退縮", Freq: 0.405246}, wordWeight{Word: "退縮", Weight: 0.405246},
TfIdf{Word: "肌迫", Freq: 0.405246}, wordWeight{Word: "肌迫", Weight: 0.405246},
TfIdf{Word: "矯作", Freq: 0.405246}, wordWeight{Word: "矯作", Weight: 0.405246},
TfIdf{Word: "怯懦", Freq: 0.326238}, wordWeight{Word: "怯懦", Weight: 0.326238},
TfIdf{Word: "逼不得已", Freq: 0.202623}, wordWeight{Word: "逼不得已", Weight: 0.202623},
TfIdf{Word: "右銘", Freq: 0.202623}, wordWeight{Word: "右銘", Weight: 0.202623},
TfIdf{Word: "寬闊", Freq: 0.202623}, wordWeight{Word: "寬闊", Weight: 0.202623},
} }
) )
@@ -278,7 +278,7 @@ func TestExtratTagsWithWeight(t *testing.T) {
result := ExtractTags(Lyric, 10) result := ExtractTags(Lyric, 10)
for index, tag := range result { for index, tag := range result {
if LyciWeight[index].Word != tag.Word || if LyciWeight[index].Word != tag.Word ||
math.Abs(LyciWeight[index].Freq-tag.Freq) > 1e-6 { math.Abs(LyciWeight[index].Weight-tag.Weight) > 1e-6 {
t.Errorf("%v != %v", tag, LyciWeight[index]) t.Errorf("%v != %v", tag, LyciWeight[index])
} }
} }
@@ -291,7 +291,7 @@ func TestExtractTagsWithStopWordsFile(t *testing.T) {
result := ExtractTags(Lyric, 7) result := ExtractTags(Lyric, 7)
for index, tag := range result { for index, tag := range result {
if LyciWeight2[index].Word != tag.Word || if LyciWeight2[index].Word != tag.Word ||
math.Abs(LyciWeight2[index].Freq-tag.Freq) > 1e-6 { math.Abs(LyciWeight2[index].Weight-tag.Weight) > 1e-6 {
t.Errorf("%v != %v", tag, LyciWeight2[index]) t.Errorf("%v != %v", tag, LyciWeight2[index])
} }
} }

View File

@@ -65,7 +65,7 @@ func (u *undirectWeightedGraph) addEdge(start, end string, weight float64) {
} }
} }
func (u *undirectWeightedGraph) rank() TfIdfs { func (u *undirectWeightedGraph) rank() wordWeights {
if !sort.IsSorted(u.keys) { if !sort.IsSorted(u.keys) {
sort.Sort(u.keys) sort.Sort(u.keys)
} }
@@ -105,15 +105,15 @@ func (u *undirectWeightedGraph) rank() TfIdfs {
maxRank = w maxRank = w
} }
} }
result := make(TfIdfs, 0) result := make(wordWeights, 0)
for n, w := range ws { for n, w := range ws {
result = append(result, TfIdf{Word: n, Freq: (w - minRank/10.0) / (maxRank - minRank/10.0)}) result = append(result, wordWeight{Word: n, Weight: (w - minRank/10.0) / (maxRank - minRank/10.0)})
} }
sort.Sort(sort.Reverse(result)) sort.Sort(sort.Reverse(result))
return result return result
} }
func TextRankWithPOS(sentence string, topK int, allowPOS []string) TfIdfs { func TextRankWithPOS(sentence string, topK int, allowPOS []string) wordWeights {
posFilt := make(map[string]int) posFilt := make(map[string]int)
for _, pos := range allowPOS { for _, pos := range allowPOS {
posFilt[pos] = 1 posFilt[pos] = 1
@@ -152,7 +152,7 @@ func TextRankWithPOS(sentence string, topK int, allowPOS []string) TfIdfs {
return tags return tags
} }
func TextRank(sentence string, topK int) TfIdfs { func TextRank(sentence string, topK int) wordWeights {
return TextRankWithPOS(sentence, topK, defaultAllowPOS) return TextRankWithPOS(sentence, topK, defaultAllowPOS)
} }

View File

@@ -8,17 +8,17 @@ import (
var ( var (
sentence = "此外公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元增资后吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年实现营业收入0万元实现净利润-139.13万元。" sentence = "此外公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元增资后吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年实现营业收入0万元实现净利润-139.13万元。"
tagRanks = TfIdfs{ tagRanks = wordWeights{
TfIdf{Word: "吉林", Freq: 1.0}, wordWeight{Word: "吉林", Weight: 1.0},
TfIdf{Word: "欧亚", Freq: 0.87807810644}, wordWeight{Word: "欧亚", Weight: 0.87807810644},
TfIdf{Word: "置业", Freq: 0.562048250306}, wordWeight{Word: "置业", Weight: 0.562048250306},
TfIdf{Word: "实现", Freq: 0.520905743929}, wordWeight{Word: "实现", Weight: 0.520905743929},
TfIdf{Word: "收入", Freq: 0.384283870648}, wordWeight{Word: "收入", Weight: 0.384283870648},
TfIdf{Word: "增资", Freq: 0.360590945312}, wordWeight{Word: "增资", Weight: 0.360590945312},
TfIdf{Word: "子公司", Freq: 0.353131980904}, wordWeight{Word: "子公司", Weight: 0.353131980904},
TfIdf{Word: "城市", Freq: 0.307509449283}, wordWeight{Word: "城市", Weight: 0.307509449283},
TfIdf{Word: "全资", Freq: 0.306324426665}, wordWeight{Word: "全资", Weight: 0.306324426665},
TfIdf{Word: "商业", Freq: 0.306138241063}, wordWeight{Word: "商业", Weight: 0.306138241063},
} }
) )
@@ -26,7 +26,7 @@ func TestTextRank(t *testing.T) {
SetDictionary("../dict.txt") SetDictionary("../dict.txt")
results := TextRank(sentence, 10) results := TextRank(sentence, 10)
for index, tw := range results { for index, tw := range results {
if tw.Word != tagRanks[index].Word || math.Abs(tw.Freq-tagRanks[index].Freq) > 1e-6 { if tw.Word != tagRanks[index].Word || math.Abs(tw.Weight-tagRanks[index].Weight) > 1e-6 {
t.Errorf("%v != %v", tw, tagRanks[index]) t.Errorf("%v != %v", tw, tagRanks[index])
} }
} }