mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-07 01:30:38 +08:00
make some public variable/function to private
This commit is contained in:
@@ -8,34 +8,34 @@ import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type TfIdf struct {
|
||||
Word string
|
||||
Freq float64
|
||||
type wordWeight struct {
|
||||
Word string
|
||||
Weight float64
|
||||
}
|
||||
|
||||
func (t TfIdf) String() string {
|
||||
return fmt.Sprintf("{%s: %f}", t.Word, t.Freq)
|
||||
func (w wordWeight) String() string {
|
||||
return fmt.Sprintf("{%s: %f}", w.Word, w.Weight)
|
||||
}
|
||||
|
||||
type TfIdfs []TfIdf
|
||||
type wordWeights []wordWeight
|
||||
|
||||
func (tis TfIdfs) Len() int {
|
||||
return len(tis)
|
||||
func (ws wordWeights) Len() int {
|
||||
return len(ws)
|
||||
}
|
||||
|
||||
func (tis TfIdfs) Less(i, j int) bool {
|
||||
if tis[i].Freq == tis[j].Freq {
|
||||
return tis[i].Word < tis[j].Word
|
||||
func (ws wordWeights) Less(i, j int) bool {
|
||||
if ws[i].Weight == ws[j].Weight {
|
||||
return ws[i].Word < ws[j].Word
|
||||
}
|
||||
|
||||
return tis[i].Freq < tis[j].Freq
|
||||
return ws[i].Weight < ws[j].Weight
|
||||
}
|
||||
|
||||
func (tis TfIdfs) Swap(i, j int) {
|
||||
tis[i], tis[j] = tis[j], tis[i]
|
||||
func (ws wordWeights) Swap(i, j int) {
|
||||
ws[i], ws[j] = ws[j], ws[i]
|
||||
}
|
||||
|
||||
func ExtractTags(sentence string, topK int) (tags TfIdfs) {
|
||||
func ExtractTags(sentence string, topK int) (tags wordWeights) {
|
||||
freq := make(map[string]float64)
|
||||
|
||||
for w := range jiebago.Cut(sentence, false, true) {
|
||||
@@ -59,21 +59,21 @@ func ExtractTags(sentence string, topK int) (tags TfIdfs) {
|
||||
for k, v := range freq {
|
||||
freq[k] = v / total
|
||||
}
|
||||
tis := make(TfIdfs, 0)
|
||||
ws := make(wordWeights, 0)
|
||||
for k, v := range freq {
|
||||
var ti TfIdf
|
||||
var ti wordWeight
|
||||
if freq_, ok := loader.Freq[k]; ok {
|
||||
ti = TfIdf{Word: k, Freq: freq_ * v}
|
||||
ti = wordWeight{Word: k, Weight: freq_ * v}
|
||||
} else {
|
||||
ti = TfIdf{Word: k, Freq: loader.Median * v}
|
||||
ti = wordWeight{Word: k, Weight: loader.Median * v}
|
||||
}
|
||||
tis = append(tis, ti)
|
||||
ws = append(ws, ti)
|
||||
}
|
||||
sort.Sort(sort.Reverse(tis))
|
||||
if len(tis) > topK {
|
||||
tags = tis[:topK]
|
||||
sort.Sort(sort.Reverse(ws))
|
||||
if len(ws) > topK {
|
||||
tags = ws[:topK]
|
||||
} else {
|
||||
tags = tis
|
||||
tags = ws
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
@@ -228,30 +228,30 @@ var (
|
||||
只是逼不得已
|
||||
雖然沒有藉口
|
||||
`
|
||||
LyciWeight = []TfIdf{
|
||||
TfIdf{Word: "所謂", Freq: 1.010262},
|
||||
TfIdf{Word: "是否", Freq: 0.738650},
|
||||
TfIdf{Word: "一般", Freq: 0.607600},
|
||||
TfIdf{Word: "雖然", Freq: 0.336754},
|
||||
TfIdf{Word: "退縮", Freq: 0.336754},
|
||||
TfIdf{Word: "肌迫", Freq: 0.336754},
|
||||
TfIdf{Word: "矯作", Freq: 0.336754},
|
||||
TfIdf{Word: "沒有", Freq: 0.336754},
|
||||
TfIdf{Word: "怯懦", Freq: 0.271099},
|
||||
TfIdf{Word: "隨便", Freq: 0.168377},
|
||||
LyciWeight = []wordWeight{
|
||||
wordWeight{Word: "所謂", Weight: 1.010262},
|
||||
wordWeight{Word: "是否", Weight: 0.738650},
|
||||
wordWeight{Word: "一般", Weight: 0.607600},
|
||||
wordWeight{Word: "雖然", Weight: 0.336754},
|
||||
wordWeight{Word: "退縮", Weight: 0.336754},
|
||||
wordWeight{Word: "肌迫", Weight: 0.336754},
|
||||
wordWeight{Word: "矯作", Weight: 0.336754},
|
||||
wordWeight{Word: "沒有", Weight: 0.336754},
|
||||
wordWeight{Word: "怯懦", Weight: 0.271099},
|
||||
wordWeight{Word: "隨便", Weight: 0.168377},
|
||||
}
|
||||
|
||||
LyciWeight2 = []TfIdf{
|
||||
TfIdf{Word: "所謂", Freq: 1.215739},
|
||||
TfIdf{Word: "一般", Freq: 0.731179},
|
||||
TfIdf{Word: "雖然", Freq: 0.405246},
|
||||
TfIdf{Word: "退縮", Freq: 0.405246},
|
||||
TfIdf{Word: "肌迫", Freq: 0.405246},
|
||||
TfIdf{Word: "矯作", Freq: 0.405246},
|
||||
TfIdf{Word: "怯懦", Freq: 0.326238},
|
||||
TfIdf{Word: "逼不得已", Freq: 0.202623},
|
||||
TfIdf{Word: "右銘", Freq: 0.202623},
|
||||
TfIdf{Word: "寬闊", Freq: 0.202623},
|
||||
LyciWeight2 = []wordWeight{
|
||||
wordWeight{Word: "所謂", Weight: 1.215739},
|
||||
wordWeight{Word: "一般", Weight: 0.731179},
|
||||
wordWeight{Word: "雖然", Weight: 0.405246},
|
||||
wordWeight{Word: "退縮", Weight: 0.405246},
|
||||
wordWeight{Word: "肌迫", Weight: 0.405246},
|
||||
wordWeight{Word: "矯作", Weight: 0.405246},
|
||||
wordWeight{Word: "怯懦", Weight: 0.326238},
|
||||
wordWeight{Word: "逼不得已", Weight: 0.202623},
|
||||
wordWeight{Word: "右銘", Weight: 0.202623},
|
||||
wordWeight{Word: "寬闊", Weight: 0.202623},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -278,7 +278,7 @@ func TestExtratTagsWithWeight(t *testing.T) {
|
||||
result := ExtractTags(Lyric, 10)
|
||||
for index, tag := range result {
|
||||
if LyciWeight[index].Word != tag.Word ||
|
||||
math.Abs(LyciWeight[index].Freq-tag.Freq) > 1e-6 {
|
||||
math.Abs(LyciWeight[index].Weight-tag.Weight) > 1e-6 {
|
||||
t.Errorf("%v != %v", tag, LyciWeight[index])
|
||||
}
|
||||
}
|
||||
@@ -291,7 +291,7 @@ func TestExtractTagsWithStopWordsFile(t *testing.T) {
|
||||
result := ExtractTags(Lyric, 7)
|
||||
for index, tag := range result {
|
||||
if LyciWeight2[index].Word != tag.Word ||
|
||||
math.Abs(LyciWeight2[index].Freq-tag.Freq) > 1e-6 {
|
||||
math.Abs(LyciWeight2[index].Weight-tag.Weight) > 1e-6 {
|
||||
t.Errorf("%v != %v", tag, LyciWeight2[index])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ func (u *undirectWeightedGraph) addEdge(start, end string, weight float64) {
|
||||
}
|
||||
}
|
||||
|
||||
func (u *undirectWeightedGraph) rank() TfIdfs {
|
||||
func (u *undirectWeightedGraph) rank() wordWeights {
|
||||
if !sort.IsSorted(u.keys) {
|
||||
sort.Sort(u.keys)
|
||||
}
|
||||
@@ -105,15 +105,15 @@ func (u *undirectWeightedGraph) rank() TfIdfs {
|
||||
maxRank = w
|
||||
}
|
||||
}
|
||||
result := make(TfIdfs, 0)
|
||||
result := make(wordWeights, 0)
|
||||
for n, w := range ws {
|
||||
result = append(result, TfIdf{Word: n, Freq: (w - minRank/10.0) / (maxRank - minRank/10.0)})
|
||||
result = append(result, wordWeight{Word: n, Weight: (w - minRank/10.0) / (maxRank - minRank/10.0)})
|
||||
}
|
||||
sort.Sort(sort.Reverse(result))
|
||||
return result
|
||||
}
|
||||
|
||||
func TextRankWithPOS(sentence string, topK int, allowPOS []string) TfIdfs {
|
||||
func TextRankWithPOS(sentence string, topK int, allowPOS []string) wordWeights {
|
||||
posFilt := make(map[string]int)
|
||||
for _, pos := range allowPOS {
|
||||
posFilt[pos] = 1
|
||||
@@ -152,7 +152,7 @@ func TextRankWithPOS(sentence string, topK int, allowPOS []string) TfIdfs {
|
||||
return tags
|
||||
}
|
||||
|
||||
func TextRank(sentence string, topK int) TfIdfs {
|
||||
func TextRank(sentence string, topK int) wordWeights {
|
||||
return TextRankWithPOS(sentence, topK, defaultAllowPOS)
|
||||
}
|
||||
|
||||
|
||||
@@ -8,17 +8,17 @@ import (
|
||||
var (
|
||||
sentence = "此外,公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元,增资后,吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年,实现营业收入0万元,实现净利润-139.13万元。"
|
||||
|
||||
tagRanks = TfIdfs{
|
||||
TfIdf{Word: "吉林", Freq: 1.0},
|
||||
TfIdf{Word: "欧亚", Freq: 0.87807810644},
|
||||
TfIdf{Word: "置业", Freq: 0.562048250306},
|
||||
TfIdf{Word: "实现", Freq: 0.520905743929},
|
||||
TfIdf{Word: "收入", Freq: 0.384283870648},
|
||||
TfIdf{Word: "增资", Freq: 0.360590945312},
|
||||
TfIdf{Word: "子公司", Freq: 0.353131980904},
|
||||
TfIdf{Word: "城市", Freq: 0.307509449283},
|
||||
TfIdf{Word: "全资", Freq: 0.306324426665},
|
||||
TfIdf{Word: "商业", Freq: 0.306138241063},
|
||||
tagRanks = wordWeights{
|
||||
wordWeight{Word: "吉林", Weight: 1.0},
|
||||
wordWeight{Word: "欧亚", Weight: 0.87807810644},
|
||||
wordWeight{Word: "置业", Weight: 0.562048250306},
|
||||
wordWeight{Word: "实现", Weight: 0.520905743929},
|
||||
wordWeight{Word: "收入", Weight: 0.384283870648},
|
||||
wordWeight{Word: "增资", Weight: 0.360590945312},
|
||||
wordWeight{Word: "子公司", Weight: 0.353131980904},
|
||||
wordWeight{Word: "城市", Weight: 0.307509449283},
|
||||
wordWeight{Word: "全资", Weight: 0.306324426665},
|
||||
wordWeight{Word: "商业", Weight: 0.306138241063},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -26,7 +26,7 @@ func TestTextRank(t *testing.T) {
|
||||
SetDictionary("../dict.txt")
|
||||
results := TextRank(sentence, 10)
|
||||
for index, tw := range results {
|
||||
if tw.Word != tagRanks[index].Word || math.Abs(tw.Freq-tagRanks[index].Freq) > 1e-6 {
|
||||
if tw.Word != tagRanks[index].Word || math.Abs(tw.Weight-tagRanks[index].Weight) > 1e-6 {
|
||||
t.Errorf("%v != %v", tw, tagRanks[index])
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user