mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-13 05:31:02 +08:00
Removed dict.go and moved its functions to util.go; also use an interface to simplify the code.
This commit is contained in:
@@ -41,6 +41,10 @@ type TagExtracter struct {
|
||||
stopWords map[string]int
|
||||
}
|
||||
|
||||
func (t *TagExtracter) AddEntry(entry *jiebago.Entry) {
|
||||
t.stopWords[entry.Word] = 1
|
||||
}
|
||||
|
||||
func NewTagExtracter(dictFileName, IDFFileName string) (*TagExtracter, error) {
|
||||
j, err := jiebago.NewJieba(dictFileName)
|
||||
if err != nil {
|
||||
@@ -61,11 +65,7 @@ func (t *TagExtracter) SetStopWords(stopWordsFileName string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
wtfs, err := jiebago.ParseDictFile(stopWordsFilePath)
|
||||
for _, wtf := range wtfs {
|
||||
t.stopWords[wtf.Word] = 1
|
||||
}
|
||||
return nil
|
||||
return jiebago.LoadDict(t, stopWordsFilePath, false)
|
||||
}
|
||||
|
||||
// Keyword extraction.
|
||||
|
||||
@@ -8,6 +8,12 @@ import (
|
||||
// IDFLoader holds the per-word frequencies read from an IDF dictionary
// together with their median.
//
// Field order matters: NewIDFLoader builds the value with a positional
// composite literal.
type IDFLoader struct {
	// IDFFreq maps each word to the frequency recorded for it by AddEntry.
	IDFFreq map[string]float64
	// Median is set by NewIDFLoader to the element at index len/2 of the
	// sorted frequencies.
	Median float64
	// freqs accumulates every frequency passed to AddEntry; NewIDFLoader
	// sorts it to compute Median and then resets it to an empty slice.
	freqs []float64
}
|
||||
|
||||
func (l *IDFLoader) AddEntry(entry *jiebago.Entry) {
|
||||
l.IDFFreq[entry.Word] = entry.Freq
|
||||
l.freqs = append(l.freqs, entry.Freq)
|
||||
}
|
||||
|
||||
func NewIDFLoader(IDFFileName string) (*IDFLoader, error) {
|
||||
@@ -15,18 +21,14 @@ func NewIDFLoader(IDFFileName string) (*IDFLoader, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
wtfs, err := jiebago.ParseDictFile(IDFFilePath)
|
||||
loader := &IDFLoader{make(map[string]float64), 0.0, make([]float64, 0)}
|
||||
err = jiebago.LoadDict(loader, IDFFilePath, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
freqs := make([]float64, len(wtfs))
|
||||
loader := &IDFLoader{make(map[string]float64), 0.0}
|
||||
for index, wtf := range wtfs {
|
||||
loader.IDFFreq[wtf.Word] = wtf.Freq
|
||||
freqs[index] = wtf.Freq
|
||||
}
|
||||
sort.Float64s(freqs)
|
||||
loader.Median = freqs[len(freqs)/2]
|
||||
sort.Float64s(loader.freqs)
|
||||
loader.Median = loader.freqs[len(loader.freqs)/2]
|
||||
loader.freqs = []float64{}
|
||||
return loader, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user