1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-13 05:31:02 +08:00

Removed dict.go and moved its functions to util.go; also use an interface to simplify the code

This commit is contained in:
Wang Bin
2015-03-25 18:28:37 +08:00
parent 7fe5e7d4c4
commit 59da5b5e3a
7 changed files with 57 additions and 115 deletions

View File

@@ -41,6 +41,10 @@ type TagExtracter struct {
stopWords map[string]int
}
// AddEntry records entry.Word as a stop word by storing it in t.stopWords
// with the marker value 1.
//
// SetStopWords passes the TagExtracter itself to jiebago.LoadDict, so this
// method is presumably invoked once per entry parsed from the stop-word file
// (NOTE(review): confirm against jiebago.LoadDict).
func (t *TagExtracter) AddEntry(entry *jiebago.Entry) {
	// Assigning into a nil map panics; allocate the set on first use so an
	// extracter whose stopWords was never initialised still works.
	if t.stopWords == nil {
		t.stopWords = make(map[string]int)
	}
	t.stopWords[entry.Word] = 1
}
func NewTagExtracter(dictFileName, IDFFileName string) (*TagExtracter, error) {
j, err := jiebago.NewJieba(dictFileName)
if err != nil {
@@ -61,11 +65,7 @@ func (t *TagExtracter) SetStopWords(stopWordsFileName string) error {
return err
}
wtfs, err := jiebago.ParseDictFile(stopWordsFilePath)
for _, wtf := range wtfs {
t.stopWords[wtf.Word] = 1
}
return nil
return jiebago.LoadDict(t, stopWordsFilePath, false)
}
// Keyword extraction.

View File

@@ -8,6 +8,12 @@ import (
// IDFLoader holds a per-word frequency table together with the median of the
// loaded frequency values (presumably inverse document frequencies, going by
// the name — confirm).
//
// NOTE: NewIDFLoader builds this struct with an unkeyed composite literal, so
// the order of the fields below must not be changed.
type IDFLoader struct {
// IDFFreq maps a word to the Freq value of its entry; filled by AddEntry.
IDFFreq map[string]float64
// Median is set by NewIDFLoader to the element at index len/2 of the sorted
// frequency values (the upper median when the count is even).
Median float64
// freqs collects every Freq passed to AddEntry so NewIDFLoader can sort the
// values and pick the median; NewIDFLoader resets it to an empty slice
// afterwards.
freqs []float64
}
// AddEntry stores entry.Freq under entry.Word in l.IDFFreq and appends the
// same value to the internal freqs slice, which NewIDFLoader later sorts to
// compute Median.
//
// NewIDFLoader passes the loader to jiebago.LoadDict, so this method is
// presumably invoked once per entry parsed from the IDF file
// (NOTE(review): confirm against jiebago.LoadDict).
func (l *IDFLoader) AddEntry(entry *jiebago.Entry) {
	// Assigning into a nil map panics; allocate lazily so a zero-value
	// IDFLoader (IDFFreq is exported and may be left unset) is usable.
	if l.IDFFreq == nil {
		l.IDFFreq = make(map[string]float64)
	}
	l.IDFFreq[entry.Word] = entry.Freq
	// append works on a nil slice, so freqs needs no such guard.
	l.freqs = append(l.freqs, entry.Freq)
}
func NewIDFLoader(IDFFileName string) (*IDFLoader, error) {
@@ -15,18 +21,14 @@ func NewIDFLoader(IDFFileName string) (*IDFLoader, error) {
if err != nil {
return nil, err
}
wtfs, err := jiebago.ParseDictFile(IDFFilePath)
loader := &IDFLoader{make(map[string]float64), 0.0, make([]float64, 0)}
err = jiebago.LoadDict(loader, IDFFilePath, false)
if err != nil {
return nil, err
}
freqs := make([]float64, len(wtfs))
loader := &IDFLoader{make(map[string]float64), 0.0}
for index, wtf := range wtfs {
loader.IDFFreq[wtf.Word] = wtf.Freq
freqs[index] = wtf.Freq
}
sort.Float64s(freqs)
loader.Median = freqs[len(freqs)/2]
sort.Float64s(loader.freqs)
loader.Median = loader.freqs[len(loader.freqs)/2]
loader.freqs = []float64{}
return loader, nil
}