From 73cb677aca816c1f83d17c8bba9e309151481a1a Mon Sep 17 00:00:00 2001 From: Wang Bin Date: Wed, 25 Feb 2015 12:04:09 +0800 Subject: [PATCH] added test for load customized stop words file --- analyse/analyse_test.go | 26 +++++++++++++++++++++ analyse/idf.go | 4 ++-- analyse/stop_words.txt | 51 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 analyse/stop_words.txt diff --git a/analyse/analyse_test.go b/analyse/analyse_test.go index f1dc9ed..63256fc 100644 --- a/analyse/analyse_test.go +++ b/analyse/analyse_test.go @@ -240,6 +240,19 @@ var ( TfIdf{Word: "怯懦", Freq: 0.271099}, TfIdf{Word: "隨便", Freq: 0.168377}, } + + LyciWeight2 = []TfIdf{ + TfIdf{Word: "所謂", Freq: 1.215739}, + TfIdf{Word: "一般", Freq: 0.731179}, + TfIdf{Word: "雖然", Freq: 0.405246}, + TfIdf{Word: "退縮", Freq: 0.405246}, + TfIdf{Word: "肌迫", Freq: 0.405246}, + TfIdf{Word: "矯作", Freq: 0.405246}, + TfIdf{Word: "怯懦", Freq: 0.326238}, + TfIdf{Word: "逼不得已", Freq: 0.202623}, + TfIdf{Word: "右銘", Freq: 0.202623}, + TfIdf{Word: "寬闊", Freq: 0.202623}, + } ) func TestExtractTags(t *testing.T) { @@ -270,3 +283,16 @@ func TestExtratTagsWithWeight(t *testing.T) { } } } + +func TestExtractTagsWithStopWordsFile(t *testing.T) { + jiebago.SetDictionary("../dict.txt") + SetIdf("idf.txt") + SetStopWords("stop_words.txt") + result := ExtractTags(Lyric, 7) + for index, tag := range result { + if LyciWeight2[index].Word != tag.Word || + math.Abs(LyciWeight2[index].Freq-tag.Freq) > 1e-6 { + t.Errorf("%v != %v", tag, LyciWeight2[index]) + } + } +} diff --git a/analyse/idf.go b/analyse/idf.go index e4250b4..ae03169 100644 --- a/analyse/idf.go +++ b/analyse/idf.go @@ -29,7 +29,7 @@ func NewIDFLoader() *IDFLoader { return loader } -func (loader *IDFLoader) NewPath(idfFilePath string) error { +func (loader *IDFLoader) newPath(idfFilePath string) error { if loader.Path == idfFilePath { return nil } @@ -67,5 +67,5 @@ func SetIdf(idfFilePath string) error { } idfFilePath = 
filepath.Clean(filepath.Join(pwd, idfFilePath)) } - return idfLoader.NewPath(idfFilePath) + return idfLoader.newPath(idfFilePath) } diff --git a/analyse/stop_words.txt b/analyse/stop_words.txt new file mode 100644 index 0000000..1cf8259 --- /dev/null +++ b/analyse/stop_words.txt @@ -0,0 +1,51 @@ +the +of +is +and +to +in +that +we +for +an +are +by +be +as +on +with +can +if +from +which +you +it +this +then +at +have +all +not +one +has +or +that +的 +了 +和 +是 +就 +都 +而 +及 +與 +著 +或 +一個 +沒有 +我們 +你們 +妳們 +他們 +她們 +是否 \ No newline at end of file