1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

added test for loading a customized stop words file

This commit is contained in:
Wang Bin
2015-02-25 12:04:09 +08:00
parent e60dcd3e9e
commit 73cb677aca
3 changed files with 79 additions and 2 deletions

View File

@@ -240,6 +240,19 @@ var (
TfIdf{Word: "怯懦", Freq: 0.271099},
TfIdf{Word: "隨便", Freq: 0.168377},
}
// LyciWeight2 lists the Word/Freq pairs that TestExtractTagsWithStopWordsFile
// expects from ExtractTags(Lyric, ...) once stop_words.txt has been loaded;
// entries are compared positionally against the returned tags.
LyciWeight2 = []TfIdf{
	{Word: "所謂", Freq: 1.215739},
	{Word: "一般", Freq: 0.731179},
	{Word: "雖然", Freq: 0.405246},
	{Word: "退縮", Freq: 0.405246},
	{Word: "肌迫", Freq: 0.405246},
	{Word: "矯作", Freq: 0.405246},
	{Word: "怯懦", Freq: 0.326238},
	{Word: "逼不得已", Freq: 0.202623},
	{Word: "右銘", Freq: 0.202623},
	{Word: "寬闊", Freq: 0.202623},
}
)
func TestExtractTags(t *testing.T) {
@@ -270,3 +283,16 @@ func TestExtratTagsWithWeight(t *testing.T) {
}
}
}
// TestExtractTagsWithStopWordsFile verifies that, after a custom stop-word
// file is loaded through SetStopWords, ExtractTags returns the words and
// frequencies recorded in LyciWeight2, in the same order.
func TestExtractTagsWithStopWordsFile(t *testing.T) {
	jiebago.SetDictionary("../dict.txt")
	// SetIdf reports load failures through its error result; stop here rather
	// than comparing against tags computed from a missing IDF table.
	if err := SetIdf("idf.txt"); err != nil {
		t.Fatalf("SetIdf(%q): %v", "idf.txt", err)
	}
	SetStopWords("stop_words.txt")

	result := ExtractTags(Lyric, 7)
	// Without this guard an empty result would skip the loop entirely and the
	// test would pass without checking anything.
	if len(result) == 0 {
		t.Fatal("ExtractTags returned no tags")
	}
	// Fail cleanly instead of panicking on an out-of-range index if more tags
	// come back than there are expectations.
	if len(result) > len(LyciWeight2) {
		t.Fatalf("got %d tags, but only %d expected values are defined",
			len(result), len(LyciWeight2))
	}

	for index, tag := range result {
		want := LyciWeight2[index]
		// Frequencies are floats, so compare with a small tolerance.
		if want.Word != tag.Word || math.Abs(want.Freq-tag.Freq) > 1e-6 {
			t.Errorf("%v != %v", tag, want)
		}
	}
}

View File

@@ -29,7 +29,7 @@ func NewIDFLoader() *IDFLoader {
return loader
}
func (loader *IDFLoader) NewPath(idfFilePath string) error {
func (loader *IDFLoader) newPath(idfFilePath string) error {
if loader.Path == idfFilePath {
return nil
}
@@ -67,5 +67,5 @@ func SetIdf(idfFilePath string) error {
}
idfFilePath = filepath.Clean(filepath.Join(pwd, idfFilePath))
}
return idfLoader.NewPath(idfFilePath)
return idfLoader.newPath(idfFilePath)
}

51
analyse/stop_words.txt Normal file
View File

@@ -0,0 +1,51 @@
the
of
is
and
to
in
that
we
for
an
are
by
be
as
on
with
can
if
from
which
you
it
this
then
at
have
all
not
one
has
or
that
一個
沒有
我們
你們
妳們
他們
她們
是否