mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-05 00:32:51 +08:00
added test for loading a customized stop words file
This commit is contained in:
@@ -240,6 +240,19 @@ var (
|
||||
TfIdf{Word: "怯懦", Freq: 0.271099},
|
||||
TfIdf{Word: "隨便", Freq: 0.168377},
|
||||
}
|
||||
|
||||
LyciWeight2 = []TfIdf{
|
||||
TfIdf{Word: "所謂", Freq: 1.215739},
|
||||
TfIdf{Word: "一般", Freq: 0.731179},
|
||||
TfIdf{Word: "雖然", Freq: 0.405246},
|
||||
TfIdf{Word: "退縮", Freq: 0.405246},
|
||||
TfIdf{Word: "肌迫", Freq: 0.405246},
|
||||
TfIdf{Word: "矯作", Freq: 0.405246},
|
||||
TfIdf{Word: "怯懦", Freq: 0.326238},
|
||||
TfIdf{Word: "逼不得已", Freq: 0.202623},
|
||||
TfIdf{Word: "右銘", Freq: 0.202623},
|
||||
TfIdf{Word: "寬闊", Freq: 0.202623},
|
||||
}
|
||||
)
|
||||
|
||||
func TestExtractTags(t *testing.T) {
|
||||
@@ -270,3 +283,16 @@ func TestExtratTagsWithWeight(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractTagsWithStopWordsFile checks ExtractTags after a stop-words file
// has been loaded through SetStopWords: every returned tag must match the
// entry at the same position in LyciWeight2, both by word and by weight.
func TestExtractTagsWithStopWordsFile(t *testing.T) {
|
||||
// Set up dictionary, IDF table and stop words from files given by relative path.
// NOTE(review): SetIdf returns an error that is discarded here, and nothing
// from the other two setup calls is checked either — consider failing the
// test when setup fails.
jiebago.SetDictionary("../dict.txt")
|
||||
SetIdf("idf.txt")
|
||||
SetStopWords("stop_words.txt")
|
||||
// Ask for 7 tags from the Lyric fixture.
result := ExtractTags(Lyric, 7)
|
||||
// Only the tags actually returned are compared; len(result) is never
// asserted, so an empty result would pass without any comparison.
for index, tag := range result {
|
||||
if LyciWeight2[index].Word != tag.Word ||
|
||||
// Weights are floats, so they are compared with an absolute tolerance of 1e-6.
math.Abs(LyciWeight2[index].Freq-tag.Freq) > 1e-6 {
|
||||
t.Errorf("%v != %v", tag, LyciWeight2[index])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ func NewIDFLoader() *IDFLoader {
|
||||
return loader
|
||||
}
|
||||
|
||||
func (loader *IDFLoader) NewPath(idfFilePath string) error {
|
||||
func (loader *IDFLoader) newPath(idfFilePath string) error {
|
||||
if loader.Path == idfFilePath {
|
||||
return nil
|
||||
}
|
||||
@@ -67,5 +67,5 @@ func SetIdf(idfFilePath string) error {
|
||||
}
|
||||
idfFilePath = filepath.Clean(filepath.Join(pwd, idfFilePath))
|
||||
}
|
||||
return idfLoader.NewPath(idfFilePath)
|
||||
return idfLoader.newPath(idfFilePath)
|
||||
}
|
||||
|
||||
51
analyse/stop_words.txt
Normal file
51
analyse/stop_words.txt
Normal file
@@ -0,0 +1,51 @@
|
||||
the
|
||||
of
|
||||
is
|
||||
and
|
||||
to
|
||||
in
|
||||
that
|
||||
we
|
||||
for
|
||||
an
|
||||
are
|
||||
by
|
||||
be
|
||||
as
|
||||
on
|
||||
with
|
||||
can
|
||||
if
|
||||
from
|
||||
which
|
||||
you
|
||||
it
|
||||
this
|
||||
then
|
||||
at
|
||||
have
|
||||
all
|
||||
not
|
||||
one
|
||||
has
|
||||
or
|
||||
that
|
||||
的
|
||||
了
|
||||
和
|
||||
是
|
||||
就
|
||||
都
|
||||
而
|
||||
及
|
||||
與
|
||||
著
|
||||
或
|
||||
一個
|
||||
沒有
|
||||
我們
|
||||
你們
|
||||
妳們
|
||||
他們
|
||||
她們
|
||||
是否
|
||||
Reference in New Issue
Block a user