1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-11 20:50:29 +08:00

Allow users to manually set the IDF file and the stop-words file, corresponding to jieba commit 16d626d3477c8649d42094d7d7d4ae7c619783de

This commit is contained in:
Wang Bin
2014-08-14 12:05:29 +08:00
parent d2acf94693
commit 052c571015
5 changed files with 270203 additions and 270171 deletions

View File

@@ -79,6 +79,7 @@
func main() {
jiebago.SetDictionary("/Path/to/default/dictionary/file")
+analyse.SetIdf("/Path/to/idf/file")
fmt.Println(strings.Join(analyse.ExtractTags(sentence, 20), "/ "))
}

View File

@@ -38,8 +38,7 @@ func ExtractTags(sentence string, topK int) []string {
if utf8.RuneCountInString(w) < 2 {
continue
}
-index := stopWords.Search(w)
-if index < len(stopWords) && stopWords[index] == w {
+if _, ok := stopWords[w]; ok {
continue
}
if f, ok := freq[w]; ok {

View File

@@ -183,6 +183,7 @@ var (
func TestExtractTags(t *testing.T) {
jiebago.SetDictionary("../dict.txt")
+SetIdf("idf.txt")
for index, sentence := range test_contents {
result := ExtractTags(sentence, 20)
if len(result) != len(Tags[index]) {

File diff suppressed because it is too large Load Diff

270132
analyse/idf.txt Normal file

File diff suppressed because it is too large Load Diff