mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-11 20:50:29 +08:00
Allow users to manually set the IDF file and the stop-words file; corresponds to jieba commit 16d626d3477c8649d42094d7d7d4ae7c619783de
This commit is contained in:
@@ -79,6 +79,7 @@
|
||||
|
||||
func main() {
|
||||
jiebago.SetDictionary("/Path/to/default/dictionary/file")
|
||||
analyse.SetIdf("/Path/to/idf/file")
|
||||
fmt.Println(strings.Join(analyse.ExtractTags(sentence, 20), "/ "))
|
||||
}
|
||||
|
||||
|
||||
@@ -38,8 +38,7 @@ func ExtractTags(sentence string, topK int) []string {
|
||||
if utf8.RuneCountInString(w) < 2 {
|
||||
continue
|
||||
}
|
||||
index := stopWords.Search(w)
|
||||
if index < len(stopWords) && stopWords[index] == w {
|
||||
if _, ok := stopWords[w]; ok {
|
||||
continue
|
||||
}
|
||||
if f, ok := freq[w]; ok {
|
||||
|
||||
@@ -183,6 +183,7 @@ var (
|
||||
|
||||
func TestExtractTags(t *testing.T) {
|
||||
jiebago.SetDictionary("../dict.txt")
|
||||
SetIdf("idf.txt")
|
||||
for index, sentence := range test_contents {
|
||||
result := ExtractTags(sentence, 20)
|
||||
if len(result) != len(Tags[index]) {
|
||||
|
||||
270237
analyse/idf.go
270237
analyse/idf.go
File diff suppressed because it is too large
Load Diff
270132
analyse/idf.txt
Normal file
270132
analyse/idf.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user