mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-30 09:00:30 +08:00
Allow users to manually set the IDF file and the stop-words file; corresponds to jieba commit 16d626d3477c8649d42094d7d7d4ae7c619783de
This commit is contained in:
@@ -79,6 +79,7 @@
|
|||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
jiebago.SetDictionary("/Path/to/default/dictionary/file")
|
jiebago.SetDictionary("/Path/to/default/dictionary/file")
|
||||||
|
analyse.SetIdf("/Path/to/idf/file")
|
||||||
fmt.Println(strings.Join(analyse.ExtractTags(sentence, 20), "/ "))
|
fmt.Println(strings.Join(analyse.ExtractTags(sentence, 20), "/ "))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -38,8 +38,7 @@ func ExtractTags(sentence string, topK int) []string {
|
|||||||
if utf8.RuneCountInString(w) < 2 {
|
if utf8.RuneCountInString(w) < 2 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
index := stopWords.Search(w)
|
if _, ok := stopWords[w]; ok {
|
||||||
if index < len(stopWords) && stopWords[index] == w {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if f, ok := freq[w]; ok {
|
if f, ok := freq[w]; ok {
|
||||||
|
|||||||
@@ -183,6 +183,7 @@ var (
|
|||||||
|
|
||||||
func TestExtractTags(t *testing.T) {
|
func TestExtractTags(t *testing.T) {
|
||||||
jiebago.SetDictionary("../dict.txt")
|
jiebago.SetDictionary("../dict.txt")
|
||||||
|
SetIdf("idf.txt")
|
||||||
for index, sentence := range test_contents {
|
for index, sentence := range test_contents {
|
||||||
result := ExtractTags(sentence, 20)
|
result := ExtractTags(sentence, 20)
|
||||||
if len(result) != len(Tags[index]) {
|
if len(result) != len(Tags[index]) {
|
||||||
|
|||||||
270237
analyse/idf.go
270237
analyse/idf.go
File diff suppressed because it is too large
Load Diff
270132
analyse/idf.txt
Normal file
270132
analyse/idf.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user