From c068670e9bbfd7d05499eae0ce6da145d715dbb3 Mon Sep 17 00:00:00 2001
From: Wang Bin
Date: Wed, 6 May 2015 17:50:29 +0800
Subject: [PATCH] added more examples

---
 example_bleve_test.go  | 141 +++++++++++++++++++++++++++++++++++++++++
 example_test.go        |  27 +++++----
 posseg/example_test.go |  25 +++++++
 3 files changed, 179 insertions(+), 14 deletions(-)
 create mode 100644 example_bleve_test.go
 create mode 100644 posseg/example_test.go

diff --git a/example_bleve_test.go b/example_bleve_test.go
new file mode 100644
index 0000000..5fe0616
--- /dev/null
+++ b/example_bleve_test.go
@@ -0,0 +1,141 @@
+package jiebago_test
+
+import (
+	"fmt"
+	"log"
+	"os"
+
+	"github.com/blevesearch/bleve"
+	_ "github.com/wangbin/jiebago" // NOTE(review): presumably registers the "jieba" tokenizer with bleve - confirm
+)
+
+// ExampleBeleveSearch builds a bleve index whose default analyzer uses the
+// "jieba" tokenizer, indexes five mixed English/Chinese documents, and then
+// prints the hits (ID, score and fragments) of several match queries.
+func ExampleBeleveSearch() {
+	// Start from a fresh index mapping and register the custom tokenizer.
+	indexMapping := bleve.NewIndexMapping()
+
+	err := indexMapping.AddCustomTokenizer("jieba",
+		map[string]interface{}{
+			"file": "dict.txt",
+			"type": "jieba",
+		})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Create a custom analyzer that combines the jieba tokenizer with the
+	// possessive_en, to_lower and stop_en token filters.
+	err = indexMapping.AddCustomAnalyzer("jieba",
+		map[string]interface{}{
+			"type":      "custom",
+			"tokenizer": "jieba",
+			"token_filters": []string{
+				"possessive_en",
+				"to_lower",
+				"stop_en",
+			},
+		})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	indexMapping.DefaultAnalyzer = "jieba"
+	cacheDir := "jieba.beleve"
+
+	// Clear any index left behind by a previous run. os.RemoveAll is used
+	// because os.Remove cannot delete a non-empty directory; RemoveAll also
+	// returns nil when the path does not exist.
+	if err = os.RemoveAll(cacheDir); err != nil {
+		log.Fatal(err)
+	}
+	index, err := bleve.New(cacheDir, indexMapping)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Deferred calls run last-in-first-out: the index is closed first, then
+	// cacheDir is removed. Neither runs if log.Fatal exits early.
+	defer os.RemoveAll(cacheDir)
+	defer index.Close()
+
+	docs := []struct {
+		Title string
+		Name  string
+	}{
+		{
+			Title: "Doc 1",
+			Name:  "This is the first document we’ve added",
+		},
+		{
+			Title: "Doc 2",
+			Name:  "The second one 你 中文测试中文 is even more interesting! 吃水果",
+		},
+		{
+			Title: "Doc 3",
+			Name:  "买水果然后来世博园。",
+		},
+		{
+			Title: "Doc 4",
+			Name:  "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
+		},
+		{
+			Title: "Doc 5",
+			Name:  "咱俩交换一下吧。",
+		},
+	}
+
+	// Index every document under its title; abort on the first failure.
+	for _, doc := range docs {
+		if err := index.Index(doc.Title, doc); err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	// Run one match query per keyword and print every hit.
+	for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} {
+		query := bleve.NewMatchQuery(keyword)
+		search := bleve.NewSearchRequest(query)
+		search.Highlight = bleve.NewHighlight()
+		searchResults, err := index.Search(search)
+		if err != nil {
+			log.Fatal(err)
+		}
+		fmt.Printf("Result of \"%s\": %d matches:\n", keyword, searchResults.Total)
+		for i, hit := range searchResults.Hits {
+			rv := fmt.Sprintf("%d. %s, (%f)\n", i+searchResults.Request.From+1, hit.ID, hit.Score)
+			for fragmentField, fragments := range hit.Fragments {
+				rv += fmt.Sprintf("%s: ", fragmentField)
+				for _, fragment := range fragments {
+					rv += fragment
+				}
+			}
+			fmt.Printf("%s\n", rv)
+		}
+	}
+	// Output:
+	// Result of "水果世博园": 2 matches:
+	// 1. Doc 3, (1.099550)
+	// Name: 买水果然后来世博园。
+	// 2. Doc 2, (0.031941)
+	// Name: The second one 你 中文测试中文 is even more interesting! 吃水果
+	// Result of "你": 1 matches:
+	// 1. Doc 2, (0.391161)
+	// Name: The second one 中文测试中文 is even more interesting! 吃水果
+	// Result of "first": 1 matches:
+	// 1. Doc 1, (0.512150)
+	// Name: This is the first document we’ve added
+	// Result of "中文": 1 matches:
+	// 1. Doc 2, (0.553186)
+	// Name: The second one 你 中文测试中文 is even more interesting! 吃水果
+	// Result of "交换机": 2 matches:
+	// 1. Doc 4, (0.608495)
+	// Name: 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作
+	// 2. Doc 5, (0.086700)
+	// Name: 咱俩交换一下吧。
+	// Result of "交换": 2 matches:
+	// 1. Doc 5, (0.534158)
+	// Name: 咱俩交换一下吧。
+	// 2. Doc 4, (0.296297)
+	// Name: 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作
+}
diff --git a/example_test.go b/example_test.go
index 08ef201..02e3f9a 100644
--- a/example_test.go
+++ b/example_test.go
@@ -60,30 +60,29 @@ func ExampleTokenize() {
 	fmt.Println("Default Mode:")
 	for _, token := range tokenizer.Tokenize(sentence) {
 		fmt.Printf(
-			"Term: %s\tStart: %d\tEnd: %d\tPosition: %d\tType: %d\n",
+			"Term: %s Start: %d End: %d Position: %d Type: %d\n",
 			token.Term, token.Start, token.End, token.Position, token.Type)
 	}
-	fmt.Println()
+
 	//search mode
 	tokenizer, _ = jiebago.NewJiebaTokenizer("dict.txt", true, true)
 	fmt.Println("Search Mode:")
 	for _, token := range tokenizer.Tokenize(sentence) {
 		fmt.Printf(
-			"Term: %s\tStart: %d\tEnd: %d\tPosition: %d\tType: %d\n",
+			"Term: %s Start: %d End: %d Position: %d Type: %d\n",
 			token.Term, token.Start, token.End, token.Position, token.Type)
 	}
 	// Output:
 	// Default Mode:
-	// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
-	// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
-	// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
-	// Term: 有限公司 Start: 18 End: 30 Position: 4 Type: 1
-
+	// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
+	// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
+	// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
+	// Term: 有限公司 Start: 18 End: 30 Position: 4 Type: 1
 	// Search Mode:
-	// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
-	// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
-	// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
-	// Term: 有限 Start: 18 End: 24 Position: 4 Type: 1
-	// Term: 公司 Start: 24 End: 30 Position: 5 Type: 1
-	// Term: 有限公司 Start: 18 End: 30 Position: 6 Type: 1
+	// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
+	// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
+	// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
+	// Term: 有限 Start: 18 End: 24 Position: 4 Type: 1
+	// Term: 公司 Start: 24 End: 30 Position: 5 Type: 1
+	// Term: 有限公司 Start: 18 End: 30 Position: 6 Type: 1
 }
diff --git a/posseg/example_test.go b/posseg/example_test.go
new file mode 100644
index 0000000..2b5a5ae
--- /dev/null
+++ b/posseg/example_test.go
@@ -0,0 +1,25 @@
+package posseg_test
+
+import (
+	"fmt"
+
+	"github.com/wangbin/jiebago/posseg"
+)
+
+// Example loads ../dict.txt into a Segmenter, ranges over the result of Cut
+// for a short sentence, and prints each segment's Text and Pos values.
+func Example() {
+	var seg posseg.Segmenter
+	// NOTE(review): any error reported by LoadDictionary is not checked here;
+	// confirm its signature and handle the error if it returns one.
+	seg.LoadDictionary("../dict.txt")
+
+	for segment := range seg.Cut("我爱北京天安门", true) {
+		fmt.Printf("%s %s\n", segment.Text(), segment.Pos())
+	}
+	// Output:
+	// 我 r
+	// 爱 v
+	// 北京 ns
+	// 天安门 ns
+}