package tokenizers_test import ( "fmt" "log" "os" "github.com/blevesearch/bleve" _ "github.com/fumiama/jieba/tokenizers" ) func Example_beleveSearch() { // open a new index indexMapping := bleve.NewIndexMapping() err := indexMapping.AddCustomTokenizer("jieba", map[string]interface{}{ "file": "../dict.txt", "type": "jieba", }) if err != nil { log.Fatal(err) } // create a custom analyzer err = indexMapping.AddCustomAnalyzer("jieba", map[string]interface{}{ "type": "custom", "tokenizer": "jieba", "token_filters": []string{ "possessive_en", "to_lower", "stop_en", }, }) if err != nil { log.Fatal(err) } indexMapping.DefaultAnalyzer = "jieba" cacheDir := "jieba.beleve" os.RemoveAll(cacheDir) index, err := bleve.New(cacheDir, indexMapping) if err != nil { log.Fatal(err) } docs := []struct { Title string Name string }{ { Title: "Doc 1", Name: "This is the first document we’ve added", }, { Title: "Doc 2", Name: "The second one 你 中文测试中文 is even more interesting! 吃水果", }, { Title: "Doc 3", Name: "买水果然后来世博园。", }, { Title: "Doc 4", Name: "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作", }, { Title: "Doc 5", Name: "咱俩交换一下吧。", }, } // index docs for _, doc := range docs { index.Index(doc.Title, doc) } // search for some text for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} { query := bleve.NewQueryStringQuery(keyword) search := bleve.NewSearchRequest(query) search.Highlight = bleve.NewHighlight() searchResults, err := index.Search(search) if err != nil { log.Fatal(err) } fmt.Printf("Result of \"%s\": %d matches:\n", keyword, searchResults.Total) for i, hit := range searchResults.Hits { rv := fmt.Sprintf("%d. %s, (%f)\n", i+searchResults.Request.From+1, hit.ID, hit.Score) for fragmentField, fragments := range hit.Fragments { rv += fmt.Sprintf("%s: ", fragmentField) for _, fragment := range fragments { rv += fmt.Sprintf("%s", fragment) } } fmt.Printf("%s\n", rv) } } // Output: // Result of "水果世博园": 2 matches: // 1. Doc 3, (1.099550) // Name: 买水果然后来世博园。 // 2. Doc 2, (0.031941) // Name: The second one 你 中文测试中文 is even more interesting! 吃水果 // Result of "你": 1 matches: // 1. Doc 2, (0.391161) // Name: The second one 中文测试中文 is even more interesting! 吃水果 // Result of "first": 1 matches: // 1. Doc 1, (0.512150) // Name: This is the first document we’ve added // Result of "中文": 1 matches: // 1. Doc 2, (0.553186) // Name: The second one 你 中文测试中文 is even more interesting! 吃水果 // Result of "交换机": 2 matches: // 1. Doc 4, (0.608495) // Name: 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作 // 2. Doc 5, (0.086700) // Name: 咱俩交换一下吧。 // Result of "交换": 2 matches: // 1. Doc 5, (0.534158) // Name: 咱俩交换一下吧。 // 2. Doc 4, (0.296297) // Name: 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作 }