diff --git a/example_bleve_test.go b/example_bleve_test.go
new file mode 100644
index 0000000..5fe0616
--- /dev/null
+++ b/example_bleve_test.go
@@ -0,0 +1,148 @@
+package jiebago_test
+
+import (
+	"fmt"
+	"log"
+	"os"
+
+	"github.com/blevesearch/bleve"
+	_ "github.com/wangbin/jiebago"
+)
+
+// ExampleBeleveSearch shows how to plug the "jieba" tokenizer into a bleve
+// index mapping, index a handful of mixed Chinese/English documents and run
+// match queries against them, printing every hit with its highlighted
+// fragments.
+//
+// NOTE(review): the name misspells "Bleve" and does not refer to an exported
+// identifier, which go vet's example-name check may flag; it is kept as is so
+// the example's name stays stable.
+func ExampleBeleveSearch() {
+	indexMapping := bleve.NewIndexMapping()
+
+	// Register a tokenizer named "jieba" that reads its dictionary from dict.txt.
+	// NOTE(review): the "jieba" tokenizer type is presumably registered by the
+	// blank import of the jiebago package; confirm against that package.
+	err := indexMapping.AddCustomTokenizer("jieba",
+		map[string]interface{}{
+			"file": "dict.txt",
+			"type": "jieba",
+		})
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Build a custom analyzer on top of that tokenizer.
+	err = indexMapping.AddCustomAnalyzer("jieba",
+		map[string]interface{}{
+			"type":      "custom",
+			"tokenizer": "jieba",
+			"token_filters": []string{
+				"possessive_en",
+				"to_lower",
+				"stop_en",
+			},
+		})
+	if err != nil {
+		log.Fatal(err)
+	}
+	indexMapping.DefaultAnalyzer = "jieba"
+
+	// A bleve index lives in a directory, so a leftover from an earlier run has
+	// to be removed recursively: os.Remove fails on a non-empty directory and
+	// bleve.New refuses to create an index at a path that already exists.
+	cacheDir := "jieba.beleve"
+	if err := os.RemoveAll(cacheDir); err != nil {
+		log.Fatal(err)
+	}
+	index, err := bleve.New(cacheDir, indexMapping)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	docs := []struct {
+		Title string
+		Name  string
+	}{
+		{
+			Title: "Doc 1",
+			Name:  "This is the first document we’ve added",
+		},
+		{
+			Title: "Doc 2",
+			Name:  "The second one 你 中文测试中文 is even more interesting! 吃水果",
+		},
+		{
+			Title: "Doc 3",
+			Name:  "买水果然后来世博园。",
+		},
+		{
+			Title: "Doc 4",
+			Name:  "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
+		},
+		{
+			Title: "Doc 5",
+			Name:  "咱俩交换一下吧。",
+		},
+	}
+
+	// Index every document under its title. A failed insert would silently skew
+	// the search results below, so it is treated as fatal.
+	for _, doc := range docs {
+		if err := index.Index(doc.Title, doc); err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	// Run one match query per keyword and print each hit with its highlights.
+	for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} {
+		query := bleve.NewMatchQuery(keyword)
+		search := bleve.NewSearchRequest(query)
+		search.Highlight = bleve.NewHighlight()
+		searchResults, err := index.Search(search)
+		if err != nil {
+			log.Fatal(err)
+		}
+		fmt.Printf("Result of \"%s\": %d matches:\n", keyword, searchResults.Total)
+		for i, hit := range searchResults.Hits {
+			fmt.Printf("%d. %s, (%f)\n", i+searchResults.Request.From+1, hit.ID, hit.Score)
+			for fragmentField, fragments := range hit.Fragments {
+				fmt.Printf("%s: ", fragmentField)
+				for _, fragment := range fragments {
+					fmt.Print(fragment)
+				}
+			}
+			fmt.Println()
+		}
+	}
+
+	// Close the index once all queries have run.
+	if err := index.Close(); err != nil {
+		log.Fatal(err)
+	}
+	// Output:
+	// Result of "水果世博园": 2 matches:
+	// 1. Doc 3, (1.099550)
+	// Name: 买水果然后来世博园。
+	// 2. Doc 2, (0.031941)
+	// Name: The second one 你 中文测试中文 is even more interesting! 吃水果
+	// Result of "你": 1 matches:
+	// 1. Doc 2, (0.391161)
+	// Name: The second one 你 中文测试中文 is even more interesting! 吃水果
+	// Result of "first": 1 matches:
+	// 1. Doc 1, (0.512150)
+	// Name: This is the first document we’ve added
+	// Result of "中文": 1 matches:
+	// 1. Doc 2, (0.553186)
+	// Name: The second one 你 中文测试中文 is even more interesting! 吃水果
+	// Result of "交换机": 2 matches:
+	// 1. Doc 4, (0.608495)
+	// Name: 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作
+	// 2. Doc 5, (0.086700)
+	// Name: 咱俩交换一下吧。
+	// Result of "交换": 2 matches:
+	// 1. Doc 5, (0.534158)
+	// Name: 咱俩交换一下吧。
+	// 2. Doc 4, (0.296297)
+	// Name: 工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作
+}
diff --git a/example_test.go b/example_test.go
index 08ef201..02e3f9a 100644
--- a/example_test.go
+++ b/example_test.go
@@ -60,30 +60,29 @@ func ExampleTokenize() {
fmt.Println("Default Mode:")
for _, token := range tokenizer.Tokenize(sentence) {
fmt.Printf(
- "Term: %s\tStart: %d\tEnd: %d\tPosition: %d\tType: %d\n",
+ "Term: %s Start: %d End: %d Position: %d Type: %d\n",
token.Term, token.Start, token.End, token.Position, token.Type)
}
- fmt.Println()
+
//search mode
tokenizer, _ = jiebago.NewJiebaTokenizer("dict.txt", true, true)
fmt.Println("Search Mode:")
for _, token := range tokenizer.Tokenize(sentence) {
fmt.Printf(
- "Term: %s\tStart: %d\tEnd: %d\tPosition: %d\tType: %d\n",
+ "Term: %s Start: %d End: %d Position: %d Type: %d\n",
token.Term, token.Start, token.End, token.Position, token.Type)
}
// Output:
// Default Mode:
- // Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
- // Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
- // Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
- // Term: 有限公司 Start: 18 End: 30 Position: 4 Type: 1
-
+ // Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
+ // Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
+ // Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
+ // Term: 有限公司 Start: 18 End: 30 Position: 4 Type: 1
// Search Mode:
- // Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
- // Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
- // Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
- // Term: 有限 Start: 18 End: 24 Position: 4 Type: 1
- // Term: 公司 Start: 24 End: 30 Position: 5 Type: 1
- // Term: 有限公司 Start: 18 End: 30 Position: 6 Type: 1
+ // Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
+ // Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
+ // Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
+ // Term: 有限 Start: 18 End: 24 Position: 4 Type: 1
+ // Term: 公司 Start: 24 End: 30 Position: 5 Type: 1
+ // Term: 有限公司 Start: 18 End: 30 Position: 6 Type: 1
}
diff --git a/posseg/example_test.go b/posseg/example_test.go
new file mode 100644
index 0000000..2b5a5ae
--- /dev/null
+++ b/posseg/example_test.go
@@ -0,0 +1,28 @@
+package posseg_test
+
+import (
+	"fmt"
+
+	"github.com/wangbin/jiebago/posseg"
+)
+
+// Example loads a dictionary into a posseg.Segmenter, cuts a sentence and
+// prints, for every segment received, the values of its Text and Pos methods.
+func Example() {
+	var seg posseg.Segmenter
+	// Report a dictionary load failure instead of ignoring it, so that a
+	// missing or unreadable ../dict.txt shows up as the cause of the failure.
+	if err := seg.LoadDictionary("../dict.txt"); err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	for segment := range seg.Cut("我爱北京天安门", true) {
+		fmt.Printf("%s %s\n", segment.Text(), segment.Pos())
+	}
+	// Output:
+	// 我 r
+	// 爱 v
+	// 北京 ns
+	// 天安门 ns
+}