1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

added more examples

This commit is contained in:
Wang Bin
2015-05-06 17:50:29 +08:00
parent b3ea94ba47
commit c068670e9b
3 changed files with 160 additions and 14 deletions

126
example_bleve_test.go Normal file
View File

@@ -0,0 +1,126 @@
package jiebago_test
import (
"fmt"
"log"
"os"
"github.com/blevesearch/bleve"
_ "github.com/wangbin/jiebago"
)
func ExampleBeleveSearch() {
// open a new index
indexMapping := bleve.NewIndexMapping()
err := indexMapping.AddCustomTokenizer("jieba",
map[string]interface{}{
"file": "dict.txt",
"type": "jieba",
})
if err != nil {
log.Fatal(err)
}
// create a custom analyzer
err = indexMapping.AddCustomAnalyzer("jieba",
map[string]interface{}{
"type": "custom",
"tokenizer": "jieba",
"token_filters": []string{
"possessive_en",
"to_lower",
"stop_en",
},
})
if err != nil {
log.Fatal(err)
}
indexMapping.DefaultAnalyzer = "jieba"
cacheDir := "jieba.beleve"
os.Remove(cacheDir)
index, err := bleve.New(cacheDir, indexMapping)
if err != nil {
log.Fatal(err)
}
docs := []struct {
Title string
Name string
}{
{
Title: "Doc 1",
Name: "This is the first document weve added",
},
{
Title: "Doc 2",
Name: "The second one 你 中文测试中文 is even more interesting! 吃水果",
},
{
Title: "Doc 3",
Name: "买水果然后来世博园。",
},
{
Title: "Doc 4",
Name: "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
},
{
Title: "Doc 5",
Name: "咱俩交换一下吧。",
},
}
// index docs
for _, doc := range docs {
index.Index(doc.Title, doc)
}
// search for some text
for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} {
query := bleve.NewMatchQuery(keyword)
search := bleve.NewSearchRequest(query)
search.Highlight = bleve.NewHighlight()
searchResults, err := index.Search(search)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Result of \"%s\": %d matches:\n", keyword, searchResults.Total)
for i, hit := range searchResults.Hits {
rv := fmt.Sprintf("%d. %s, (%f)\n", i+searchResults.Request.From+1, hit.ID, hit.Score)
for fragmentField, fragments := range hit.Fragments {
rv += fmt.Sprintf("%s: ", fragmentField)
for _, fragment := range fragments {
rv += fmt.Sprintf("%s", fragment)
}
}
fmt.Printf("%s\n", rv)
}
}
// Output:
// Result of "水果世博园": 2 matches:
// 1. Doc 3, (1.099550)
// Name: 买<span class="highlight">水果</span>然后来<span class="highlight">世博</span>园。
// 2. Doc 2, (0.031941)
// Name: The second one 你 中文测试中文 is even more interesting! 吃<span class="highlight">水果</span>
// Result of "你": 1 matches:
// 1. Doc 2, (0.391161)
// Name: The second one <span class="highlight">你</span> 中文测试中文 is even more interesting! 吃水果
// Result of "first": 1 matches:
// 1. Doc 1, (0.512150)
// Name: This is the <span class="highlight">first</span> document weve added
// Result of "中文": 1 matches:
// 1. Doc 2, (0.553186)
// Name: The second one 你 <span class="highlight">中文</span>测试<span class="highlight">中文</span> is even more interesting! 吃水果
// Result of "交换机": 2 matches:
// 1. Doc 4, (0.608495)
// Name: 工信处女干事每月经过下属科室都要亲口交代24口<span class="highlight">交换机</span>等技术性器件的安装工作
// 2. Doc 5, (0.086700)
// Name: 咱俩<span class="highlight">交换</span>一下吧。
// Result of "交换": 2 matches:
// 1. Doc 5, (0.534158)
// Name: 咱俩<span class="highlight">交换</span>一下吧。
// 2. Doc 4, (0.296297)
// Name: 工信处女干事每月经过下属科室都要亲口交代24口<span class="highlight">交换</span>机等技术性器件的安装工作
}

View File

@@ -60,30 +60,29 @@ func ExampleTokenize() {
fmt.Println("Default Mode:")
for _, token := range tokenizer.Tokenize(sentence) {
fmt.Printf(
"Term: %s\tStart: %d\tEnd: %d\tPosition: %d\tType: %d\n",
"Term: %s Start: %d End: %d Position: %d Type: %d\n",
token.Term, token.Start, token.End, token.Position, token.Type)
}
fmt.Println()
//search mode
tokenizer, _ = jiebago.NewJiebaTokenizer("dict.txt", true, true)
fmt.Println("Search Mode:")
for _, token := range tokenizer.Tokenize(sentence) {
fmt.Printf(
"Term: %s\tStart: %d\tEnd: %d\tPosition: %d\tType: %d\n",
"Term: %s Start: %d End: %d Position: %d Type: %d\n",
token.Term, token.Start, token.End, token.Position, token.Type)
}
// Output:
// Default Mode:
// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
// Term: 有限公司 Start: 18 End: 30 Position: 4 Type: 1
// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
// Term: 有限公司 Start: 18 End: 30 Position: 4 Type: 1
// Search Mode:
// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
// Term: 有限 Start: 18 End: 24 Position: 4 Type: 1
// Term: 公司 Start: 24 End: 30 Position: 5 Type: 1
// Term: 有限公司 Start: 18 End: 30 Position: 6 Type: 1
// Term: 永和 Start: 0 End: 6 Position: 1 Type: 1
// Term: 服装 Start: 6 End: 12 Position: 2 Type: 1
// Term: 饰品 Start: 12 End: 18 Position: 3 Type: 1
// Term: 有限 Start: 18 End: 24 Position: 4 Type: 1
// Term: 公司 Start: 24 End: 30 Position: 5 Type: 1
// Term: 有限公司 Start: 18 End: 30 Position: 6 Type: 1
}

21
posseg/example_test.go Normal file
View File

@@ -0,0 +1,21 @@
package posseg_test
import (
"fmt"
"github.com/wangbin/jiebago/posseg"
)
func Example() {
var seg posseg.Segmenter
seg.LoadDictionary("../dict.txt")
for segment := range seg.Cut("我爱北京天安门", true) {
fmt.Printf("%s %s\n", segment.Text(), segment.Pos())
}
// Output:
// 我 r
// 爱 v
// 北京 ns
// 天安门 ns
}