code refactor, added more documents

2026-07-17 19:10:24 +08:00 · 2015-05-06 12:55:04 +08:00
parent 87caff09cb
commit 122bad0a8d
23 changed files with 228 additions and 142 deletions
--- a/analyse/analyse_test.go
+++ b/analyse/analyse_test.go
@@ -6,7 +6,7 @@ import (
 )

 var (
-	test_contents = []string{
+	testContents = []string{
 		"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。",
 		"我不喜欢日本和服。",
 		"雷猴回归人间。",
@@ -259,7 +259,7 @@ func TestExtractTags(t *testing.T) {
 	te.LoadDictionary("../dict.txt")
 	te.LoadIdf("idf.txt")

-	for index, sentence := range test_contents {
+	for index, sentence := range testContents {
 		result := te.ExtractTags(sentence, 20)
 		if len(result) != len(Tags[index]) {
 			t.Fatalf("%s = %v", sentence, result)
--- a/analyse/idf.go
+++ b/analyse/idf.go
@@ -7,6 +7,8 @@ import (
 	"github.com/wangbin/jiebago/dictionary"
 )

+// Idf represents a thread-safe dictionary for all words with their
+// IDFs(Inverse Document Frequency).
 type Idf struct {
 	freqMap map[string]float64
 	median  float64
@@ -14,6 +16,7 @@ type Idf struct {
 	sync.RWMutex
 }

+// AddToken adds a new word with IDF into its dictionary.
 func (i *Idf) AddToken(token dictionary.Token) {
 	i.Lock()
 	i.freqMap[token.Text()] = token.Frequency()
@@ -23,6 +26,7 @@ func (i *Idf) AddToken(token dictionary.Token) {
 	i.Unlock()
 }

+// Load loads all tokens from channel into its dictionary.
 func (i *Idf) Load(ch <-chan dictionary.Token) {
 	i.Lock()
 	for token := range ch {
@@ -38,6 +42,7 @@ func (i *Idf) loadDictionary(fileName string) error {
 	return dictionary.LoadDictionary(i, fileName)
 }

+// Frequency returns the IDF of given word.
 func (i *Idf) Frequency(key string) (float64, bool) {
 	i.RLock()
 	freq, ok := i.freqMap[key]
@@ -45,6 +50,7 @@ func (i *Idf) Frequency(key string) (float64, bool) {
 	return freq, ok
 }

+// NewIdf creates a new Idf instance.
 func NewIdf() *Idf {
 	return &Idf{freqMap: make(map[string]float64), freqs: make([]float64, 0)}
 }
--- a/analyse/stopwords.go
+++ b/analyse/stopwords.go
@@ -6,6 +6,7 @@ import (
 	"github.com/wangbin/jiebago/dictionary"
 )

+// DefaultStopWordMap contains some stop words.
 var DefaultStopWordMap = map[string]int{
 	"the":   1,
 	"of":    1,
@@ -40,23 +41,27 @@ var DefaultStopWordMap = map[string]int{
 	"or":    1,
 }

+// StopWord is a thread-safe dictionary for all stop words.
 type StopWord struct {
 	stopWordMap map[string]int
 	sync.RWMutex
 }

+// AddToken adds a token into StopWord dictionary.
 func (s *StopWord) AddToken(token dictionary.Token) {
 	s.Lock()
 	s.stopWordMap[token.Text()] = 1
 	s.Unlock()
 }

+// NewStopWord creates a new StopWord with default stop words.
 func NewStopWord() *StopWord {
 	s := new(StopWord)
 	s.stopWordMap = DefaultStopWordMap
 	return s
 }

+// IsStopWord checks if a given word is a stop word.
 func (s *StopWord) IsStopWord(word string) bool {
 	s.RLock()
 	_, ok := s.stopWordMap[word]
@@ -64,6 +69,7 @@ func (s *StopWord) IsStopWord(word string) bool {
 	return ok
 }

+// Load loads all tokens from given channel into StopWord dictionary.
 func (s *StopWord) Load(ch <-chan dictionary.Token) {
 	s.Lock()
 	for token := range ch {
--- a/analyse/tag_extracker.go
+++ b/analyse/tag_extracker.go
@@ -1,7 +1,6 @@
 package analyse

 import (
-	"fmt"
 	"sort"
 	"strings"
 	"unicode/utf8"
@@ -9,23 +8,23 @@ import (
 	"github.com/wangbin/jiebago"
 )

+// Segment represents a word with weight.
 type Segment struct {
 	text   string
 	weight float64
 }

+// Text returns the segment's text.
 func (s Segment) Text() string {
 	return s.text
 }

+// Weight returns the segment's weight.
 func (s Segment) Weight() float64 {
 	return s.weight
 }

-func (s Segment) String() string {
-	return fmt.Sprintf("{%s: %f}", s.text, s.weight)
-}
-
+// Segments represents a slice of Segment.
 type Segments []Segment

 func (ss Segments) Len() int {
@@ -44,29 +43,33 @@ func (ss Segments) Swap(i, j int) {
 	ss[i], ss[j] = ss[j], ss[i]
 }

+// TagExtracter is used to extract tags from sentence.
 type TagExtracter struct {
 	seg      *jiebago.Segmenter
 	idf      *Idf
 	stopWord *StopWord
 }

+// LoadDictionary reads the given file and creates a new dictionary.
 func (t *TagExtracter) LoadDictionary(fileName string) error {
 	t.stopWord = NewStopWord()
 	t.seg = new(jiebago.Segmenter)
 	return t.seg.LoadDictionary(fileName)
 }

+// LoadIdf reads the given file and creates a new Idf dictionary.
 func (t *TagExtracter) LoadIdf(fileName string) error {
 	t.idf = NewIdf()
 	return t.idf.loadDictionary(fileName)
 }

+// LoadStopWords reads the given file and creates a new StopWord dictionary.
 func (t *TagExtracter) LoadStopWords(fileName string) error {
 	t.stopWord = NewStopWord()
 	return t.stopWord.loadDictionary(fileName)
 }

-// Keyword extraction.
+// ExtractTags extracts the topK key words from sentence.
 func (t *TagExtracter) ExtractTags(sentence string, topK int) (tags Segments) {
 	freqMap := make(map[string]float64)

--- a/analyse/textrank.go
+++ b/analyse/textrank.go
@@ -1,7 +1,6 @@
 package analyse

 import (
-	"fmt"
 	"math"
 	"sort"

@@ -20,10 +19,6 @@ type edge struct {
 	weight float64
 }

-func (e edge) String() string {
-	return fmt.Sprintf("(%s %s): %f", e.start, e.end, e.weight)
-}
-
 type edges []edge

 func (es edges) Len() int {
@@ -114,8 +109,8 @@ func (u *undirectWeightedGraph) rank() Segments {
 	return result
 }

-// Extract keywords from sentence using TextRank algorithm. the allowed POS list
-// could be manually speificed.
+// TextRankWithPOS extracts keywords from sentence using TextRank algorithm.
+// Parameter allowPOS allows a customized pos list.
 func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []string) Segments {
 	posFilt := make(map[string]int)
 	for _, pos := range allowPOS {
@@ -124,7 +119,7 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
 	g := newUndirectWeightedGraph()
 	cm := make(map[[2]string]float64)
 	span := 5
-	pairs := make([]posseg.Segment, 0)
+	var pairs []posseg.Segment
 	for pair := range t.seg.Cut(sentence, true) {
 		pairs = append(pairs, pair)
 	}
@@ -152,16 +147,18 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
 	return tags
 }

-// Extract keywords from sentence using TextRank algorithm.
-// topK specify how many top keywords to be returned at most.
+// TextRank extracts keywords from sentence using TextRank algorithm.
+// Parameter topK specifies how many top keywords to be returned at most.
 func (t *TextRanker) TextRank(sentence string, topK int) Segments {
 	return t.TextRankWithPOS(sentence, topK, defaultAllowPOS)
 }

+// TextRanker is used to extract tags from sentence.
 type TextRanker struct {
 	seg *posseg.Segmenter
 }

+// LoadDictionary reads a given file and creates a new dictionary for TextRanker.
 func (t *TextRanker) LoadDictionary(fileName string) error {
 	t.seg = new(posseg.Segmenter)
 	return t.seg.LoadDictionary(fileName)
--- a/dictionary.go
+++ b/dictionary.go
@@ -14,7 +14,7 @@ type Dictionary struct {
 	sync.RWMutex
 }

-// Load loads all tokens from channel
+// Load loads all tokens from given channel
 func (d *Dictionary) Load(ch <-chan dictionary.Token) {
 	d.Lock()
 	for token := range ch {
@@ -49,7 +49,7 @@ func (d *Dictionary) updateLogTotal() {
 	d.logTotal = math.Log(d.total)
 }

-// Frequency returns the frequency of give word, if not found, the second result is false
+// Frequency returns the frequency and existence of the given word.
 func (d *Dictionary) Frequency(key string) (float64, bool) {
 	d.RLock()
 	freq, ok := d.freqMap[key]
--- a/dictionary/dictionary.go
+++ b/dictionary/dictionary.go
@@ -8,6 +8,8 @@ import (
 	"strings"
 )

+// DictLoader represents an interface that could add one token or load a bunch of
+// tokens from channel.
 type DictLoader interface {
 	Load(<-chan Token)
 	AddToken(Token)
@@ -49,6 +51,7 @@ func loadDictionary(file *os.File) (<-chan Token, <-chan error) {

 }

+// LoadDictionary reads the given file and passes all tokens to a DictLoader.
 func LoadDictionary(dl DictLoader, fileName string) error {
 	filePath, err := dictPath(fileName)
 	if err != nil {
--- a/dictionary/token.go
+++ b/dictionary/token.go
@@ -1,23 +1,28 @@
 package dictionary

+// Token represents a Chinese word with (optional) frequency and POS.
 type Token struct {
 	text      string
 	frequency float64
 	pos       string
 }

+// Text returns token's text.
 func (t Token) Text() string {
 	return t.text
 }

+// Frequency returns token's frequency.
 func (t Token) Frequency() float64 {
 	return t.frequency
 }

+// Pos returns token's POS.
 func (t Token) Pos() string {
 	return t.pos
 }

+// NewToken creates a new token.
 func NewToken(text string, frequency float64, pos string) Token {
 	return Token{text: text, frequency: frequency, pos: pos}
 }
--- a/finalseg/finalseg.go
+++ b/finalseg/finalseg.go
@@ -13,10 +13,10 @@ func cutHan(sentence string) chan string {
 	result := make(chan string)
 	go func() {
 		runes := []rune(sentence)
-		_, pos_list := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
+		_, posList := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
 		begin, next := 0, 0
 		for i, char := range runes {
-			pos := pos_list[i]
+			pos := posList[i]
 			switch pos {
 			case 'B':
 				begin = i
@@ -36,6 +36,8 @@ func cutHan(sentence string) chan string {
 	return result
 }

+// Cut cuts sentence into words using Hidden Markov Model with Viterbi
+// algorithm. It is used by Jiebago for unknown words.
 func Cut(sentence string) chan string {
 	result := make(chan string)
 	s := sentence
--- a/finalseg/finalseg_test.go
+++ b/finalseg/finalseg_test.go
@@ -6,7 +6,7 @@ import (
 )

 func chanToArray(ch chan string) []string {
-	result := make([]string, 0)
+	var result []string
 	for word := range ch {
 		result = append(result, word)
 	}
--- a/finalseg/viterbi.go
+++ b/finalseg/viterbi.go
@@ -67,11 +67,11 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
 		V[t] = make(map[byte]float64)
 		for _, y := range states {
 			ps0 := make(probStates, 0)
-			var em_p float64
+			var emP float64
 			if val, ok := probEmit[y][obs[t]]; ok {
-				em_p = val
+				emP = val
 			} else {
-				em_p = minFloat
+				emP = minFloat
 			}
 			for _, y0 := range prevStatus[y] {
 				var transP float64
@@ -80,7 +80,7 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
 				} else {
 					transP = minFloat
 				}
-				prob0 := V[t-1][y0] + transP + em_p
+				prob0 := V[t-1][y0] + transP + emP
 				ps0 = append(ps0, &probState{prob: prob0, state: y0})
 			}
 			sort.Sort(sort.Reverse(ps0))
--- a/jieba.go
+++ b/jieba.go
@@ -16,15 +16,21 @@ var (
 	reSkipDefault = regexp.MustCompile(`(\r\n|\s)`)
 )

+// Segmenter is a Chinese words segmentation struct.
 type Segmenter struct {
 	dict *Dictionary
 }

+// LoadDictionary loads dictionary from given file name. Every time
+// LoadDictionary is called, previously loaded dictionary will be cleared.
 func (seg *Segmenter) LoadDictionary(fileName string) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64)}
 	return seg.dict.loadDictionary(fileName)
 }

+// LoadUserDictionary loads a user specified dictionary, it must be called
+// after LoadDictionary, and it will not clear any previously loaded dictionary,
+// instead it will override existing entries.
 func (seg *Segmenter) LoadUserDictionary(fileName string) error {
 	return seg.dict.loadDictionary(fileName)
 }
@@ -46,7 +52,7 @@ func (seg *Segmenter) dag(runes []rune) map[int][]int {
 			if freq > 0.0 {
 				dag[k] = append(dag[k], i)
 			}
-			i += 1
+			i++
 			if i >= n {
 				break
 			}
@@ -98,7 +104,7 @@ func (seg *Segmenter) cutDAG(sentence string) <-chan string {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -156,7 +162,7 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan string {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -181,6 +187,10 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan string {
 	return result
 }

+// Cut cuts a sentence into words using accurate mode.
+// Parameter hmm controls whether to use the Hidden Markov Model.
+// Accurate mode attempts to cut the sentence into the most accurate
+// segmentations, which is suitable for text analysis.
 func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan string {
 	result := make(chan string)
 	var cut cutFunc
@@ -246,6 +256,9 @@ func (seg *Segmenter) cutAll(sentence string) <-chan string {
 	return result
 }

+// CutAll cuts a sentence into words using full mode.
+// Full mode gets all the possible words from the sentence.
+// Fast but not accurate.
 func (seg *Segmenter) CutAll(sentence string) <-chan string {
 	result := make(chan string)
 	go func() {
@@ -268,6 +281,10 @@ func (seg *Segmenter) CutAll(sentence string) <-chan string {
 	return result
 }

+// CutForSearch cuts sentence into words using search engine mode.
+// Search engine mode, based on the accurate mode, attempts to cut long words
+// into several short words, which can raise the recall rate.
+// Suitable for search engines.
 func (seg *Segmenter) CutForSearch(sentence string, hmm bool) <-chan string {
 	result := make(chan string)
 	go func() {
--- a/jieba_test.go
+++ b/jieba_test.go
@@ -3,8 +3,8 @@ package jiebago
 import "testing"

 var (
-	seg           Segmenter
-	test_contents = []string{
+	seg          Segmenter
+	testContents = []string{
 		"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。",
 		"我不喜欢日本和服。",
 		"雷猴回归人间。",
@@ -620,7 +620,7 @@ func init() {
 }

 func chanToArray(ch <-chan string) []string {
-	result := make([]string, 0)
+	var result []string
 	for word := range ch {
 		result = append(result, word)
 	}
@@ -643,7 +643,7 @@ func TestCutDAGNoHmm(t *testing.T) {

 func TestDefaultCut(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.Cut(content, true))
 		if len(result) != len(defaultCutResult[index]) {
 			t.Errorf("default cut for %s length should be %d not %d\n",
@@ -661,7 +661,7 @@ func TestDefaultCut(t *testing.T) {

 func TestCutAll(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.CutAll(content))
 		if len(result) != len(cutAllResult[index]) {
 			t.Errorf("cut all for %s length should be %d not %d\n",
@@ -679,7 +679,7 @@ func TestCutAll(t *testing.T) {

 func TestDefaultCutNoHMM(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.Cut(content, false))
 		if len(result) != len(defaultCutNoHMMResult[index]) {
 			t.Fatalf("default cut no hmm for %s length should be %d not %d\n",
@@ -695,7 +695,7 @@ func TestDefaultCutNoHMM(t *testing.T) {

 func TestCutForSearch(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.CutForSearch(content, true))
 		if len(result) != len(cutForSearchResult[index]) {
 			t.Fatalf("cut for search for %s length should be %d not %d\n",
@@ -707,7 +707,7 @@ func TestCutForSearch(t *testing.T) {
 			}
 		}
 	}
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.CutForSearch(content, false))
 		if len(result) != len(cutForSearchNoHMMResult[index]) {
 			t.Fatalf("cut for search no hmm for %s length should be %d not %d\n",
@@ -724,7 +724,7 @@ func TestCutForSearch(t *testing.T) {
 func TestLoadDictionary(t *testing.T) {
 	var result []string
 	seg.LoadDictionary("foobar.txt")
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.Cut(content, true))
 		if len(result) != len(userDictCutResult[index]) {
 			t.Fatalf("default cut with user dictionary for %s length should be %d not %d\n",
--- a/posseg/char_state_tab.go
+++ b/posseg/char_state_tab.go
@@ -2,9 +2,9 @@ package posseg

 import "fmt"

-type Tag uint16
+type tag uint16

-func (t Tag) Tag() string {
+func (t tag) position() string {
 	switch t / 100 {
 	case 4:
 		return "S"
@@ -19,31 +19,29 @@ func (t Tag) Tag() string {
 	}
 }

-func (t Tag) POS() string {
+func (t tag) pos() string {
 	return poss[t%100]
 }

-func (t Tag) String() string {
-	return fmt.Sprintf("(%s, %s)", t.Tag(), t.POS())
-}
-
-func NewTag(tag, pos string) (Tag, error) {
-	tagIndex := -1
+func newTag(position, pos string) (tag, error) {
+	positionIndex := -1
 	posIndex := -1
-	for i, t := range tags {
-		if tag == t {
-			tagIndex = (i + 1) * 100
+	for i, p := range positions {
+		if position == p {
+			positionIndex = (i + 1) * 100
+			break
 		}
 	}
 	for i, p := range poss {
 		if pos == p {
 			posIndex = i
+			break
 		}
 	}
-	if tagIndex < 0 || posIndex < 0 {
-		return 0, fmt.Errorf("Failed to convert %s %s to Tag", tag, pos)
+	if positionIndex < 0 || posIndex < 0 {
+		return 0, fmt.Errorf("Failed to convert %s %s to Tag", position, pos)
 	}
-	return Tag(tagIndex + posIndex), nil
+	return tag(positionIndex + posIndex), nil
 }

 type charStateTabMap map[rune][]uint16
@@ -51,9 +49,8 @@ type charStateTabMap map[rune][]uint16
 func (m charStateTabMap) get(key rune) []uint16 {
 	if value, ok := m[key]; ok {
 		return value
-	} else {
-		return probTransKeys
 	}
+	return probTransKeys
 }

 var (
@@ -6708,6 +6705,6 @@ var (
 		'\u9fa0': []uint16{413},
 	}

-	tags = []string{"B", "E", "M", "S"}
-	poss = []string{"a", "ad", "ag", "an", "b", "bg", "c", "d", "df", "dg", "e", "en", "f", "g", "h", "i", "in", "j", "jn", "k", "l", "ln", "m", "mg", "mq", "n", "ng", "nr", "nrfg", "nrt", "ns", "nt", "nz", "o", "p", "q", "qe", "qg", "r", "rg", "rr", "rz", "s", "t", "tg", "u", "ud", "ug", "uj", "ul", "uv", "uz", "v", "vd", "vg", "vi", "vn", "vq", "w", "x", "y", "yg", "z", "zg"}
+	positions = []string{"B", "E", "M", "S"}
+	poss      = []string{"a", "ad", "ag", "an", "b", "bg", "c", "d", "df", "dg", "e", "en", "f", "g", "h", "i", "in", "j", "jn", "k", "l", "ln", "m", "mg", "mq", "n", "ng", "nr", "nrfg", "nrt", "ns", "nt", "nz", "o", "p", "q", "qe", "qg", "r", "rg", "rr", "rz", "s", "t", "tg", "u", "ud", "ug", "uj", "ul", "uv", "uz", "v", "vd", "vg", "vi", "vn", "vq", "w", "x", "y", "yg", "z", "zg"}
 )
--- a/posseg/dictionary.go
+++ b/posseg/dictionary.go
@@ -7,6 +7,7 @@ import (
 	"github.com/wangbin/jiebago/dictionary"
 )

+// A Dictionary represents a thread-safe dictionary used for word segmentation.
 type Dictionary struct {
 	total, logTotal float64
 	freqMap         map[string]float64
@@ -14,6 +15,7 @@ type Dictionary struct {
 	sync.RWMutex
 }

+// Load loads all tokens from given channel
 func (d *Dictionary) Load(ch <-chan dictionary.Token) {
 	d.Lock()
 	for token := range ch {
@@ -23,6 +25,7 @@ func (d *Dictionary) Load(ch <-chan dictionary.Token) {
 	d.updateLogTotal()
 }

+// AddToken adds one token
 func (d *Dictionary) AddToken(token dictionary.Token) {
 	d.Lock()
 	d.addToken(token)
@@ -50,6 +53,7 @@ func (d *Dictionary) updateLogTotal() {
 	d.logTotal = math.Log(d.total)
 }

+// Frequency returns the frequency and existence of the given word.
 func (d *Dictionary) Frequency(key string) (float64, bool) {
 	d.RLock()
 	freq, ok := d.freqMap[key]
@@ -57,6 +61,7 @@ func (d *Dictionary) Frequency(key string) (float64, bool) {
 	return freq, ok
 }

+// Pos returns the POS and existence of the given word.
 func (d *Dictionary) Pos(key string) (string, bool) {
 	d.RLock()
 	pos, ok := d.posMap[key]
--- a/posseg/posseg.go
+++ b/posseg/posseg.go
@@ -17,27 +17,36 @@ var (
 	reSkipInternal = regexp.MustCompile(`(\r\n|\s)`)
 )

+// Segment represents a word with its POS.
 type Segment struct {
 	text, pos string
 }

+// Text returns the Segment's text.
 func (s Segment) Text() string {
 	return s.text
 }

+// Pos returns the Segment's POS.
 func (s Segment) Pos() string {
 	return s.pos
 }

+// Segmenter is a Chinese words segmentation struct.
 type Segmenter struct {
 	dict *Dictionary
 }

+// LoadDictionary loads dictionary from given file name.
+// Every time LoadDictionary is called, previously loaded dictionary will be cleared.
 func (seg *Segmenter) LoadDictionary(fileName string) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
 	return seg.dict.loadDictionary(fileName)
 }

+// LoadUserDictionary loads a user specified dictionary, it must be called
+// after LoadDictionary, and it will not clear any previously loaded dictionary,
+// instead it will override existing entries.
 func (seg *Segmenter) LoadUserDictionary(fileName string) error {
 	return seg.dict.loadDictionary(fileName)
 }
@@ -52,19 +61,19 @@ func (seg *Segmenter) cutDetailInternal(sentence string) <-chan Segment {
 		next := 0
 		for i, char := range runes {
 			pos := posList[i]
-			switch pos.Tag() {
+			switch pos.position() {
 			case "B":
 				begin = i
 			case "E":
-				result <- Segment{string(runes[begin : i+1]), pos.POS()}
+				result <- Segment{string(runes[begin : i+1]), pos.pos()}
 				next = i + 1
 			case "S":
-				result <- Segment{string(char), pos.POS()}
+				result <- Segment{string(char), pos.pos()}
 				next = i + 1
 			}
 		}
 		if next < len(runes) {
-			result <- Segment{string(runes[next:]), posList[next].POS()}
+			result <- Segment{string(runes[next:]), posList[next].pos()}
 		}
 		close(result)
 	}()
@@ -117,7 +126,7 @@ func (seg *Segmenter) dag(runes []rune) map[int][]int {
 			if freq > 0.0 {
 				dag[k] = append(dag[k], i)
 			}
-			i += 1
+			i++
 			if i >= n {
 				break
 			}
@@ -170,7 +179,7 @@ func (seg *Segmenter) cutDAG(sentence string) <-chan Segment {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -253,7 +262,7 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -283,6 +292,8 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
 	return result
 }

+// Cut cuts a sentence into words.
+// Parameter hmm controls whether to use the Hidden Markov Model.
 func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan Segment {
 	result := make(chan Segment)
 	var cut cutFunc
--- a/posseg/posseg_test.go
+++ b/posseg/posseg_test.go
@@ -5,8 +5,8 @@ import (
 )

 var (
-	seg           Segmenter
-	test_contents = []string{
+	seg          Segmenter
+	testContents = []string{
 		"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。",
 		"我不喜欢日本和服。",
 		"雷猴回归人间。",
@@ -273,7 +273,7 @@ func init() {
 }

 func chanToArray(ch <-chan Segment) []Segment {
-	result := make([]Segment, 0)
+	var result []Segment
 	for word := range ch {
 		result = append(result, word)
 	}
@@ -281,7 +281,7 @@ func chanToArray(ch <-chan Segment) []Segment {
 }

 func TestCut(t *testing.T) {
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result := chanToArray(seg.Cut(content, true))
 		if len(defaultCutResult[index]) != len(result) {
 			t.Errorf("default cut for %s length should be %d not %d\n",
@@ -289,7 +289,7 @@ func TestCut(t *testing.T) {
 			t.Errorf("expect: %v\n", defaultCutResult[index])
 			t.Fatalf("got: %v\n", result)
 		}
-		for i, _ := range result {
+		for i := range result {
 			if result[i] != defaultCutResult[index][i] {
 				t.Fatalf("expect %s, got %s", defaultCutResult[index][i], result[i])
 			}
@@ -298,7 +298,7 @@ func TestCut(t *testing.T) {
 		if len(noHMMCutResult[index]) != len(result) {
 			t.Fatal(content)
 		}
-		for i, _ := range result {
+		for i := range result {
 			if result[i] != noHMMCutResult[index][i] {
 				t.Fatal(content)
 			}
@@ -320,7 +320,7 @@ func TestBug132(t *testing.T) {
 	if len(cutResult) != len(result) {
 		t.Fatal(result)
 	}
-	for i, _ := range result {
+	for i := range result {
 		if result[i] != cutResult[i] {
 			t.Fatal(result[i])
 		}
@@ -349,7 +349,7 @@ func TestBug137(t *testing.T) {
 	if len(cutResult) != len(result) {
 		t.Fatal(result)
 	}
-	for i, _ := range result {
+	for i := range result {
 		if result[i] != cutResult[i] {
 			t.Fatal(result[i])
 		}
@@ -404,7 +404,7 @@ func TestUserDict(t *testing.T) {
 	if len(cutResult) != len(result) {
 		t.Fatal(result)
 	}
-	for i, _ := range result {
+	for i := range result {
 		if result[i] != cutResult[i] {
 			t.Fatal(result[i])
 		}
--- a/posseg/prob_emit.go
+++ b/posseg/prob_emit.go
@@ -1,15 +1,14 @@
 package posseg

-const MinFloat = -3.14e100
+const minFloat = -3.14e100

 type runeFloatMap map[rune]float64

 func (m runeFloatMap) get(key rune) float64 {
 	if value, ok := m[key]; ok {
 		return value
-	} else {
-		return MinFloat
 	}
+	return minFloat
 }

 var probEmit = map[uint16]runeFloatMap{
--- a/posseg/prob_trans.go
+++ b/posseg/prob_trans.go
@@ -11,9 +11,8 @@ type probTransMap map[uint16]float64
 func (m probTransMap) Get(key uint16) float64 {
 	if value, ok := m[key]; ok {
 		return value
-	} else {
-		return inf
 	}
+	return inf
 }

 var (
--- a/posseg/viterbi.go
+++ b/posseg/viterbi.go
@@ -31,52 +31,52 @@ func (pss probStates) Swap(i, j int) {
 	pss[i], pss[j] = pss[j], pss[i]
 }

-func viterbi(obs []rune) []Tag {
+func viterbi(obs []rune) []tag {
 	obsLength := len(obs)
 	V := make([]map[uint16]float64, obsLength)
 	V[0] = make(map[uint16]float64)
-	mem_path := make([]map[uint16]uint16, obsLength)
-	mem_path[0] = make(map[uint16]uint16)
+	memPath := make([]map[uint16]uint16, obsLength)
+	memPath[0] = make(map[uint16]uint16)
 	ys := charStateTab.get(obs[0]) // default is all_states
 	for _, y := range ys {
 		V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
-		mem_path[0][y] = 0
+		memPath[0][y] = 0
 	}
 	for t := 1; t < obsLength; t++ {
-		prev_states := make([]uint16, 0)
-		for x := range mem_path[t-1] {
+		var prevStates []uint16
+		for x := range memPath[t-1] {
 			if len(probTrans[x]) > 0 {
-				prev_states = append(prev_states, x)
+				prevStates = append(prevStates, x)
 			}
 		}
 		//use Go's map to implement Python's Set()
-		prev_states_expect_next := make(map[uint16]int)
-		for _, x := range prev_states {
+		prevStatesExpectNext := make(map[uint16]int)
+		for _, x := range prevStates {
 			for y := range probTrans[x] {
-				prev_states_expect_next[y] = 1
+				prevStatesExpectNext[y] = 1
 			}
 		}
-		tmp_obs_states := charStateTab.get(obs[t])
+		tmpObsStates := charStateTab.get(obs[t])

-		obs_states := make([]uint16, 0)
-		for index := range tmp_obs_states {
-			if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
-				obs_states = append(obs_states, tmp_obs_states[index])
+		var obsStates []uint16
+		for index := range tmpObsStates {
+			if _, ok := prevStatesExpectNext[tmpObsStates[index]]; ok {
+				obsStates = append(obsStates, tmpObsStates[index])
 			}
 		}
-		if len(obs_states) == 0 {
-			for key := range prev_states_expect_next {
-				obs_states = append(obs_states, key)
+		if len(obsStates) == 0 {
+			for key := range prevStatesExpectNext {
+				obsStates = append(obsStates, key)
 			}
 		}
-		if len(obs_states) == 0 {
-			obs_states = probTransKeys
+		if len(obsStates) == 0 {
+			obsStates = probTransKeys
 		}
-		mem_path[t] = make(map[uint16]uint16)
+		memPath[t] = make(map[uint16]uint16)
 		V[t] = make(map[uint16]float64)
-		for _, y := range obs_states {
+		for _, y := range obsStates {
 			var max, ps probState
-			for i, y0 := range prev_states {
+			for i, y0 := range prevStates {
 				ps = probState{
 					prob:  V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]),
 					state: y0}
@@ -85,23 +85,23 @@ func viterbi(obs []rune) []Tag {
 				}
 			}
 			V[t][y] = max.prob
-			mem_path[t][y] = max.state
+			memPath[t][y] = max.state
 		}
 	}
 	last := make(probStates, 0)
-	length := len(mem_path)
+	length := len(memPath)
 	vlength := len(V)
-	for y := range mem_path[length-1] {
+	for y := range memPath[length-1] {
 		ps := probState{prob: V[vlength-1][y], state: y}
 		last = append(last, ps)
 	}
 	sort.Sort(sort.Reverse(last))
 	state := last[0].state
-	route := make([]Tag, len(obs))
+	route := make([]tag, len(obs))

 	for i := obsLength - 1; i >= 0; i-- {
-		route[i] = Tag(state)
-		state = mem_path[i][state]
+		route[i] = tag(state)
+		state = memPath[i][state]
 	}
 	return route
 }
--- a/posseg/viterbi_test.go
+++ b/posseg/viterbi_test.go
@@ -4,49 +4,49 @@ import (
 	"testing"
 )

-var defaultRoute []Tag
+var defaultRoute []tag

 func init() {
-	var t Tag
-	t, _ = NewTag("B", "nr")
+	var t tag
+	t, _ = newTag("B", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "nr")
+	t, _ = newTag("E", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "v")
+	t, _ = newTag("S", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "v")
+	t, _ = newTag("B", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "v")
+	t, _ = newTag("E", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "n")
+	t, _ = newTag("B", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "n")
+	t, _ = newTag("M", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "n")
+	t, _ = newTag("E", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "d")
+	t, _ = newTag("S", "d")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "v")
+	t, _ = newTag("S", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "n")
+	t, _ = newTag("S", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "v")
+	t, _ = newTag("B", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "v")
+	t, _ = newTag("E", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "nr")
+	t, _ = newTag("B", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "nr")
+	t, _ = newTag("E", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "zg")
+	t, _ = newTag("S", "zg")
 	defaultRoute = append(defaultRoute, t)
 }

--- a/tokenizer.go
+++ b/tokenizer.go
@@ -9,18 +9,40 @@ import (
 	"github.com/blevesearch/bleve/registry"
 )

+// Name is the jieba tokenizer name.
 const Name = "jieba"

-var IdeographRegexp = regexp.MustCompile(`\p{Han}+`)
+var ideographRegexp = regexp.MustCompile(`\p{Han}+`)

+// JiebaTokenizer is the bleve tokenizer for jiebago.
 type JiebaTokenizer struct {
 	seg             Segmenter
 	hmm, searchMode bool
 }

-func NewJiebaTokenizer(dictFileName string, hmm, searchMode bool) (analysis.Tokenizer, error) {
+/*
+NewJiebaTokenizer creates a new JiebaTokenizer.
+
+Parameters:
+
+    dictFilePath: path of the dictionary file.
+
+    hmm: whether to use Hidden Markov Model to cut unknown words,
+    i.e. not found in dictionary. For example word "安卓" (means "Android" in
+    English) is not in the dictionary file. If hmm is set to false, it will be
+    cut into two single words "安" and "卓", if hmm is set to true, it will
+    be treated as one single word because Jieba uses Hidden Markov Model with
+    Viterbi algorithm to guess the best possibility.
+
+    searchMode: whether to further cut long words into several short words.
+    In Chinese, some long words may contain other words, for example "交换机"
+    is a Chinese word for "Switcher", if searchMode is false, it will treat
+    "交换机" as a single word. If searchMode is true, it will further split
+    this word into "交换", "换机", which are valid Chinese words.
+*/
+func NewJiebaTokenizer(dictFilePath string, hmm, searchMode bool) (analysis.Tokenizer, error) {
 	var seg Segmenter
-	err := seg.LoadDictionary(dictFileName)
+	err := seg.LoadDictionary(dictFilePath)
 	return &JiebaTokenizer{
 		seg:        seg,
 		hmm:        hmm,
@@ -28,6 +50,7 @@ func NewJiebaTokenizer(dictFileName string, hmm, searchMode bool) (analysis.Toke
 	}, err
 }

+// Tokenize cuts input into bleve token stream.
 func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
 	rv := make(analysis.TokenStream, 0)
 	runeStart := 0
@@ -77,9 +100,20 @@ func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
 	return rv
 }

+/*
+JiebaTokenizerConstructor creates a JiebaTokenizer.
+
+Parameter config should contain at least one parameter:
+
+    file: the path of the dictionary file.
+
+    hmm: optional, specify whether to use Hidden Markov Model, see NewJiebaTokenizer for details.
+
+    search: optional, specify whether to use search mode, see NewJiebaTokenizer for details.
+*/
 func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (
 	analysis.Tokenizer, error) {
-	dictFileName, ok := config["file"].(string)
+	dictFilePath, ok := config["file"].(string)
 	if !ok {
 		return nil, fmt.Errorf("must specify dictionary file path")
 	}
@@ -92,11 +126,11 @@ func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Ca
 		searchMode = true
 	}

-	return NewJiebaTokenizer(dictFileName, hmm, searchMode)
+	return NewJiebaTokenizer(dictFilePath, hmm, searchMode)
 }

 func detectTokenType(term string) analysis.TokenType {
-	if IdeographRegexp.MatchString(term) {
+	if ideographRegexp.MatchString(term) {
 		return analysis.Ideographic
 	}
 	_, err := strconv.ParseFloat(term, 64)
--- a/util/util.go
+++ b/util/util.go
@@ -2,12 +2,14 @@ package util

 import "regexp"

-// RegexpSplit split slices s into substrings separated by the expression and
-// returns a slice of the substrings between those expression matches.
-// If capturing parentheses are used in expression, then the text of all groups
-// in the expression are also returned as part of the resulting slice.
-//
-// This function acts consistent with Python's re.split function.
+/*
+RegexpSplit slices s into substrings separated by the expression and
+returns a slice of the substrings between those expression matches.
+If capturing parentheses are used in expression, then the text of all groups
+in the expression are also returned as part of the resulting slice.
+
+This function behaves consistently with Python's re.split function.
+*/
 func RegexpSplit(re *regexp.Regexp, s string, n int) []string {
 	if n == 0 {
 		return nil