code refactor, added more documents

2026-06-26 06:50:23 +08:00 · 2015-05-06 12:55:04 +08:00
parent 87caff09cb
commit 122bad0a8d
23 changed files with 228 additions and 142 deletions
--- a/analyse/analyse_test.go
+++ b/analyse/analyse_test.go
@@ -6,7 +6,7 @@ import (
 )
 var (
-	test_contents = []string{
+	testContents = []string{
 		"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。",
 		"我不喜欢日本和服。",
 		"雷猴回归人间。",
@@ -259,7 +259,7 @@ func TestExtractTags(t *testing.T) {
 	te.LoadDictionary("../dict.txt")
 	te.LoadIdf("idf.txt")
-	for index, sentence := range test_contents {
+	for index, sentence := range testContents {
 		result := te.ExtractTags(sentence, 20)
 		if len(result) != len(Tags[index]) {
 			t.Fatalf("%s = %v", sentence, result)
--- a/analyse/idf.go
+++ b/analyse/idf.go
@@ -7,6 +7,8 @@ import (
 	"github.com/wangbin/jiebago/dictionary"
 )
 // Idf represents a thread-safe dictionary for all words with their
 // IDFs (Inverse Document Frequency).
 type Idf struct {
 	freqMap map[string]float64
 	median  float64
@@ -14,6 +16,7 @@ type Idf struct {
 	sync.RWMutex
 }
 // AddToken adds a new word with IDF into its dictionary.
 func (i *Idf) AddToken(token dictionary.Token) {
 	i.Lock()
 	i.freqMap[token.Text()] = token.Frequency()
@@ -23,6 +26,7 @@ func (i *Idf) AddToken(token dictionary.Token) {
 	i.Unlock()
 }
 // Load loads all tokens from channel into its dictionary.
 func (i *Idf) Load(ch <-chan dictionary.Token) {
 	i.Lock()
 	for token := range ch {
@@ -38,6 +42,7 @@ func (i *Idf) loadDictionary(fileName string) error {
 	return dictionary.LoadDictionary(i, fileName)
 }
 // Frequency returns the IDF of given word.
 func (i *Idf) Frequency(key string) (float64, bool) {
 	i.RLock()
 	freq, ok := i.freqMap[key]
@@ -45,6 +50,7 @@ func (i *Idf) Frequency(key string) (float64, bool) {
 	return freq, ok
 }
 // NewIdf returns a pointer to an empty Idf whose frequency map and frequency
 // slice are already allocated.
 func NewIdf() *Idf {
 	idf := new(Idf)
 	idf.freqMap = make(map[string]float64)
 	idf.freqs = make([]float64, 0)
 	return idf
 }
--- a/analyse/stopwords.go
+++ b/analyse/stopwords.go
@@ -6,6 +6,7 @@ import (
 	"github.com/wangbin/jiebago/dictionary"
 )
 // DefaultStopWordMap contains some stop words.
 var DefaultStopWordMap = map[string]int{
 	"the":   1,
 	"of":    1,
@@ -40,23 +41,27 @@ var DefaultStopWordMap = map[string]int{
 	"or":    1,
 }
 // StopWord is a thread-safe dictionary for all stop words.
 type StopWord struct {
 	stopWordMap map[string]int
 	sync.RWMutex
 }
 // AddToken records the text of token as a stop word. The write to the
 // underlying map happens while holding the write lock.
 func (s *StopWord) AddToken(token dictionary.Token) {
 	word := token.Text()
 	s.Lock()
 	s.stopWordMap[word] = 1
 	s.Unlock()
 }
 // NewStopWord creates a new StopWord pre-populated with the default stop words.
 //
 // The entries of DefaultStopWordMap are copied into a map owned by the new
 // instance. Sharing the package-level map directly would let AddToken and Load
 // modify DefaultStopWordMap itself, and would let independent StopWord values
 // (each guarded by its own lock) write to the same map concurrently.
 func NewStopWord() *StopWord {
 	s := new(StopWord)
 	s.stopWordMap = make(map[string]int, len(DefaultStopWordMap))
 	for word, flag := range DefaultStopWordMap {
 		s.stopWordMap[word] = flag
 	}
 	return s
 }
 // IsStopWord checks if a given word is stop word.
 func (s *StopWord) IsStopWord(word string) bool {
 	s.RLock()
 	_, ok := s.stopWordMap[word]
@@ -64,6 +69,7 @@ func (s *StopWord) IsStopWord(word string) bool {
 	return ok
 }
 // Load loads all tokens from given channel into StopWord dictionary.
 func (s *StopWord) Load(ch <-chan dictionary.Token) {
 	s.Lock()
 	for token := range ch {
--- a/analyse/tag_extracker.go
+++ b/analyse/tag_extracker.go
@@ -1,7 +1,6 @@
 package analyse
 import (
 	"fmt"
 	"sort"
 	"strings"
 	"unicode/utf8"
@@ -9,23 +8,23 @@ import (
 	"github.com/wangbin/jiebago"
 )
 // Segment pairs a word with the weight assigned to it.
 type Segment struct {
 	text   string
 	weight float64
 }
 // Text reports the word held by the segment.
 func (seg Segment) Text() string { return seg.text }
 // Weight reports the weight held by the segment.
 func (seg Segment) Weight() float64 { return seg.weight }
-func (s Segment) String() string {
+// Segments represents a slice of Segment.
 	return fmt.Sprintf("{%s: %f}", s.text, s.weight)
 }
 type Segments []Segment
 func (ss Segments) Len() int {
@@ -44,29 +43,33 @@ func (ss Segments) Swap(i, j int) {
 	ss[i], ss[j] = ss[j], ss[i]
 }
 // TagExtracter is used to extract tags from sentence.
 type TagExtracter struct {
 	seg      *jiebago.Segmenter
 	idf      *Idf
 	stopWord *StopWord
 }
 // LoadDictionary creates a new segmenter for the extracter and loads its
 // dictionary from the given file, returning any error from that load.
 // It also assigns a fresh StopWord from NewStopWord to the extracter,
 // replacing any StopWord that was installed earlier.
 func (t *TagExtracter) LoadDictionary(fileName string) error {
 	t.stopWord = NewStopWord()
 	t.seg = new(jiebago.Segmenter)
 	return t.seg.LoadDictionary(fileName)
 }
 // LoadIdf replaces the extracter's Idf dictionary with a new one and loads
 // it from the given file, returning any error from that load.
 func (t *TagExtracter) LoadIdf(fileName string) error {
 	t.idf = NewIdf()
 	return t.idf.loadDictionary(fileName)
 }
 // LoadStopWords replaces the extracter's StopWord dictionary with a new one
 // from NewStopWord and loads the given file into it, returning any error from
 // that load.
 func (t *TagExtracter) LoadStopWords(fileName string) error {
 	t.stopWord = NewStopWord()
 	return t.stopWord.loadDictionary(fileName)
 }
-// Keyword extraction.
+// ExtractTags extracts the topK key words from sentence.
 func (t *TagExtracter) ExtractTags(sentence string, topK int) (tags Segments) {
 	freqMap := make(map[string]float64)
--- a/analyse/textrank.go
+++ b/analyse/textrank.go
@@ -1,7 +1,6 @@
 package analyse
 import (
 	"fmt"
 	"math"
 	"sort"
@@ -20,10 +19,6 @@ type edge struct {
 	weight float64
 }
 func (e edge) String() string {
 	return fmt.Sprintf("(%s %s): %f", e.start, e.end, e.weight)
 }
 type edges []edge
 func (es edges) Len() int {
@@ -114,8 +109,8 @@ func (u *undirectWeightedGraph) rank() Segments {
 	return result
 }
-// Extract keywords from sentence using TextRank algorithm. the allowed POS list
+// TextRankWithPOS extracts keywords from sentence using TextRank algorithm.
-// could be manually speificed.
+// Parameter allowPOS allows a customized pos list.
 func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []string) Segments {
 	posFilt := make(map[string]int)
 	for _, pos := range allowPOS {
@@ -124,7 +119,7 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
 	g := newUndirectWeightedGraph()
 	cm := make(map[[2]string]float64)
 	span := 5
-	pairs := make([]posseg.Segment, 0)
+	var pairs []posseg.Segment
 	for pair := range t.seg.Cut(sentence, true) {
 		pairs = append(pairs, pair)
 	}
@@ -152,16 +147,18 @@ func (t *TextRanker) TextRankWithPOS(sentence string, topK int, allowPOS []strin
 	return tags
 }
-// Extract keywords from sentence using TextRank algorithm.
+// TextRank extracts keywords from sentence using TextRank algorithm.
-// topK specify how many top keywords to be returned at most.
+// Parameter topK specifies how many top keywords to be returned at most.
 func (t *TextRanker) TextRank(sentence string, topK int) Segments {
 	return t.TextRankWithPOS(sentence, topK, defaultAllowPOS)
 }
 // TextRanker is used to extract tags from sentence.
 type TextRanker struct {
 	seg *posseg.Segmenter
 }
 // LoadDictionary reads a given file and creates a new dictionary for TextRanker.
 func (t *TextRanker) LoadDictionary(fileName string) error {
 	t.seg = new(posseg.Segmenter)
 	return t.seg.LoadDictionary(fileName)
--- a/dictionary.go
+++ b/dictionary.go
@@ -14,7 +14,7 @@ type Dictionary struct {
 	sync.RWMutex
 }
-// Load loads all tokens from channel
+// Load loads all tokens from given channel
 func (d *Dictionary) Load(ch <-chan dictionary.Token) {
 	d.Lock()
 	for token := range ch {
@@ -49,7 +49,7 @@ func (d *Dictionary) updateLogTotal() {
 	d.logTotal = math.Log(d.total)
 }
-// Frequency returns the frequency of give word, if not found, the second result is false
+// Frequency returns the frequency and existence of the given word
 func (d *Dictionary) Frequency(key string) (float64, bool) {
 	d.RLock()
 	freq, ok := d.freqMap[key]
--- a/dictionary/dictionary.go
+++ b/dictionary/dictionary.go
@@ -8,6 +8,8 @@ import (
 	"strings"
 )
 // DictLoader represents an interface that could add one token or load a bunch of
 // tokens from channel.
 type DictLoader interface {
 	Load(<-chan Token)
 	AddToken(Token)
@@ -49,6 +51,7 @@ func loadDictionary(file *os.File) (<-chan Token, <-chan error) {
 }
 // LoadDictionary reads the given file and passes all tokens to a DictLoader.
 func LoadDictionary(dl DictLoader, fileName string) error {
 	filePath, err := dictPath(fileName)
 	if err != nil {
--- a/dictionary/token.go
+++ b/dictionary/token.go
@@ -1,23 +1,28 @@
 package dictionary
 // Token is a single dictionary entry: a Chinese word together with its
 // (optional) frequency and POS.
 type Token struct {
 	text      string
 	frequency float64
 	pos       string
 }
 // Text reports the token's word.
 func (tok Token) Text() string { return tok.text }
 // Frequency reports the token's frequency.
 func (tok Token) Frequency() float64 { return tok.frequency }
 // Pos reports the token's POS.
 func (tok Token) Pos() string { return tok.pos }
 // NewToken builds a Token from the given text, frequency and POS.
 func NewToken(text string, frequency float64, pos string) Token {
 	var tok Token
 	tok.text, tok.frequency, tok.pos = text, frequency, pos
 	return tok
 }
--- a/finalseg/finalseg.go
+++ b/finalseg/finalseg.go
@@ -13,10 +13,10 @@ func cutHan(sentence string) chan string {
 	result := make(chan string)
 	go func() {
 		runes := []rune(sentence)
-		_, pos_list := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
+		_, posList := viterbi(runes, []byte{'B', 'M', 'E', 'S'})
 		begin, next := 0, 0
 		for i, char := range runes {
-			pos := pos_list[i]
+			pos := posList[i]
 			switch pos {
 			case 'B':
 				begin = i
@@ -36,6 +36,8 @@ func cutHan(sentence string) chan string {
 	return result
 }
 // Cut cuts sentence into words using Hidden Markov Model with Viterbi
 // algorithm. It is used by Jiebago for unknown words.
 func Cut(sentence string) chan string {
 	result := make(chan string)
 	s := sentence
--- a/finalseg/finalseg_test.go
+++ b/finalseg/finalseg_test.go
@@ -6,7 +6,7 @@ import (
 )
 func chanToArray(ch chan string) []string {
-	result := make([]string, 0)
+	var result []string
 	for word := range ch {
 		result = append(result, word)
 	}
--- a/finalseg/viterbi.go
+++ b/finalseg/viterbi.go
@@ -67,11 +67,11 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
 		V[t] = make(map[byte]float64)
 		for _, y := range states {
 			ps0 := make(probStates, 0)
-			var em_p float64
+			var emP float64
 			if val, ok := probEmit[y][obs[t]]; ok {
-				em_p = val
+				emP = val
 			} else {
-				em_p = minFloat
+				emP = minFloat
 			}
 			for _, y0 := range prevStatus[y] {
 				var transP float64
@@ -80,7 +80,7 @@ func viterbi(obs []rune, states []byte) (float64, []byte) {
 				} else {
 					transP = minFloat
 				}
-				prob0 := V[t-1][y0] + transP + em_p
+				prob0 := V[t-1][y0] + transP + emP
 				ps0 = append(ps0, &probState{prob: prob0, state: y0})
 			}
 			sort.Sort(sort.Reverse(ps0))
--- a/jieba.go
+++ b/jieba.go
@@ -16,15 +16,21 @@ var (
 	reSkipDefault = regexp.MustCompile(`(\r\n|\s)`)
 )
 // Segmenter is a Chinese words segmentation struct.
 type Segmenter struct {
 	dict *Dictionary
 }
 // LoadDictionary loads the dictionary from the given file name. Every time
 // LoadDictionary is called, the previously loaded dictionary is discarded and
 // replaced by a new, empty one before loading.
 func (seg *Segmenter) LoadDictionary(fileName string) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64)}
 	return seg.dict.loadDictionary(fileName)
 }
 // LoadUserDictionary loads a user specified dictionary. It must be called
 // after LoadDictionary, because it loads into the dictionary created there.
 // It does not clear any previously loaded dictionary; instead it overrides
 // existing entries.
 func (seg *Segmenter) LoadUserDictionary(fileName string) error {
 	return seg.dict.loadDictionary(fileName)
 }
@@ -46,7 +52,7 @@ func (seg *Segmenter) dag(runes []rune) map[int][]int {
 			if freq > 0.0 {
 				dag[k] = append(dag[k], i)
 			}
-			i += 1
+			i++
 			if i >= n {
 				break
 			}
@@ -98,7 +104,7 @@ func (seg *Segmenter) cutDAG(sentence string) <-chan string {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -156,7 +162,7 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan string {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -181,6 +187,10 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan string {
 	return result
 }
 // Cut cuts a sentence into words using accurate mode.
 // Parameter hmm controls whether to use the Hidden Markov Model.
 // Accurate mode attempts to cut the sentence into the most accurate
 // segmentations, which is suitable for text analysis.
 func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan string {
 	result := make(chan string)
 	var cut cutFunc
@@ -246,6 +256,9 @@ func (seg *Segmenter) cutAll(sentence string) <-chan string {
 	return result
 }
 // CutAll cuts a sentence into words using full mode.
 // Full mode gets all the possible words from the sentence.
 // Fast but not accurate.
 func (seg *Segmenter) CutAll(sentence string) <-chan string {
 	result := make(chan string)
 	go func() {
@@ -268,6 +281,10 @@ func (seg *Segmenter) CutAll(sentence string) <-chan string {
 	return result
 }
 // CutForSearch cuts sentence into words using search engine mode.
 // Search engine mode, based on the accurate mode, attempts to cut long words
 // into several short words, which can raise the recall rate.
 // Suitable for search engines.
 func (seg *Segmenter) CutForSearch(sentence string, hmm bool) <-chan string {
 	result := make(chan string)
 	go func() {
--- a/jieba_test.go
+++ b/jieba_test.go
@@ -3,8 +3,8 @@ package jiebago
 import "testing"
 var (
-	seg           Segmenter
+	seg          Segmenter
-	test_contents = []string{
+	testContents = []string{
 		"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。",
 		"我不喜欢日本和服。",
 		"雷猴回归人间。",
@@ -620,7 +620,7 @@ func init() {
 }
 func chanToArray(ch <-chan string) []string {
-	result := make([]string, 0)
+	var result []string
 	for word := range ch {
 		result = append(result, word)
 	}
@@ -643,7 +643,7 @@ func TestCutDAGNoHmm(t *testing.T) {
 func TestDefaultCut(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.Cut(content, true))
 		if len(result) != len(defaultCutResult[index]) {
 			t.Errorf("default cut for %s length should be %d not %d\n",
@@ -661,7 +661,7 @@ func TestDefaultCut(t *testing.T) {
 func TestCutAll(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.CutAll(content))
 		if len(result) != len(cutAllResult[index]) {
 			t.Errorf("cut all for %s length should be %d not %d\n",
@@ -679,7 +679,7 @@ func TestCutAll(t *testing.T) {
 func TestDefaultCutNoHMM(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.Cut(content, false))
 		if len(result) != len(defaultCutNoHMMResult[index]) {
 			t.Fatalf("default cut no hmm for %s length should be %d not %d\n",
@@ -695,7 +695,7 @@ func TestDefaultCutNoHMM(t *testing.T) {
 func TestCutForSearch(t *testing.T) {
 	var result []string
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.CutForSearch(content, true))
 		if len(result) != len(cutForSearchResult[index]) {
 			t.Fatalf("cut for search for %s length should be %d not %d\n",
@@ -707,7 +707,7 @@ func TestCutForSearch(t *testing.T) {
 			}
 		}
 	}
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.CutForSearch(content, false))
 		if len(result) != len(cutForSearchNoHMMResult[index]) {
 			t.Fatalf("cut for search no hmm for %s length should be %d not %d\n",
@@ -724,7 +724,7 @@ func TestCutForSearch(t *testing.T) {
 func TestLoadDictionary(t *testing.T) {
 	var result []string
 	seg.LoadDictionary("foobar.txt")
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result = chanToArray(seg.Cut(content, true))
 		if len(result) != len(userDictCutResult[index]) {
 			t.Fatalf("default cut with user dictionary for %s length should be %d not %d\n",
--- a/posseg/char_state_tab.go
+++ b/posseg/char_state_tab.go
@@ -2,9 +2,9 @@ package posseg
 import "fmt"
-type Tag uint16
+type tag uint16
-func (t Tag) Tag() string {
+func (t tag) position() string {
 	switch t / 100 {
 	case 4:
 		return "S"
@@ -19,31 +19,29 @@ func (t Tag) Tag() string {
 	}
 }
-func (t Tag) POS() string {
+func (t tag) pos() string {
 	return poss[t%100]
 }
-func (t Tag) String() string {
+func newTag(position, pos string) (tag, error) {
-	return fmt.Sprintf("(%s, %s)", t.Tag(), t.POS())
+	positionIndex := -1
 }
 func NewTag(tag, pos string) (Tag, error) {
 	tagIndex := -1
 	posIndex := -1
-	for i, t := range tags {
+	for i, p := range positions {
-		if tag == t {
+		if position == p {
-			tagIndex = (i + 1) * 100
+			positionIndex = (i + 1) * 100
 			break
 		}
 	}
 	for i, p := range poss {
 		if pos == p {
 			posIndex = i
 			break
 		}
 	}
-	if tagIndex < 0 || posIndex < 0 {
+	if positionIndex < 0 || posIndex < 0 {
-		return 0, fmt.Errorf("Failed to convert %s %s to Tag", tag, pos)
+		return 0, fmt.Errorf("Failed to convert %s %s to Tag", position, pos)
 	}
-	return Tag(tagIndex + posIndex), nil
+	return tag(positionIndex + posIndex), nil
 }
 type charStateTabMap map[rune][]uint16
@@ -51,9 +49,8 @@ type charStateTabMap map[rune][]uint16
 // get returns the states recorded for key, or probTransKeys when key has no
 // entry in the table.
 func (m charStateTabMap) get(key rune) []uint16 {
 	if value, ok := m[key]; ok {
 		return value
 	}
 	return probTransKeys
 }
 var (
@@ -6708,6 +6705,6 @@ var (
 		'\u9fa0': []uint16{413},
 	}
-	tags = []string{"B", "E", "M", "S"}
+	positions = []string{"B", "E", "M", "S"}
-	poss = []string{"a", "ad", "ag", "an", "b", "bg", "c", "d", "df", "dg", "e", "en", "f", "g", "h", "i", "in", "j", "jn", "k", "l", "ln", "m", "mg", "mq", "n", "ng", "nr", "nrfg", "nrt", "ns", "nt", "nz", "o", "p", "q", "qe", "qg", "r", "rg", "rr", "rz", "s", "t", "tg", "u", "ud", "ug", "uj", "ul", "uv", "uz", "v", "vd", "vg", "vi", "vn", "vq", "w", "x", "y", "yg", "z", "zg"}
+	poss      = []string{"a", "ad", "ag", "an", "b", "bg", "c", "d", "df", "dg", "e", "en", "f", "g", "h", "i", "in", "j", "jn", "k", "l", "ln", "m", "mg", "mq", "n", "ng", "nr", "nrfg", "nrt", "ns", "nt", "nz", "o", "p", "q", "qe", "qg", "r", "rg", "rr", "rz", "s", "t", "tg", "u", "ud", "ug", "uj", "ul", "uv", "uz", "v", "vd", "vg", "vi", "vn", "vq", "w", "x", "y", "yg", "z", "zg"}
 )
--- a/posseg/dictionary.go
+++ b/posseg/dictionary.go
@@ -7,6 +7,7 @@ import (
 	"github.com/wangbin/jiebago/dictionary"
 )
 // A Dictionary represents a thread-safe dictionary used for word segmentation.
 type Dictionary struct {
 	total, logTotal float64
 	freqMap         map[string]float64
@@ -14,6 +15,7 @@ type Dictionary struct {
 	sync.RWMutex
 }
 // Load loads all tokens from given channel
 func (d *Dictionary) Load(ch <-chan dictionary.Token) {
 	d.Lock()
 	for token := range ch {
@@ -23,6 +25,7 @@ func (d *Dictionary) Load(ch <-chan dictionary.Token) {
 	d.updateLogTotal()
 }
 // AddToken adds one token
 func (d *Dictionary) AddToken(token dictionary.Token) {
 	d.Lock()
 	d.addToken(token)
@@ -50,6 +53,7 @@ func (d *Dictionary) updateLogTotal() {
 	d.logTotal = math.Log(d.total)
 }
 // Frequency returns the frequency and existence of the given word
 func (d *Dictionary) Frequency(key string) (float64, bool) {
 	d.RLock()
 	freq, ok := d.freqMap[key]
@@ -57,6 +61,7 @@ func (d *Dictionary) Frequency(key string) (float64, bool) {
 	return freq, ok
 }
 // Pos returns the POS and existence of the given word
 func (d *Dictionary) Pos(key string) (string, bool) {
 	d.RLock()
 	pos, ok := d.posMap[key]
--- a/posseg/posseg.go
+++ b/posseg/posseg.go
@@ -17,27 +17,36 @@ var (
 	reSkipInternal = regexp.MustCompile(`(\r\n|\s)`)
 )
 // Segment pairs a word with its POS.
 type Segment struct {
 	text string
 	pos  string
 }
 // Text reports the word held by the Segment.
 func (sg Segment) Text() string { return sg.text }
 // Pos reports the POS held by the Segment.
 func (sg Segment) Pos() string { return sg.pos }
 // Segmenter is a Chinese words segmentation struct.
 type Segmenter struct {
 	dict *Dictionary
 }
 // LoadDictionary loads the dictionary from the given file name.
 // Every time LoadDictionary is called, the previously loaded dictionary is
 // discarded and replaced by a new, empty one before loading.
 func (seg *Segmenter) LoadDictionary(fileName string) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
 	return seg.dict.loadDictionary(fileName)
 }
 // LoadUserDictionary loads a user specified dictionary. It must be called
 // after LoadDictionary, because it loads into the dictionary created there.
 // It does not clear any previously loaded dictionary; instead it overrides
 // existing entries.
 func (seg *Segmenter) LoadUserDictionary(fileName string) error {
 	return seg.dict.loadDictionary(fileName)
 }
@@ -52,19 +61,19 @@ func (seg *Segmenter) cutDetailInternal(sentence string) <-chan Segment {
 		next := 0
 		for i, char := range runes {
 			pos := posList[i]
-			switch pos.Tag() {
+			switch pos.position() {
 			case "B":
 				begin = i
 			case "E":
-				result <- Segment{string(runes[begin : i+1]), pos.POS()}
+				result <- Segment{string(runes[begin : i+1]), pos.pos()}
 				next = i + 1
 			case "S":
-				result <- Segment{string(char), pos.POS()}
+				result <- Segment{string(char), pos.pos()}
 				next = i + 1
 			}
 		}
 		if next < len(runes) {
-			result <- Segment{string(runes[next:]), posList[next].POS()}
+			result <- Segment{string(runes[next:]), posList[next].pos()}
 		}
 		close(result)
 	}()
@@ -117,7 +126,7 @@ func (seg *Segmenter) dag(runes []rune) map[int][]int {
 			if freq > 0.0 {
 				dag[k] = append(dag[k], i)
 			}
-			i += 1
+			i++
 			if i >= n {
 				break
 			}
@@ -170,7 +179,7 @@ func (seg *Segmenter) cutDAG(sentence string) <-chan Segment {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -253,7 +262,7 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -283,6 +292,8 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
 	return result
 }
 // Cut cuts a sentence into words.
 // Parameter hmm controls whether to use the Hidden Markov Model.
 func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan Segment {
 	result := make(chan Segment)
 	var cut cutFunc
--- a/posseg/posseg_test.go
+++ b/posseg/posseg_test.go
@@ -5,8 +5,8 @@ import (
 )
 var (
-	seg           Segmenter
+	seg          Segmenter
-	test_contents = []string{
+	testContents = []string{
 		"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。",
 		"我不喜欢日本和服。",
 		"雷猴回归人间。",
@@ -273,7 +273,7 @@ func init() {
 }
 func chanToArray(ch <-chan Segment) []Segment {
-	result := make([]Segment, 0)
+	var result []Segment
 	for word := range ch {
 		result = append(result, word)
 	}
@@ -281,7 +281,7 @@ func chanToArray(ch <-chan Segment) []Segment {
 }
 func TestCut(t *testing.T) {
-	for index, content := range test_contents {
+	for index, content := range testContents {
 		result := chanToArray(seg.Cut(content, true))
 		if len(defaultCutResult[index]) != len(result) {
 			t.Errorf("default cut for %s length should be %d not %d\n",
@@ -289,7 +289,7 @@ func TestCut(t *testing.T) {
 			t.Errorf("expect: %v\n", defaultCutResult[index])
 			t.Fatalf("got: %v\n", result)
 		}
-		for i, _ := range result {
+		for i := range result {
 			if result[i] != defaultCutResult[index][i] {
 				t.Fatalf("expect %s, got %s", defaultCutResult[index][i], result[i])
 			}
@@ -298,7 +298,7 @@ func TestCut(t *testing.T) {
 		if len(noHMMCutResult[index]) != len(result) {
 			t.Fatal(content)
 		}
-		for i, _ := range result {
+		for i := range result {
 			if result[i] != noHMMCutResult[index][i] {
 				t.Fatal(content)
 			}
@@ -320,7 +320,7 @@ func TestBug132(t *testing.T) {
 	if len(cutResult) != len(result) {
 		t.Fatal(result)
 	}
-	for i, _ := range result {
+	for i := range result {
 		if result[i] != cutResult[i] {
 			t.Fatal(result[i])
 		}
@@ -349,7 +349,7 @@ func TestBug137(t *testing.T) {
 	if len(cutResult) != len(result) {
 		t.Fatal(result)
 	}
-	for i, _ := range result {
+	for i := range result {
 		if result[i] != cutResult[i] {
 			t.Fatal(result[i])
 		}
@@ -404,7 +404,7 @@ func TestUserDict(t *testing.T) {
 	if len(cutResult) != len(result) {
 		t.Fatal(result)
 	}
-	for i, _ := range result {
+	for i := range result {
 		if result[i] != cutResult[i] {
 			t.Fatal(result[i])
 		}
--- a/posseg/prob_emit.go
+++ b/posseg/prob_emit.go
@@ -1,15 +1,14 @@
 package posseg
-const MinFloat = -3.14e100
+const minFloat = -3.14e100
 type runeFloatMap map[rune]float64
 func (m runeFloatMap) get(key rune) float64 {
 	if value, ok := m[key]; ok {
 		return value
 	} else {
 		return MinFloat
 	}
 	return minFloat
 }
 var probEmit = map[uint16]runeFloatMap{
--- a/posseg/prob_trans.go
+++ b/posseg/prob_trans.go
@@ -11,9 +11,8 @@ type probTransMap map[uint16]float64
 func (m probTransMap) Get(key uint16) float64 {
 	if value, ok := m[key]; ok {
 		return value
 	} else {
 		return inf
 	}
 	return inf
 }
 var (
--- a/posseg/viterbi.go
+++ b/posseg/viterbi.go
@@ -31,52 +31,52 @@ func (pss probStates) Swap(i, j int) {
 	pss[i], pss[j] = pss[j], pss[i]
 }
-func viterbi(obs []rune) []Tag {
+func viterbi(obs []rune) []tag {
 	obsLength := len(obs)
 	V := make([]map[uint16]float64, obsLength)
 	V[0] = make(map[uint16]float64)
-	mem_path := make([]map[uint16]uint16, obsLength)
+	memPath := make([]map[uint16]uint16, obsLength)
-	mem_path[0] = make(map[uint16]uint16)
+	memPath[0] = make(map[uint16]uint16)
 	ys := charStateTab.get(obs[0]) // default is all_states
 	for _, y := range ys {
 		V[0][y] = probEmit[y].get(obs[0]) + probStart[y]
-		mem_path[0][y] = 0
+		memPath[0][y] = 0
 	}
 	for t := 1; t < obsLength; t++ {
-		prev_states := make([]uint16, 0)
+		var prevStates []uint16
-		for x := range mem_path[t-1] {
+		for x := range memPath[t-1] {
 			if len(probTrans[x]) > 0 {
-				prev_states = append(prev_states, x)
+				prevStates = append(prevStates, x)
 			}
 		}
 		//use Go's map to implement Python's Set()
-		prev_states_expect_next := make(map[uint16]int)
+		prevStatesExpectNext := make(map[uint16]int)
-		for _, x := range prev_states {
+		for _, x := range prevStates {
 			for y := range probTrans[x] {
-				prev_states_expect_next[y] = 1
+				prevStatesExpectNext[y] = 1
 			}
 		}
-		tmp_obs_states := charStateTab.get(obs[t])
+		tmpObsStates := charStateTab.get(obs[t])
-		obs_states := make([]uint16, 0)
+		var obsStates []uint16
-		for index := range tmp_obs_states {
+		for index := range tmpObsStates {
-			if _, ok := prev_states_expect_next[tmp_obs_states[index]]; ok {
+			if _, ok := prevStatesExpectNext[tmpObsStates[index]]; ok {
-				obs_states = append(obs_states, tmp_obs_states[index])
+				obsStates = append(obsStates, tmpObsStates[index])
 			}
 		}
-		if len(obs_states) == 0 {
+		if len(obsStates) == 0 {
-			for key := range prev_states_expect_next {
+			for key := range prevStatesExpectNext {
-				obs_states = append(obs_states, key)
+				obsStates = append(obsStates, key)
 			}
 		}
-		if len(obs_states) == 0 {
+		if len(obsStates) == 0 {
-			obs_states = probTransKeys
+			obsStates = probTransKeys
 		}
-		mem_path[t] = make(map[uint16]uint16)
+		memPath[t] = make(map[uint16]uint16)
 		V[t] = make(map[uint16]float64)
-		for _, y := range obs_states {
+		for _, y := range obsStates {
 			var max, ps probState
-			for i, y0 := range prev_states {
+			for i, y0 := range prevStates {
 				ps = probState{
 					prob:  V[t-1][y0] + probTrans[y0].Get(y) + probEmit[y].get(obs[t]),
 					state: y0}
@@ -85,23 +85,23 @@ func viterbi(obs []rune) []Tag {
 				}
 			}
 			V[t][y] = max.prob
-			mem_path[t][y] = max.state
+			memPath[t][y] = max.state
 		}
 	}
 	last := make(probStates, 0)
-	length := len(mem_path)
+	length := len(memPath)
 	vlength := len(V)
-	for y := range mem_path[length-1] {
+	for y := range memPath[length-1] {
 		ps := probState{prob: V[vlength-1][y], state: y}
 		last = append(last, ps)
 	}
 	sort.Sort(sort.Reverse(last))
 	state := last[0].state
-	route := make([]Tag, len(obs))
+	route := make([]tag, len(obs))
 	for i := obsLength - 1; i >= 0; i-- {
-		route[i] = Tag(state)
+		route[i] = tag(state)
-		state = mem_path[i][state]
+		state = memPath[i][state]
 	}
 	return route
 }
--- a/posseg/viterbi_test.go
+++ b/posseg/viterbi_test.go
@@ -4,49 +4,49 @@ import (
 	"testing"
 )
-var defaultRoute []Tag
+var defaultRoute []tag
 func init() {
-	var t Tag
+	var t tag
-	t, _ = NewTag("B", "nr")
+	t, _ = newTag("B", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "nr")
+	t, _ = newTag("E", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "v")
+	t, _ = newTag("S", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "v")
+	t, _ = newTag("B", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "v")
+	t, _ = newTag("E", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "n")
+	t, _ = newTag("B", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "n")
+	t, _ = newTag("M", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "n")
+	t, _ = newTag("E", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "d")
+	t, _ = newTag("S", "d")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "v")
+	t, _ = newTag("S", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "n")
+	t, _ = newTag("S", "n")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "v")
+	t, _ = newTag("B", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "v")
+	t, _ = newTag("E", "v")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("B", "nr")
+	t, _ = newTag("B", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("M", "nr")
+	t, _ = newTag("M", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("E", "nr")
+	t, _ = newTag("E", "nr")
 	defaultRoute = append(defaultRoute, t)
-	t, _ = NewTag("S", "zg")
+	t, _ = newTag("S", "zg")
 	defaultRoute = append(defaultRoute, t)
 }
--- a/tokenizer.go
+++ b/tokenizer.go
@@ -9,18 +9,40 @@ import (
 	"github.com/blevesearch/bleve/registry"
 )
 // Name is the jieba tokenizer name.
 const Name = "jieba"
-var IdeographRegexp = regexp.MustCompile(`\p{Han}+`)
+var ideographRegexp = regexp.MustCompile(`\p{Han}+`)
 // JiebaTokenizer is the bleve tokenizer for jiebago.
 type JiebaTokenizer struct {
 	seg             Segmenter
 	hmm, searchMode bool
 }
-func NewJiebaTokenizer(dictFileName string, hmm, searchMode bool) (analysis.Tokenizer, error) {
+/*
 NewJiebaTokenizer creates a new JiebaTokenizer.
 Parameters:
    dictFilePath: path of the dictionary file.
    hmm: whether to use the Hidden Markov Model to cut unknown words,
    i.e. words not found in the dictionary. For example, the word "安卓" (which
    means "Android" in English) is not in the dictionary file. If hmm is set to
    false, it will be cut into two single words "安" and "卓"; if hmm is set to
    true, it will be treated as one single word, because Jieba uses the Hidden
    Markov Model with the Viterbi algorithm to guess the best possibility.
    searchMode: whether to further cut long words into several short words.
    In Chinese, some long words may contain other words. For example, "交换机"
    is the Chinese word for "Switcher"; if searchMode is false, "交换机" is
    treated as a single word. If searchMode is true, it is further split
    into "交换" and "换机", which are valid Chinese words.
 */
 func NewJiebaTokenizer(dictFilePath string, hmm, searchMode bool) (analysis.Tokenizer, error) {
 	var seg Segmenter
-	err := seg.LoadDictionary(dictFileName)
+	err := seg.LoadDictionary(dictFilePath)
 	return &JiebaTokenizer{
 		seg:        seg,
 		hmm:        hmm,
@@ -28,6 +50,7 @@ func NewJiebaTokenizer(dictFileName string, hmm, searchMode bool) (analysis.Toke
 	}, err
 }
 // Tokenize cuts input into bleve token stream.
 func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
 	rv := make(analysis.TokenStream, 0)
 	runeStart := 0
@@ -77,9 +100,20 @@ func (jt *JiebaTokenizer) Tokenize(input []byte) analysis.TokenStream {
 	return rv
 }
 /*
 JiebaTokenizerConstructor creates a JiebaTokenizer.
 Parameter config should contain at least one parameter:
    file: the path of the dictionary file.
    hmm: optional, specifies whether to use the Hidden Markov Model, see NewJiebaTokenizer for details.
    search: optional, specifies whether to use search mode, see NewJiebaTokenizer for details.
 */
 func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (
 	analysis.Tokenizer, error) {
-	dictFileName, ok := config["file"].(string)
+	dictFilePath, ok := config["file"].(string)
 	if !ok {
 		return nil, fmt.Errorf("must specify dictionary file path")
 	}
@@ -92,11 +126,11 @@ func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Ca
 		searchMode = true
 	}
-	return NewJiebaTokenizer(dictFileName, hmm, searchMode)
+	return NewJiebaTokenizer(dictFilePath, hmm, searchMode)
 }
 func detectTokenType(term string) analysis.TokenType {
-	if IdeographRegexp.MatchString(term) {
+	if ideographRegexp.MatchString(term) {
 		return analysis.Ideographic
 	}
 	_, err := strconv.ParseFloat(term, 64)
--- a/util/util.go
+++ b/util/util.go
@@ -2,12 +2,14 @@ package util
 import "regexp"
-// RegexpSplit split slices s into substrings separated by the expression and
+/*
-// returns a slice of the substrings between those expression matches.
+RegexpSplit slices s into substrings separated by the expression and
-// If capturing parentheses are used in expression, then the text of all groups
+returns a slice of the substrings between those expression matches.
-// in the expression are also returned as part of the resulting slice.
+If capturing parentheses are used in expression, then the text of all groups
-//
+in the expression are also returned as part of the resulting slice.
-// This function acts consistent with Python's re.split function.
+
 This function acts consistent with Python's re.split function.
 */
 func RegexpSplit(re *regexp.Regexp, s string, n int) []string {
 	if n == 0 {
 		return nil