code refactor, added more documents

2026-07-01 01:20:29 +08:00 · 2015-05-06 12:55:04 +08:00
parent 87caff09cb
commit 122bad0a8d
23 changed files with 228 additions and 142 deletions
--- a/posseg/posseg.go
+++ b/posseg/posseg.go
@@ -17,27 +17,36 @@ var (
 	reSkipInternal = regexp.MustCompile(`(\r\n|\s)`)
 )

+// Segment represents a word with its POS.
 type Segment struct {
 	text, pos string
 }

+// Text returns the Segment's text.
 func (s Segment) Text() string {
 	return s.text
 }

+// Pos returns the Segment's POS.
 func (s Segment) Pos() string {
 	return s.pos
 }

+// Segmenter is a Chinese word segmentation struct.
 type Segmenter struct {
 	dict *Dictionary
 }

+// LoadDictionary loads the dictionary from the given file name.
+// Every time LoadDictionary is called, the previously loaded dictionary will be cleared.
 func (seg *Segmenter) LoadDictionary(fileName string) error {
 	seg.dict = &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)}
 	return seg.dict.loadDictionary(fileName)
 }

+// LoadUserDictionary loads a user-specified dictionary. It must be called
+// after LoadDictionary; it does not clear any previously loaded dictionary,
+// but instead overrides existing entries.
 func (seg *Segmenter) LoadUserDictionary(fileName string) error {
 	return seg.dict.loadDictionary(fileName)
 }
@@ -52,19 +61,19 @@ func (seg *Segmenter) cutDetailInternal(sentence string) <-chan Segment {
 		next := 0
 		for i, char := range runes {
 			pos := posList[i]
-			switch pos.Tag() {
+			switch pos.position() {
 			case "B":
 				begin = i
 			case "E":
-				result <- Segment{string(runes[begin : i+1]), pos.POS()}
+				result <- Segment{string(runes[begin : i+1]), pos.pos()}
 				next = i + 1
 			case "S":
-				result <- Segment{string(char), pos.POS()}
+				result <- Segment{string(char), pos.pos()}
 				next = i + 1
 			}
 		}
 		if next < len(runes) {
-			result <- Segment{string(runes[next:]), posList[next].POS()}
+			result <- Segment{string(runes[next:]), posList[next].pos()}
 		}
 		close(result)
 	}()
@@ -117,7 +126,7 @@ func (seg *Segmenter) dag(runes []rune) map[int][]int {
 			if freq > 0.0 {
 				dag[k] = append(dag[k], i)
 			}
-			i += 1
+			i++
 			if i >= n {
 				break
 			}
@@ -170,7 +179,7 @@ func (seg *Segmenter) cutDAG(sentence string) <-chan Segment {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -253,7 +262,7 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
 		routes := seg.calc(runes)
 		var y int
 		length := len(runes)
-		buf := make([]rune, 0)
+		var buf []rune
 		for x := 0; x < length; {
 			y = routes[x].index + 1
 			frag := runes[x:y]
@@ -283,6 +292,8 @@ func (seg *Segmenter) cutDAGNoHMM(sentence string) <-chan Segment {
 	return result
 }

+// Cut cuts a sentence into words.
+// Parameter hmm controls whether to use the Hidden Markov Model.
 func (seg *Segmenter) Cut(sentence string, hmm bool) <-chan Segment {
 	result := make(chan Segment)
 	var cut cutFunc