mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-13 05:31:02 +08:00
refactor variable name
This commit is contained in:
14
jieba.go
14
jieba.go
@@ -82,7 +82,7 @@ func GetDAG(sentence string) map[int][]int {
|
|||||||
i = k
|
i = k
|
||||||
frag = string(runes[k])
|
frag = string(runes[k])
|
||||||
for {
|
for {
|
||||||
if freq, ok := T.Freq[frag]; !ok {
|
if freq, ok := Trie.Freq[frag]; !ok {
|
||||||
break
|
break
|
||||||
} else {
|
} else {
|
||||||
if freq > 0.0 {
|
if freq > 0.0 {
|
||||||
@@ -108,14 +108,14 @@ func Calc(sentence string, dag map[int][]int) map[int]*Route {
|
|||||||
number := len(runes)
|
number := len(runes)
|
||||||
routes := make(map[int]*Route)
|
routes := make(map[int]*Route)
|
||||||
routes[number] = &Route{Freq: 0.0, Index: 0}
|
routes[number] = &Route{Freq: 0.0, Index: 0}
|
||||||
logTotal := math.Log(T.Total)
|
logTotal := math.Log(Trie.Total)
|
||||||
for idx := number - 1; idx >= 0; idx-- {
|
for idx := number - 1; idx >= 0; idx-- {
|
||||||
candidates := make(Routes, 0)
|
candidates := make(Routes, 0)
|
||||||
for _, i := range dag[idx] {
|
for _, i := range dag[idx] {
|
||||||
word := string(runes[idx : i+1])
|
word := string(runes[idx : i+1])
|
||||||
var route *Route
|
var route *Route
|
||||||
if _, ok := T.Freq[word]; ok {
|
if _, ok := Trie.Freq[word]; ok {
|
||||||
route = &Route{Freq: math.Log(T.Freq[word]) - logTotal + routes[i+1].Freq, Index: i}
|
route = &Route{Freq: math.Log(Trie.Freq[word]) - logTotal + routes[i+1].Freq, Index: i}
|
||||||
} else {
|
} else {
|
||||||
route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i}
|
route = &Route{Freq: math.Log(1.0) - logTotal + routes[i+1].Freq, Index: i}
|
||||||
}
|
}
|
||||||
@@ -153,7 +153,7 @@ func cut_DAG(sentence string) []string {
|
|||||||
buf = make([]rune, 0)
|
buf = make([]rune, 0)
|
||||||
} else {
|
} else {
|
||||||
bufString := string(buf)
|
bufString := string(buf)
|
||||||
if v, ok := T.Freq[bufString]; !ok || v == 0.0 {
|
if v, ok := Trie.Freq[bufString]; !ok || v == 0.0 {
|
||||||
recognized := finalseg.Cut(bufString)
|
recognized := finalseg.Cut(bufString)
|
||||||
for _, t := range recognized {
|
for _, t := range recognized {
|
||||||
result = append(result, t)
|
result = append(result, t)
|
||||||
@@ -176,7 +176,7 @@ func cut_DAG(sentence string) []string {
|
|||||||
result = append(result, string(buf))
|
result = append(result, string(buf))
|
||||||
} else {
|
} else {
|
||||||
bufString := string(buf)
|
bufString := string(buf)
|
||||||
if v, ok := T.Freq[bufString]; !ok || v == 0.0 {
|
if v, ok := Trie.Freq[bufString]; !ok || v == 0.0 {
|
||||||
recognized := finalseg.Cut(bufString)
|
recognized := finalseg.Cut(bufString)
|
||||||
for _, t := range recognized {
|
for _, t := range recognized {
|
||||||
result = append(result, t)
|
result = append(result, t)
|
||||||
@@ -320,7 +320,7 @@ func CutForSearch(sentence string, hmm bool) []string {
|
|||||||
var gram2 string
|
var gram2 string
|
||||||
for i := 0; i < len(runes)-increment+1; i++ {
|
for i := 0; i < len(runes)-increment+1; i++ {
|
||||||
gram2 = string(runes[i : i+increment])
|
gram2 = string(runes[i : i+increment])
|
||||||
if v, ok := T.Freq[gram2]; ok && v > 0.0 {
|
if v, ok := Trie.Freq[gram2]; ok && v > 0.0 {
|
||||||
result = append(result, gram2)
|
result = append(result, gram2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -121,7 +121,7 @@ func cut_DAG(sentence string) []WordTag {
|
|||||||
buf = make([]rune, 0)
|
buf = make([]rune, 0)
|
||||||
} else {
|
} else {
|
||||||
bufString := string(buf)
|
bufString := string(buf)
|
||||||
if v, ok := jiebago.T.Freq[bufString]; !ok || v == 0.0 {
|
if v, ok := jiebago.Trie.Freq[bufString]; !ok || v == 0.0 {
|
||||||
recognized := cutDetail(bufString)
|
recognized := cutDetail(bufString)
|
||||||
for _, t := range recognized {
|
for _, t := range recognized {
|
||||||
result = append(result, t)
|
result = append(result, t)
|
||||||
@@ -160,7 +160,7 @@ func cut_DAG(sentence string) []WordTag {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
bufString := string(buf)
|
bufString := string(buf)
|
||||||
if v, ok := jiebago.T.Freq[bufString]; !ok || v == 0.0 {
|
if v, ok := jiebago.Trie.Freq[bufString]; !ok || v == 0.0 {
|
||||||
recognized := cutDetail(bufString)
|
recognized := cutDetail(bufString)
|
||||||
for _, t := range recognized {
|
for _, t := range recognized {
|
||||||
result = append(result, t)
|
result = append(result, t)
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ func Tokenize(sentence string, mode string, HMM bool) []Token {
|
|||||||
if width > step {
|
if width > step {
|
||||||
for i := 0; i < width-step+1; i++ {
|
for i := 0; i < width-step+1; i++ {
|
||||||
gram := string(runes[i : i+step])
|
gram := string(runes[i : i+step])
|
||||||
if _, ok := T.Freq[gram]; ok {
|
if _, ok := Trie.Freq[gram]; ok {
|
||||||
tokens = append(tokens, Token{gram, start + i, start + i + step})
|
tokens = append(tokens, Token{gram, start + i, start + i + step})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
33
trie.go
33
trie.go
@@ -11,14 +11,14 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
var T *Trie
|
var Trie *trie
|
||||||
|
|
||||||
type Trie struct {
|
type trie struct {
|
||||||
Total float64
|
Total float64
|
||||||
Freq map[string]float64
|
Freq map[string]float64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t Trie) MarshalBinary() ([]byte, error) {
|
func (t trie) MarshalBinary() ([]byte, error) {
|
||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
enc := gob.NewEncoder(&b)
|
enc := gob.NewEncoder(&b)
|
||||||
err := enc.Encode(t.Total)
|
err := enc.Encode(t.Total)
|
||||||
@@ -32,7 +32,7 @@ func (t Trie) MarshalBinary() ([]byte, error) {
|
|||||||
return b.Bytes(), nil
|
return b.Bytes(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *Trie) UnmarshalBinary(data []byte) error {
|
func (t *trie) UnmarshalBinary(data []byte) error {
|
||||||
b := bytes.NewBuffer(data)
|
b := bytes.NewBuffer(data)
|
||||||
dec := gob.NewDecoder(b)
|
dec := gob.NewDecoder(b)
|
||||||
err := dec.Decode(&t.Total)
|
err := dec.Decode(&t.Total)
|
||||||
@@ -46,7 +46,7 @@ func (t *Trie) UnmarshalBinary(data []byte) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTrie(dictFileName string) error {
|
func (t *trie) load(dictFileName string) error {
|
||||||
dictFilePath, err := DictPath(dictFileName)
|
dictFilePath, err := DictPath(dictFileName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -83,7 +83,7 @@ func newTrie(dictFileName string) error {
|
|||||||
|
|
||||||
if isDictCached {
|
if isDictCached {
|
||||||
dec := gob.NewDecoder(cacheFile)
|
dec := gob.NewDecoder(cacheFile)
|
||||||
err = dec.Decode(&T)
|
err = dec.Decode(&t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
isDictCached = false
|
isDictCached = false
|
||||||
} else {
|
} else {
|
||||||
@@ -98,7 +98,7 @@ func newTrie(dictFileName string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, wtf := range wtfs {
|
for _, wtf := range wtfs {
|
||||||
T.addWord(wtf)
|
t.addWord(wtf)
|
||||||
}
|
}
|
||||||
// dump trie
|
// dump trie
|
||||||
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
cacheFile, err = os.OpenFile(cacheFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
||||||
@@ -107,7 +107,7 @@ func newTrie(dictFileName string) error {
|
|||||||
}
|
}
|
||||||
defer cacheFile.Close()
|
defer cacheFile.Close()
|
||||||
enc := gob.NewEncoder(cacheFile)
|
enc := gob.NewEncoder(cacheFile)
|
||||||
err = enc.Encode(T)
|
err = enc.Encode(t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
} else {
|
} else {
|
||||||
@@ -117,7 +117,7 @@ func newTrie(dictFileName string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *Trie) addWord(wtf *WordTagFreq) {
|
func (t *trie) addWord(wtf *WordTagFreq) {
|
||||||
t.Freq[wtf.Word] = wtf.Freq
|
t.Freq[wtf.Word] = wtf.Freq
|
||||||
t.Total += wtf.Freq
|
t.Total += wtf.Freq
|
||||||
runes := []rune(wtf.Word)
|
runes := []rune(wtf.Word)
|
||||||
@@ -129,12 +129,6 @@ func (t *Trie) addWord(wtf *WordTagFreq) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func addWord(wtf *WordTagFreq) {
|
|
||||||
if len(wtf.Tag) > 0 {
|
|
||||||
UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag)
|
|
||||||
}
|
|
||||||
T.addWord(wtf)
|
|
||||||
}
|
|
||||||
|
|
||||||
func LoadUserDict(dictFilePath string) error {
|
func LoadUserDict(dictFilePath string) error {
|
||||||
wtfs, err := ParseDictFile(dictFilePath)
|
wtfs, err := ParseDictFile(dictFilePath)
|
||||||
@@ -142,12 +136,15 @@ func LoadUserDict(dictFilePath string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, wtf := range wtfs {
|
for _, wtf := range wtfs {
|
||||||
addWord(wtf)
|
if len(wtf.Tag) > 0 {
|
||||||
|
UserWordTagTab[wtf.Word] = strings.TrimSpace(wtf.Tag)
|
||||||
|
}
|
||||||
|
Trie.addWord(wtf)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func SetDictionary(dictFileName string) error {
|
func SetDictionary(dictFileName string) error {
|
||||||
T = &Trie{Total: 0.0, Freq: make(map[string]float64)}
|
Trie = &trie{Total: 0.0, Freq: make(map[string]float64)}
|
||||||
return newTrie(dictFileName)
|
return Trie.load(dictFileName)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user