1
0
mirror of https://github.com/fumiama/jieba.git synced 2026-06-05 00:32:51 +08:00

Small refactor: compile the regular expressions once instead of on every call. Corresponds to jieba commit 32a0e92a09614cf5c72f87b1a59a5c4369200516.

This commit is contained in:
Wang Bin
2015-02-25 16:32:28 +08:00
parent 5702495bf6
commit 08ac49d10b
3 changed files with 51 additions and 46 deletions

View File

@@ -4,6 +4,11 @@ import (
"regexp"
)
// Regular expressions used by Cut, declared once here so they are not
// recompiled on every call.
var (
// reHan matches a run of one or more consecutive Han-script characters.
reHan = regexp.MustCompile(`\p{Han}+`)
// reSkip matches either a decimal number of the form digits.digits or a
// run of ASCII letters and digits.
reSkip = regexp.MustCompile(`(\d+\.\d+|[a-zA-Z0-9]+)`)
)
func cutHan(sentence string) []string {
runes := []rune(sentence)
result := make([]string, 0)
@@ -30,14 +35,12 @@ func cutHan(sentence string) []string {
func Cut(sentence string) []string {
result := make([]string, 0)
re_han := regexp.MustCompile(`\p{Han}+`)
re_skip := regexp.MustCompile(`(\d+\.\d+|[a-zA-Z0-9]+)`)
s := sentence
var hans string
var hanLoc []int
var nonhanLoc []int
for {
hanLoc = re_han.FindStringIndex(s)
hanLoc = reHan.FindStringIndex(s)
if hanLoc == nil {
if len(s) == 0 {
break
@@ -50,7 +53,7 @@ func Cut(sentence string) []string {
}
continue
}
nonhanLoc = re_skip.FindStringIndex(s)
nonhanLoc = reSkip.FindStringIndex(s)
if nonhanLoc == nil {
if len(s) == 0 {
break