mirror of
https://github.com/fumiama/jieba.git
synced 2026-06-05 00:32:51 +08:00
Small refactor: don't compile the regular expressions on every call. Corresponds to jieba commit #32a0e92a09614cf5c72f87b1a59a5c4369200516.
This commit is contained in:
@@ -4,6 +4,11 @@ import (
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// Package-level patterns, compiled once when the package is initialized
// instead of on every call that needs them.
var (
|
||||
// reHan matches a run of one or more Han-script characters.
reHan = regexp.MustCompile(`\p{Han}+`)
|
||||
// reSkip matches a decimal number (digits, a dot, digits) or a run of
// ASCII letters and digits.
reSkip = regexp.MustCompile(`(\d+\.\d+|[a-zA-Z0-9]+)`)
|
||||
)
|
||||
|
||||
func cutHan(sentence string) []string {
|
||||
runes := []rune(sentence)
|
||||
result := make([]string, 0)
|
||||
@@ -30,14 +35,12 @@ func cutHan(sentence string) []string {
|
||||
|
||||
func Cut(sentence string) []string {
|
||||
result := make([]string, 0)
|
||||
re_han := regexp.MustCompile(`\p{Han}+`)
|
||||
re_skip := regexp.MustCompile(`(\d+\.\d+|[a-zA-Z0-9]+)`)
|
||||
s := sentence
|
||||
var hans string
|
||||
var hanLoc []int
|
||||
var nonhanLoc []int
|
||||
for {
|
||||
hanLoc = re_han.FindStringIndex(s)
|
||||
hanLoc = reHan.FindStringIndex(s)
|
||||
if hanLoc == nil {
|
||||
if len(s) == 0 {
|
||||
break
|
||||
@@ -50,7 +53,7 @@ func Cut(sentence string) []string {
|
||||
}
|
||||
continue
|
||||
}
|
||||
nonhanLoc = re_skip.FindStringIndex(s)
|
||||
nonhanLoc = reSkip.FindStringIndex(s)
|
||||
if nonhanLoc == nil {
|
||||
if len(s) == 0 {
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user