1
0
mirror of https://github.com/fumiama/emozi.git synced 2026-06-05 00:32:48 +08:00

optimize(db): add memory cache & mutex

This commit is contained in:
源文雨
2024-02-14 20:57:19 +09:00
parent ceb7686da6
commit 76dca88501
4 changed files with 82 additions and 23 deletions

View File

@@ -4,6 +4,7 @@ import (
"errors"
"strconv"
"strings"
"sync"
"time"
sql "github.com/FloatTech/sqlite"
@@ -11,13 +12,18 @@ import (
// Coder is the encoder/decoder. It wraps the sqlite database together with
// two in-memory lookup caches and the mutex that the methods in this file
// take around database and cache access.
//
// NOTE(review): Coder holds a sync.RWMutex by value and NewCoder returns a
// Coder by value; avoid copying a Coder after its first use (go vet's
// copylocks check reports such copies).
type Coder struct {
// mu is locked by the methods that touch db or the caches below.
mu sync.RWMutex
// db is the sqlite handle; its DBPath is set and it is opened in NewCoder.
db sql.Sqlite
// 字表缓存 maps a character to the 字表 rows found for it; filled by 查字.
字表缓存 map[rune][]字表
// 部首缓存 maps a radical to the string resolved for it; filled by 查部首.
部首缓存 map[rune]string
// isRandom is copied from NewCoder's israndom argument: pick the emoji for
// initials/finals at random when true, otherwise always use the first one.
isRandom bool
}
// NewCoder israndom 随机挑选声母韵母的颜文字, 否则固定使用第一个
func NewCoder(israndom bool, cachettl time.Duration) (c Coder, err error) {
c.db.DBPath = EmoziDatabasePath
c.字表缓存 = make(map[rune][]字表, 4096)
c.部首缓存 = make(map[rune]string, 4096)
c.isRandom = israndom
err = c.db.Open(cachettl)
if err != nil {
@@ -41,14 +47,16 @@ func NewCoder(israndom bool, cachettl time.Duration) (c Coder, err error) {
// Close closes the underlying database and reports the error returned by it.
// The write lock is held for the duration of the call and released on return.
func (c *Coder) Close() error {
	c.mu.Lock()
	defer c.mu.Unlock()

	closeErr := c.db.Close()
	return closeErr
}
// Encode 从汉字序列生成 EmoziString
func (c *Coder) Encode(s string, selections ...int) (EmoziString, error) {
// Encode 从汉字序列生成 EmoziString 返回 EmoziString 多音字选择数列表
func (c *Coder) Encode(s string, selections ...int) (EmoziString, []int, error) {
sb := strings.Builder{}
x := &字表{}
lst := []字表{}
lstbuf := make([]字表, 0, len(s)/2)
var lst []字表
write := func(x *字表) {
sb.WriteString(c.查声母(x.S))
sb.WriteString(c.查韵母(x.Y))
@@ -56,27 +64,19 @@ func (c *Coder) Encode(s string, selections ...int) (EmoziString, error) {
sb.WriteString(c.查部首(x.R))
}
多音字计数 := 0
多音字数表 := []int{}
var err error
for _, ch := range s { // nolint: go-staticcheck
lst = lst[:0]
err := c.db.FindFor(附字表名, x, "WHERE W="+strconv.Itoa(int(ch)), func() error {
lst = append(lst, *x)
return nil
})
if err != nil {
lst = lst[:0]
err = c.db.FindFor(主字表名, x, "WHERE W="+strconv.Itoa(int(ch)), func() error {
lst = append(lst, *x)
return nil
})
}
lst, lstbuf, err = c.查字(ch, lstbuf)
if err != nil || len(lst) == 0 {
sb.WriteRune(ch)
continue
}
if len(lst) == 1 {
write(x)
write(&lst[0])
continue
}
多音字数表 = append(多音字数表, len(lst))
if len(selections) > 多音字计数 {
idx := selections[多音字计数]
多音字计数++
@@ -94,7 +94,7 @@ func (c *Coder) Encode(s string, selections ...int) (EmoziString, error) {
}
sb.WriteString("]")
}
return WrapRawEmoziString(sb.String()), nil
return WrapRawEmoziString(sb.String()), 多音字数表, nil
}
// Add 向主库添加一个新字
@@ -108,11 +108,13 @@ func (c *Coder) Add(w, r, p, f string) error {
if err != nil {
return err
}
c.mu.Lock()
err = c.db.InsertUnique(主字表名, &字表{
ID: 颜表ID(rw, s, y, t),
W: rw, S: s, Y: y, T: t,
R: rr, P: p, F: f,
})
c.mu.Unlock()
if err != nil {
return errors.New("已有同音同形的字 '" + w + "'")
}
@@ -134,11 +136,13 @@ func (c *Coder) Overlay(w, r, p, f string) error {
}
func (c *Coder) overlay(w, p, f string, s 声母枚举, y 韵母枚举, t 声调枚举, rw rune, rr rune) error {
c.mu.Lock()
err := c.db.InsertUnique(附字表名, &字表{
ID: 颜表ID(rw, s, y, t),
W: rw, S: s, Y: y, T: t,
R: rr, P: p, F: f,
})
c.mu.Unlock()
if err != nil {
return errors.New("已有同音同形的字 '" + w + "'")
}
@@ -158,14 +162,18 @@ func (c *Coder) ChangeOverlay(oldw, oldr, oldf, neww, newr, newf string) error {
}
q := "WHERE ID=" + strconv.FormatInt(颜表ID(rw, s, y, t), 10)
x := 字表{}
c.mu.RLock()
err = c.db.Find(附字表名, &x, q)
c.mu.RUnlock()
if err != nil {
return err
}
if x.R != rr {
return errors.New("提供的旧部首 '" + string(rr) + "' 与记载的 '" + string(x.R) + "' 不符")
}
c.mu.Lock()
err = c.db.Del(附字表名, q)
c.mu.Unlock()
if err != nil {
return err
}
@@ -174,5 +182,7 @@ func (c *Coder) ChangeOverlay(oldw, oldr, oldf, neww, newr, newf string) error {
// OverlayRadical adds a radical: it inserts the mapping r -> e into the
// radical table (部首表名) under the write lock.
//
// The insert is done with InsertUnique and its error is returned unchanged.
// On success the cached answer for r is dropped, because 查部首 memoizes its
// result (including fallback results) in 部首缓存 and would otherwise keep
// returning the stale value instead of the mapping that was just stored.
func (c *Coder) OverlayRadical(r rune, e string) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	if err := c.db.InsertUnique(部首表名, &部首表{R: r, E: e}); err != nil {
		return err
	}
	// Invalidate rather than overwrite: the next 查部首 call re-reads the
	// database and applies its own acceptance rules to the stored value.
	delete(c.部首缓存, r)
	return nil
}

View File

@@ -10,20 +10,26 @@ func TestEncode(t *testing.T) {
if err != nil {
t.Fatal(err)
}
es, err := c.Encode("你好,世界!看看多音字:行。")
es, lst, err := c.Encode("你好,世界!看看多音字:行。")
if err != nil {
t.Fatal(err)
}
t.Log(es.String())
t.Log(es.String(), lst)
if es.String() != "🥛👔🐴👤🌹🐱🐴👩,💦🌞😨🌍➕✌😨👨‍🌾!😭🔐🍉👁️😭🔐🍉👁️🔪🌀🍉🪩🐑🎵🍉🎵👈🌞😨🚼:[👇🦅🧗⛕|🌹👍🧗⛕]。" {
t.Fatal("got", es.String())
}
es, err = c.Encode("你好,世界!指定多音字:银行行。", 1, 0)
if len(lst) != 1 && lst[0] != 2 {
t.Fail()
}
es, lst, err = c.Encode("你好,世界!指定多音字:银行行。", 1, 0)
if err != nil {
t.Fatal(err)
}
t.Log(es.String())
t.Log(es.String(), lst)
if es.String() != "🥛👔🐴👤🌹🐱🐴👩,💦🌞😨🌍➕✌😨👨‍🌾!🐽🌞🐴✋🔪🦅😨🏠🔪🌀🍉🪩🐑🎵🍉🎵👈🌞😨🚼:🐑🎵🧗💰🌹👍🧗⛕👇🦅🧗⛕。" {
t.Fatal("got", es.String())
}
if len(lst) != 2 && lst[0] != 2 && lst[1] != 2 {
t.Fail()
}
}

34
data.go
View File

@@ -1,6 +1,9 @@
package emozi
import _ "embed"
import (
_ "embed"
"strconv"
)
// 字数据库 数据来自 https://github.com/shuowenjiezi/shuowen
//
@@ -31,6 +34,35 @@ func 颜表ID(w rune, s 声母枚举, y 韵母枚举, t 声调枚举) int64 {
return int64((uint64(w) << 32) | (uint64(s) << 16) | (uint64(y) << 8) | (uint64(t)))
}
// 查字 looks up the 字表 rows recorded for the character ch.
//
// It returns (lst, lstbuf, error):
//   - lst is the list of rows for ch. On a cache hit this is the slice stored
//     in 字表缓存 and must be treated as read-only by the caller; on a miss it
//     aliases the returned lstbuf.
//   - lstbuf is the caller's scratch buffer, possibly regrown, so the caller
//     can hand it back on the next call and avoid reallocating.
//   - error is the error of the last database query that was run, nil on a
//     cache hit.
//
// On a miss the overlay table (附字表名) is queried first; only if that query
// returns an error is the main table (主字表名) queried. A private copy of the
// result is stored in 字表缓存 whether or not the final query succeeded.
func (c *Coder) 查字(ch rune, lstbuf []字表) ([]字表, []字表, error) {
	// Fast path: shared lock only.
	c.mu.RLock()
	lst, ok := c.字表缓存[ch]
	c.mu.RUnlock()
	if ok {
		return lst, lstbuf, nil
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	// Another goroutine may have filled the entry between RUnlock and Lock;
	// check again so concurrent misses on the same rune query the database once.
	if lst, ok = c.字表缓存[ch]; ok {
		return lst, lstbuf, nil
	}

	lstbuf = lstbuf[:0]
	x := 字表{}
	q := "WHERE W=" + strconv.Itoa(int(ch))
	// collect is invoked by FindFor once per row, after the row was scanned into x.
	collect := func() error {
		lstbuf = append(lstbuf, x)
		return nil
	}
	err := c.db.FindFor(附字表名, &x, q, collect)
	if err != nil {
		// Discard anything gathered from the failed overlay query before
		// falling back to the main table.
		lstbuf = lstbuf[:0]
		err = c.db.FindFor(主字表名, &x, q, collect)
	}

	// The cache keeps its own copy because lstbuf is reused by the caller.
	lstsave := make([]字表, len(lstbuf))
	copy(lstsave, lstbuf)
	c.字表缓存[ch] = lstsave
	return lstbuf, lstbuf, err
}
// 从表 从部首表
type 部首表 struct {
R rune // R 该部首

View File

@@ -41,13 +41,24 @@ func (c *Coder) 查声调(t 声调枚举) string {
}
// 查部首 returns the string associated with the radical r, memoizing the
// answer in 部首缓存.
//
// Lookup order: the in-memory cache, then the radical table (部首表名) in the
// database, then the 部首后备 fallback map. Whatever is chosen is stored in
// the cache before it is returned.
//
// NOTE(review): the string literal compared against x.E and the final default
// that is cached and returned are not visible in this view — confirm both
// against the real file.
func (c *Coder) 查部首(r rune) string {
// Fast path: read the cache under the shared lock.
c.mu.RLock()
e, ok := c.部首缓存[r]
c.mu.RUnlock()
if ok {
return e
}
x := &部首表{}
// Every path below writes to 部首缓存, so the exclusive lock is held until return.
c.mu.Lock()
defer c.mu.Unlock()
err := c.db.Find(部首表名, x, "WHERE R="+strconv.Itoa(int(r)))
// Use the database value only if the query succeeded and the value is
// non-empty and passes the inequality check.
if err == nil && len(x.E) > 0 && x.E != {
c.部首缓存[r] = x.E
return x.E
}
// Otherwise consult the fallback map.
if e, ok := 部首后备[r]; ok {
c.部首缓存[r] = e
return e
}
// Nothing found: cache and return the default.
c.部首缓存[r] =
return
}