1
0
mirror of https://github.com/fumiama/emozi.git synced 2026-06-05 16:50:38 +08:00
Files
emozi/coder.go
2024-02-15 17:50:39 +09:00

353 lines
8.3 KiB
Go

package emozi
import (
"errors"
"os"
"strconv"
"strings"
"sync"
"time"
sql "github.com/FloatTech/sqlite"
)
// Coder encoder/decoder
type Coder struct {
mu sync.RWMutex
db sql.Sqlite
字表缓存 map[rune][]字表
逆字表缓存 map[int64][]rune
部首缓存 map[rune]string
逆部首缓存 map[string][]rune
}
// NewCoder israndom 随机挑选声母韵母的颜文字, 否则固定使用第一个
func NewCoder(cachettl time.Duration) (c Coder, err error) {
if _, err = os.Stat(EmoziDatabasePath); err != nil {
err = os.WriteFile(EmoziDatabasePath, 字数据库, 0644)
if err != nil {
return
}
}
c.db.DBPath = EmoziDatabasePath
c.字表缓存 = make(map[rune][]字表, 4096)
c.逆字表缓存 = make(map[int64][]rune, 4096)
c.部首缓存 = make(map[rune]string, 4096)
c.逆部首缓存 = make(map[string][]rune, 4096)
err = c.db.Open(cachettl)
if err != nil {
return
}
err = c.db.Create(主字表名, &字表{})
if err != nil {
return
}
err = c.db.Create(附字表名, &字表{})
if err != nil {
return
}
err = c.db.Create(部首表名, &部首表{})
if err != nil {
return
}
_ = c.db.Query("CREATE INDEX IF NOT EXISTS IE ON "+部首表名+" (E);", nil)
return
}
// Close ...
func (c *Coder) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
c.字表缓存 = nil
c.逆字表缓存 = nil
c.部首缓存 = nil
c.逆部首缓存 = nil
return c.db.Close()
}
// Encode 从汉字序列生成 EmoziString 返回 EmoziString 多音字选择数列表
func (c *Coder) Encode(enableRandom bool, s string, selections ...int) (EmoziString, []int, error) {
sb := strings.Builder{}
buflen := len(s) / 2
if buflen < 4 {
buflen = 4
}
lstbuf := make([]字表, 0, buflen)
var lst []字表
var write func(x *字表)
randomwrite := func(x *字表) {
sb.WriteString(c.声母(enableRandom, x.S))
sb.WriteString(c.韵母(enableRandom, x.Y))
sb.WriteString(c.声调(enableRandom, x.T))
sb.WriteString(c.部首(x.R))
}
norandomwrite := func(x *字表) {
sb.WriteString(c.声母(false, x.S))
sb.WriteString(c.韵母(false, x.Y))
sb.WriteString(c.声调(false, x.T))
sb.WriteString(c.部首(x.R))
write = randomwrite
}
write = norandomwrite
多音字计数 := 0
多音字数表 := []int{}
var err error
for _, ch := range s { // nolint: go-staticcheck
lst, lstbuf, err = c.查字(ch, lstbuf)
if err != nil || len(lst) == 0 {
//fmt.Println("写入未知字:", string(ch), ch)
sb.WriteRune(ch)
continue
}
if len(lst) == 1 {
write(&lst[0])
continue
}
多音字数表 = append(多音字数表, len(lst))
if len(selections) > 多音字计数 {
idx := selections[多音字计数]
多音字计数++
if idx >= 0 && idx < len(lst) {
write(&lst[idx])
continue
}
}
// 多音字
sb.WriteString("[")
write(&lst[0])
for _, x := range lst[1:] {
sb.WriteString("|")
write(&x)
}
sb.WriteString("]")
}
return WrapRawEmoziString(sb.String()), 多音字数表, nil
}
// Decode 从 EmoziString 解码得到可能的文字序列
func (c *Coder) Decode(es EmoziString, forcedecode bool) (string, error) {
if !es.IsValid() && !forcedecode {
return "", ErrInvalidEmoziString
}
s := ""
if forcedecode {
s = string(es)
} else {
s = es.String()
}
// fmt.Println(len(s), s)
lstbuf := make([]字表, 0, len(s)/8)
read := func(s string) (string, int) {
sum := 0
sm, n := c.逆声母(s)
if n == 0 {
return "", 0
}
sum += n
// fmt.Println(n, sm, s[0:sum])
ym, n := c.逆韵母(s[sum:])
if n == 0 {
return "", 0
}
sum += n
// fmt.Println(n, ym, s[sum-n:sum])
t, n := c.逆声调(s[sum:])
if n == 0 {
return "", 0
}
sum += n
// fmt.Println(n, t, s[sum-n:sum])
rs, n := c.逆部首(s[sum:])
if n == 0 {
return "", 0
}
sum += n
// fmt.Println(n, rs, s[sum-n:sum])
var possibles []rune
var err error
if len(rs) == 0 { // 意符为空
possibles, lstbuf, err = c.逆字(sm, ym, t, 0, lstbuf)
if err != nil {
return "[]", sum
}
} else {
var revr []rune
for i := 0; i < len(rs); i++ {
revr, lstbuf, err = c.逆字(sm, ym, t, rs[i], lstbuf)
if err != nil || len(revr) == 0 {
continue
}
if len(possibles) == 0 {
possibles = revr
} else {
possibles = append(possibles, revr...)
}
}
}
if len(possibles) == 0 {
return "[]", sum
}
if len(possibles) == 1 {
return string(possibles[0]), sum
}
sb := strings.Builder{}
sb.WriteString("[")
sb.WriteRune(possibles[0])
for _, r := range possibles[1:] {
sb.WriteString("|")
sb.WriteRune(r)
}
sb.WriteString("]")
return sb.String(), sum
}
sb := strings.Builder{}
sum := 0
for sum < len(s) {
ch, n := read(s[sum:])
if n <= 0 {
sb.WriteByte(s[sum])
sum++
continue
}
sum += n
sb.WriteString(ch)
}
return sb.String(), nil
}
// AddChar 向主库添加一个新字
//
// w: 字, r: 部首, p: 不带声调的拼音(可空), f: 带声调的拼音
func (c *Coder) AddChar(w, r, p, f string) error {
if p == "" {
p = 去调(f)
}
s, y, t, rw, rr, err := 拆音识字(w, r, p, f)
if err != nil {
return err
}
c.mu.Lock()
err = c.db.InsertUnique(主字表名, &字表{
ID: 字表ID(rw, s, y, t),
W: rw, S: s, Y: y, T: t,
R: rr, P: p, F: f,
})
c.mu.Unlock()
if err != nil {
return errors.New("已有同音同形的字 '" + w + "'")
}
return nil
}
// AddCharOverlay 向附加库添加一个新字, 覆盖在主库之上
//
// w: 字, r: 部首, p: 不带声调的拼音(可空), f: 带声调的拼音
// 返回: 字表ID, 文字描述, error
func (c *Coder) AddCharOverlay(w, r, p, f string) (int64, string, error) {
if p == "" {
p = 去调(f)
}
s, y, t, rw, rr, err := 拆音识字(w, r, p, f)
if err != nil {
return 0, "", err
}
return c.addcharoverlay(w, p, f, s, y, t, rw, rr)
}
func (c *Coder) addcharoverlay(w, p, f string, s 声母枚举, y 韵母枚举, t 声调枚举, rw rune, rr rune) (int64, string, error) {
id := 字表ID(rw, s, y, t)
c.mu.Lock()
err := c.db.InsertUnique(附字表名, &字表{
ID: id,
W: rw, S: s, Y: y, T: t,
R: rr, P: p, F: f,
})
c.mu.Unlock()
if err != nil {
return 0, "", errors.New("已有同音同形的字 '" + w + "'")
}
sb := strings.Builder{}
sb.WriteString(s.String())
sb.WriteString(", ")
sb.WriteString(y.String())
sb.WriteString(", ")
sb.WriteString(t.String())
return id, sb.String(), nil
}
// ChangeCharOverlay 更改附加库的一项
func (c *Coder) ChangeCharOverlay(oldw, oldr, oldf, neww, newr, newf string) (int64, string, error) {
s, y, t, rw, rr, err := 拆音识字(oldw, oldr, 去调(oldf), oldf)
if err != nil {
return 0, "", err
}
newp := 去调(newf)
ns, ny, nt, nrw, nrr, err := 拆音识字(neww, newr, newp, newf)
if err != nil {
return 0, "", err
}
q := "WHERE ID=" + strconv.FormatInt(字表ID(rw, s, y, t), 10)
x := 字表{}
c.mu.RLock()
err = c.db.Find(附字表名, &x, q)
c.mu.RUnlock()
if err != nil {
return 0, "", err
}
if x.R != rr {
return 0, "", errors.New("提供的旧部首 '" + string(rr) + "' 与记载的 '" + string(x.R) + "' 不符")
}
c.mu.Lock()
err = c.db.Del(附字表名, q)
c.mu.Unlock()
if err != nil {
return 0, "", err
}
return c.addcharoverlay(neww, newp, newf, ns, ny, nt, nrw, nrr)
}
// StabilizeCharFromOverlay 将附加库中的一项固定到主库
func (c *Coder) StabilizeCharFromOverlay(id int64) (string, error) {
x := 字表{}
q := "WHERE ID=" + strconv.FormatInt(id, 10)
c.mu.Lock()
defer c.mu.Unlock()
err := c.db.Find(附字表名, &x, q)
if err != nil {
return "", err
}
err = c.db.Insert(主字表名, &x)
if err != nil {
return x.String(), err
}
return x.String(), c.db.Del(附字表名, q)
}
// DelChar 删除主库的一个字
func (c *Coder) DelChar(id int64) error {
c.mu.Lock()
defer c.mu.Unlock()
return c.db.Del(主字表名, "WHERE ID="+strconv.FormatInt(id, 10))
}
// DelCharOverlay 删除附加库的一个字
func (c *Coder) DelCharOverlay(id int64) error {
c.mu.Lock()
defer c.mu.Unlock()
return c.db.Del(附字表名, "WHERE ID="+strconv.FormatInt(id, 10))
}
// AddRadical 添加一个部首
func (c *Coder) AddRadical(r rune, e string) error {
c.mu.Lock()
defer c.mu.Unlock()
return c.db.InsertUnique(部首表名, &部首表{R: r, E: e})
}
// DelRadical 删除一个部首
func (c *Coder) DelRadical(r rune) error {
c.mu.Lock()
defer c.mu.Unlock()
return c.db.Del(部首表名, "WHERE R="+strconv.Itoa(int(r)))
}