mirror of
https://github.com/fumiama/paper-manager.git
synced 2026-06-05 07:50:23 +08:00
220 lines
5.6 KiB
Go
220 lines
5.6 KiB
Go
package global
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"strconv"
|
|
|
|
sql "github.com/FloatTech/sqlite"
|
|
"github.com/corona10/goimagehash"
|
|
base14 "github.com/fumiama/go-base16384"
|
|
"github.com/fumiama/paper-manager/backend/utils"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// QuestionJSON is the struct representation of File.Questions
|
|
type QuestionJSON struct {
|
|
Name string `json:"name"` // Name is name or Question ID
|
|
Points int `json:"points,omitempty"` // Points is sum of subs' points or self
|
|
Sub []QuestionJSON `json:"sub,omitempty"`
|
|
}
|
|
|
|
// Delete me and all subs, ignore errors
|
|
func (q *QuestionJSON) Delete(f *FileDatabase, istemp bool) {
|
|
if b, err := hex.DecodeString(q.Name); err == nil {
|
|
err = f.DelQuestion(int64(binary.LittleEndian.Uint64(b)), istemp)
|
|
if err != nil {
|
|
logrus.Warnln("[global.QuestionJSON] Delete", q.Name, "err:", err)
|
|
}
|
|
}
|
|
for _, sq := range q.Sub {
|
|
sq.Delete(f, istemp)
|
|
}
|
|
}
|
|
|
|
// DelQuestion 删除问题, 与其它问题的 dup
|
|
func (f *FileDatabase) DelQuestion(id int64, istemp bool) error {
|
|
qtable := ""
|
|
if istemp {
|
|
qtable = FileTableTempQuestion
|
|
} else {
|
|
qtable = FileTableQuestion
|
|
}
|
|
f.mu.Lock()
|
|
defer f.mu.Unlock()
|
|
q, err := sql.Find[Question](&f.db, qtable, "WHERE ID="+strconv.FormatInt(id, 10))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if len(q.Dup) > 2 {
|
|
dupmap := make(map[string]float64, 64)
|
|
err = json.Unmarshal(q.Dup, &dupmap)
|
|
if err == nil {
|
|
var buf [8]byte
|
|
for k := range dupmap {
|
|
_, err = hex.Decode(buf[:], base14.StringToBytes(k))
|
|
if err == nil {
|
|
qid := int64(binary.LittleEndian.Uint64(buf[:]))
|
|
qq, err := sql.Find[Question](&f.db, qtable, "WHERE ID="+strconv.FormatInt(qid, 10))
|
|
if err == nil && len(qq.Dup) > 2 {
|
|
dupmap2 := make(map[string]float64, 64)
|
|
err = json.Unmarshal(qq.Dup, &dupmap2)
|
|
if err == nil {
|
|
delete(dupmap2, k)
|
|
qq.Dup, err = json.Marshal(dupmap2)
|
|
if err == nil {
|
|
err = f.db.Insert(qtable, &qq)
|
|
if err != nil {
|
|
logrus.Warnln("[global.DelQuestion] insert modified dup to id", k, "table", qtable, "err:", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return f.db.Del(qtable, "WHERE ID="+strconv.FormatInt(id, 10))
|
|
}
|
|
|
|
type Question struct {
|
|
ID int64 // ID is the first 8 bytes of the Plain's md5
|
|
ListID int // ListID is fk to List(ID)
|
|
Major string // Major is sub's major name
|
|
Path string // Path is the question's docx position
|
|
Plain string // Plain is the plain text of the question (like markdown format)
|
|
Images []byte // Images is json of the image dhash in XML, ex. ['rId1': '1234567890abcdef', ...]
|
|
Vector []byte // Vector is json of {word: freq, ...}
|
|
Dup []byte // Dup is json of {queid: rate, ...}
|
|
}
|
|
|
|
// GetQuestion by id
|
|
func (f *FileDatabase) GetQuestion(id int64, istemp bool) (Question, error) {
|
|
f.mu.RLock()
|
|
defer f.mu.RUnlock()
|
|
qtable := ""
|
|
if istemp {
|
|
qtable = FileTableTempQuestion
|
|
} else {
|
|
qtable = FileTableQuestion
|
|
}
|
|
return sql.Find[Question](&f.db, qtable, "WHERE ID="+strconv.FormatInt(id, 10))
|
|
}
|
|
|
|
// GetQuestionHex by hexid
|
|
func (f *FileDatabase) GetQuestionByHex(hexid string, istemp bool) (q Question, err error) {
|
|
idb, err := hex.DecodeString(hexid)
|
|
if err != nil {
|
|
return
|
|
}
|
|
return f.GetQuestion(int64(binary.LittleEndian.Uint64(idb)), istemp)
|
|
}
|
|
|
|
// GetMajors ...
|
|
func (f *FileDatabase) GetMajors() (majors []string) {
|
|
type majorq struct {
|
|
Major string
|
|
}
|
|
var maj majorq
|
|
f.mu.RLock()
|
|
defer f.mu.RUnlock()
|
|
f.db.QueryFor("SELECT DISTINCT Major FROM question;", &maj, func() error {
|
|
majors = append(majors, maj.Major)
|
|
return nil
|
|
})
|
|
return
|
|
}
|
|
|
|
// MaxDuplicateRate parse q.Dup and get the max rate
|
|
func (q *Question) MaxDuplicateRate() float64 {
|
|
dupmap := make(map[string]float64, 64)
|
|
if err := json.Unmarshal(q.Dup, &dupmap); err != nil {
|
|
return 0
|
|
}
|
|
r := 0.0
|
|
for _, v := range dupmap {
|
|
if v > r {
|
|
r = v
|
|
}
|
|
}
|
|
return r
|
|
}
|
|
|
|
// GetDuplicateRate calc q & que's dup rate
|
|
func (q *Question) GetDuplicateRate(que *Question) (float64, error) {
|
|
v1, v2 := make(map[string]uint8, 64), make(map[string]uint8, 64)
|
|
m1, m2 := make(map[string]string, 64), make(map[string]string, 64)
|
|
if len(q.Images) > 2 {
|
|
err := json.Unmarshal(q.Images, &m1)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
if len(que.Images) > 2 {
|
|
err := json.Unmarshal(que.Images, &m2)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
if len(q.Vector) > 2 {
|
|
err := json.Unmarshal(q.Vector, &v1)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
if len(que.Vector) > 2 {
|
|
err := json.Unmarshal(que.Vector, &v2)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
imgdsts := uint64(0)
|
|
for _, dhstr2 := range m2 {
|
|
d, err := hex.DecodeString(dhstr2)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
dh2 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
|
|
r := 0
|
|
for _, dhstr1 := range m1 {
|
|
d, err := hex.DecodeString(dhstr1)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
dh1 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
|
|
dst, err := dh2.Distance(dh1)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if dst > r {
|
|
r = dst
|
|
}
|
|
}
|
|
imgdsts += uint64(r)
|
|
}
|
|
imgdupr := 0.0
|
|
if len(m2) > 0 {
|
|
imgdupr = float64(imgdsts) / float64(len(m2)) / 64.0
|
|
}
|
|
v1space := make([]uint8, 0, len(v1)+len(v2))
|
|
v2space := make([]uint8, 0, len(v1)+len(v2))
|
|
for k, v := range v1 {
|
|
v1space = append(v1space, v)
|
|
if tv, ok := v2[k]; ok {
|
|
v2space = append(v2space, tv)
|
|
delete(v2, k)
|
|
} else {
|
|
v2space = append(v2space, 0)
|
|
}
|
|
}
|
|
for _, v := range v2 {
|
|
v1space = append(v1space, 0)
|
|
v2space = append(v2space, v)
|
|
}
|
|
if imgdupr > 0 {
|
|
return (8*utils.Similarity(v1space, v2space) + 2*imgdupr) / 10.0, nil
|
|
}
|
|
return utils.Similarity(v1space, v2space), nil
|
|
}
|