1
0
mirror of https://github.com/fumiama/paper-manager.git synced 2026-06-05 07:50:23 +08:00

Finish parsing (解析)

This commit is contained in:
源文雨
2023-04-09 23:30:08 +08:00
parent 2fcb3fd636
commit fe1f3b1397
9 changed files with 217 additions and 115 deletions

View File

@@ -25,6 +25,7 @@ import (
"github.com/corona10/goimagehash"
base14 "github.com/fumiama/go-base16384"
"github.com/fumiama/go-docx"
"github.com/sirupsen/logrus"
"github.com/fumiama/paper-manager/backend/utils"
)
@@ -383,9 +384,10 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
err = FileDB.db.FindFor(FileTableQuestion, &q, "", func() error {
r, err := q.GetDuplicateRate(que)
if err != nil {
logrus.Warnln("[global.AddFile] GetDuplicateRate err:", err)
return err
}
if r < 0.1 {
if r < 0.5 {
return nil
}
var buf [8]byte
@@ -394,7 +396,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
return nil
})
FileDB.mu.RUnlock()
if err == nil {
if err == nil && len(dupmap) > 0 {
que.Dup, _ = json.Marshal(dupmap)
}
w := bytes.NewBuffer(make([]byte, 0, 65536))
@@ -484,83 +486,3 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
progress(100)
return file, err
}
// QuestionJSON is the struct representation of File.Questions.
// It is recursive: a node may carry nested QuestionJSON nodes in Sub.
type QuestionJSON struct {
Name string `json:"name"` // Name is name or Question ID
Points int `json:"points"` // Points is sum of subs' points or self
Rate float64 `json:"rate"` // Rate is the avg(non-leaf) or max(leaf) similarity
Sub []QuestionJSON `json:"sub,omitempty"` // Sub is the list of child nodes; left out of the JSON when empty
}
// Question is one question record; two of them are compared by GetDuplicateRate.
type Question struct {
ID int64 // ID is the first 8 bytes of the Plain's md5
Path string // Path is the question's docx position
Plain string // Plain is the plain text of the question (like markdown format)
Images []byte // Images is a JSON object of the image dhash (hex) in XML, ex. {"rId1": "1234567890abcdef", ...}
Vector []byte // Vector is json of {word: freq, ...}; GetDuplicateRate decodes freq as uint8
Dup []byte // Dup is json of {queid: rate, ...}
}
// GetDuplicateRate calc q & que's dup rate.
//
// The result is utils.Similarity over the two word-frequency vectors plus half
// of an image term. The image term is, for every hash in que.Images, the
// largest distance to any hash in q.Images, averaged over que's images and
// divided by 64.
//
// An error is returned when any of the four JSON fields fails to decode, when
// a hash is not valid hex, or when Distance reports an error.
func (q *Question) GetDuplicateRate(que *Question) (float64, error) {
v1, v2 := make(map[string]uint8, 64), make(map[string]uint8, 64)
m1, m2 := make(map[string]string, 64), make(map[string]string, 64)
// All four fields are decoded unconditionally, so an empty Images or Vector
// makes json.Unmarshal fail and the whole comparison return an error.
err := json.Unmarshal(q.Images, &m1)
if err != nil {
return 0, err
}
err = json.Unmarshal(que.Images, &m2)
if err != nil {
return 0, err
}
err = json.Unmarshal(q.Vector, &v1)
if err != nil {
return 0, err
}
err = json.Unmarshal(que.Vector, &v2)
if err != nil {
return 0, err
}
// imgdsts accumulates, per image of que, the largest distance to q's images.
imgdsts := uint64(0)
for _, dhstr2 := range m2 {
d, err := hex.DecodeString(dhstr2)
if err != nil {
return 0, err
}
// NOTE(review): Uint64 needs at least 8 bytes; a shorter decoded hash panics here.
dh2 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
r := 0
for _, dhstr1 := range m1 {
d, err := hex.DecodeString(dhstr1)
if err != nil {
return 0, err
}
dh1 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
dst, err := dh2.Distance(dh1)
if err != nil {
return 0, err
}
if dst > r {
r = dst
}
}
imgdsts += uint64(r)
}
// NOTE(review): when m2 is empty this is 0/0 in float64, i.e. NaN, and the
// NaN propagates into the returned rate.
imgdupr := float64(imgdsts) / float64(len(m2)) / 64.0
// Build two equally long vectors over the union of words: first every word
// of v1 (0 where v2 lacks it), then the words that only v2 has.
v1space := make([]uint8, 0, len(v1)+len(v2))
v2space := make([]uint8, 0, len(v1)+len(v2))
for k, v := range v1 {
v1space = append(v1space, v)
if tv, ok := v2[k]; ok {
v2space = append(v2space, tv)
// Remove shared words so the loop below only sees words unique to v2.
delete(v2, k)
} else {
v2space = append(v2space, 0)
}
}
for _, v := range v2 {
v1space = append(v1space, 0)
v2space = append(v2space, v)
}
return utils.Similarity(v1space, v2space) + imgdupr/2.0, nil
}

View File

@@ -25,7 +25,7 @@ type List struct {
QuesC int // QuesC 总小题数
HasntAnalyzed bool // HasntAnalyzed whether file has been analyzed
IsTemp bool // IsTemp whether file is temp
Path string `db:"Path,UNIQUE"` // Path of file, unique
Path string // Path of file, normally unique
Desc string // Desc is file's description
}
@@ -89,3 +89,10 @@ func (f *FileDatabase) ListUploadedFile() (lst []*List, err error) {
FileDB.mu.RUnlock()
return
}
// GetFileInfo returns the row of FileTableList whose ID equals id.
//
// The lookup goes through the package-level FileDB while holding its read
// lock; whatever sql.Find reports is passed back unchanged.
func (f *FileDatabase) GetFileInfo(id int) (lst List, err error) {
	cond := "WHERE ID=" + strconv.Itoa(id)
	FileDB.mu.RLock()
	lst, err = sql.Find[List](&FileDB.db, FileTableList, cond)
	FileDB.mu.RUnlock()
	return lst, err
}

104
backend/global/question.go Normal file
View File

@@ -0,0 +1,104 @@
package global
import (
	"encoding/binary"
	"encoding/hex"
	"encoding/json"
	"errors"

	"github.com/corona10/goimagehash"
	"github.com/fumiama/paper-manager/backend/utils"
)
// QuestionJSON is the struct representation of File.Questions.
// It is recursive: a node may carry nested QuestionJSON nodes in Sub.
type QuestionJSON struct {
Name string `json:"name"` // Name is name or Question ID
Points int `json:"points,omitempty"` // Points is sum of subs' points or self; left out of the JSON when 0
Rate float64 `json:"rate,omitempty"` // Rate is the avg(non-leaf) or max(leaf) similarity; left out of the JSON when 0
Sub []QuestionJSON `json:"sub,omitempty"` // Sub is the list of child nodes; left out of the JSON when empty
}
// Question is one question record; two of them are compared by GetDuplicateRate.
type Question struct {
ID int64 // ID is the first 8 bytes of the Plain's md5
Path string // Path is the question's docx position
Plain string // Plain is the plain text of the question (like markdown format)
Images []byte // Images is a JSON object of the image dhash (hex) in XML, ex. {"rId1": "1234567890abcdef", ...}
Vector []byte // Vector is json of {word: freq, ...}; GetDuplicateRate decodes freq as uint8
Dup []byte // Dup is json of {queid: rate, ...}
}
// GetDuplicateRate calculates the duplicate rate between q and que.
//
// The text part is utils.Similarity over the two word-frequency vectors
// (Vector), aligned on the union of their words. The image part is, for each
// hash in que.Images, the largest distance to any hash in q.Images, averaged
// over que's images and divided by 64. When the image part is non-zero the
// result is (8*text + 2*image) / 10, otherwise it is the text part alone.
//
// Images or Vector payloads of at most 2 bytes are treated as empty and are
// not parsed. An error is returned when a payload cannot be decoded as JSON,
// when an image hash is not valid hex or decodes to fewer than 8 bytes, or
// when the hash distance cannot be computed.
//
// NOTE(review): the image part is built from distances, so it is 0 for
// identical images and grows as images differ — confirm this is the intended
// direction for a "duplicate" rate.
func (q *Question) GetDuplicateRate(que *Question) (float64, error) {
	v1, v2 := make(map[string]uint8, 64), make(map[string]uint8, 64)
	m1, m2 := make(map[string]string, 64), make(map[string]string, 64)
	// Payloads of two bytes or fewer cannot hold an entry; skipping them
	// keeps empty fields from failing the whole comparison.
	fields := []struct {
		raw []byte
		dst interface{}
	}{
		{q.Images, &m1},
		{que.Images, &m2},
		{q.Vector, &v1},
		{que.Vector, &v2},
	}
	for _, f := range fields {
		if len(f.raw) <= 2 {
			continue
		}
		if err := json.Unmarshal(f.raw, f.dst); err != nil {
			return 0, err
		}
	}

	imgdupr := 0.0
	if len(m2) > 0 {
		// Decode q's hashes once instead of once per image of que.
		hashes1 := make([]*goimagehash.ImageHash, 0, len(m1))
		for _, dhstr1 := range m1 {
			dh1, err := decodeDHash(dhstr1)
			if err != nil {
				return 0, err
			}
			hashes1 = append(hashes1, dh1)
		}
		imgdsts := uint64(0)
		for _, dhstr2 := range m2 {
			dh2, err := decodeDHash(dhstr2)
			if err != nil {
				return 0, err
			}
			// r is the largest distance from this image to any image of q.
			r := 0
			for _, dh1 := range hashes1 {
				dst, err := dh2.Distance(dh1)
				if err != nil {
					return 0, err
				}
				if dst > r {
					r = dst
				}
			}
			imgdsts += uint64(r)
		}
		imgdupr = float64(imgdsts) / float64(len(m2)) / 64.0
	}

	// Build two equally long vectors over the union of words: first every
	// word of v1 (0 where v2 lacks it), then the words only v2 has.
	v1space := make([]uint8, 0, len(v1)+len(v2))
	v2space := make([]uint8, 0, len(v1)+len(v2))
	for k, v := range v1 {
		v1space = append(v1space, v)
		if tv, ok := v2[k]; ok {
			v2space = append(v2space, tv)
			// Drop shared words so the next loop only sees v2-only words.
			delete(v2, k)
		} else {
			v2space = append(v2space, 0)
		}
	}
	for _, v := range v2 {
		v1space = append(v1space, 0)
		v2space = append(v2space, v)
	}

	sim := utils.Similarity(v1space, v2space)
	if imgdupr > 0 {
		return (8*sim + 2*imgdupr) / 10.0, nil
	}
	return sim, nil
}

// decodeDHash parses a hex-encoded dHash into an ImageHash, reading the first
// 8 decoded bytes as a little-endian uint64.
func decodeDHash(s string) (*goimagehash.ImageHash, error) {
	d, err := hex.DecodeString(s)
	if err != nil {
		return nil, err
	}
	if len(d) < 8 {
		// binary.LittleEndian.Uint64 panics on fewer than 8 bytes.
		return nil, errors.New("image hash is shorter than 8 bytes")
	}
	return goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash), nil
}

View File

@@ -9,18 +9,18 @@ import (
// Regex stores user's config of splitting docx file
type Regex struct {
ID int // ID is User(ID)
Title string // Title default `.*(\d{4})\s*-.*学年.*(\d?).*([中末]?).*([AB]?)\s*卷`
Class string // Class default `考试科目:\s*(\S+)\s*`
Title string // Title default `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB])\s*卷`
Class string // Class default `(考试科目|课程名称)\s*(\S+)\s*`
OpenCl string // OpenCl default `考试形式:\s*(\S+)\s*`
Date string // Date default `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d+)\s*日`
Time string // Time default `考试时长:\s*(\d+)\s*分钟`
Rate string // Rate default `成绩构成比例:\s*(.*%)\s*`
Rate string // Rate default `(成绩构成比例|课程成绩构成)\s*(.*%)\s*`
Major string // Major default `([一二三四五六七八九十]+)、\s*(.*)\s*.*([空题]?)\s*(\d*).*共\s*(\d+)\s*分.*`
Sub string // Sub default `(\d+)、`
}
func newRegex() (reg Regex) {
reg.Title = `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB]?)\s*卷`
reg.Title = `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB])\s*卷`
reg.Class = `(考试科目|课程名称)\s*(\S+)\s*`
reg.OpenCl = `考试形式:\s*(\S+)\s*`
reg.Date = `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d*)\s*日`

View File

@@ -84,6 +84,50 @@ func init() {
}
writeresult(w, codeSuccess, &result, messageOk, typeSuccess)
}}
apimap["/api/getFileInfo"] = &apihandler{"GET", func(w http.ResponseWriter, r *http.Request) {
token := r.Header.Get("Authorization")
user := usertokens.Get(token)
if user == nil {
writeresult(w, codeError, nil, errInvalidToken.Error(), typeError)
return
}
var err error
idstr := r.URL.Query().Get("id")
if idstr == "" {
writeresult(w, codeError, nil, "empty id", typeError)
return
}
id, err := strconv.Atoi(idstr)
if err != nil {
writeresult(w, codeError, nil, err.Error(), typeError)
return
}
lst, err := global.FileDB.GetFileInfo(id)
if err != nil && err != sql.ErrNullResult {
writeresult(w, codeError, nil, err.Error(), typeError)
return
}
result := filelist{
ID: id,
Desc: lst.Desc,
Size: float64(lst.Size) / 1024 / 1024, // MB
Ques: lst.QuesC,
Auth: lst.UpName,
Date: time.Unix(lst.UpTime, 0).Format(chineseYYMMDDLayout),
}
j := strings.LastIndex(lst.Path, "/")
if j <= 0 {
result.Title = lst.Path
} else {
result.Title = lst.Path[j+1:]
}
if !lst.HasntAnalyzed {
result.Per = 100
} else {
result.Per = analyzeper.Get(id)
}
writeresult(w, codeSuccess, &result, messageOk, typeSuccess)
}}
apimap["/api/getFilePercent"] = &apihandler{"GET", func(w http.ResponseWriter, r *http.Request) {
token := r.Header.Get("Authorization")
user := usertokens.Get(token)
@@ -132,8 +176,13 @@ func init() {
}
ch := make(chan struct{}, 1)
type message struct {
C int `json:"code"` // C 0 success 1 pending
M string `json:"msg"`
}
if analyzeper.Get(id) > 0 {
writeresult(w, codeError, nil, "已在分析!", typeError)
return
}
go func() {
_, err = global.FileDB.AddFile(id, reg, istemp, func(u uint) { analyzeper.Set(id, u) })
ch <- struct{}{}
@@ -141,14 +190,14 @@ func init() {
}()
select {
case <-time.After(time.Second):
writeresult(w, codeSuccess, &message{M: "正在分析, 请耐心等待..."}, messageOk, typeSuccess)
writeresult(w, codeSuccess, &message{C: 1, M: "正在分析, 请耐心等待..."}, messageOk, typeSuccess)
return
case <-ch:
if err != nil {
writeresult(w, codeError, nil, err.Error(), typeError)
return
}
writeresult(w, codeSuccess, &message{M: "分析完成"}, messageOk, typeSuccess)
writeresult(w, codeSuccess, &message{C: 0, M: "分析完成"}, messageOk, typeSuccess)
}
}}
}

View File

@@ -1,9 +1,10 @@
import { defHttp } from '/@/utils/http/axios'
import { getFileListModel, FilePercent, DelFile, AnalyzeFile } from './model/fileListModel'
import { getFileListModel, DelFile, AnalyzeFile, FileListGroupItem } from './model/fileListModel'
import { DownloadFile, FileStatus } from './model/fileModel'
enum Api {
GetFileList = '/getFileList',
GetFileInfo = '/getFileInfo',
GetFilePercent = '/getFilePercent',
DelFile = '/delFile',
AnalyzeFile = '/analyzeFile',
@@ -18,11 +19,18 @@ export const getFileList = (count?: number) => {
return defHttp.get<getFileListModel>({ url: Api.GetFileList, params: { count: count } })
}
/**
 * @description: Fetch the info of a single file by its id
 */
export const getFileInfo = (id: number) =>
  defHttp.get<FileListGroupItem>({ url: Api.GetFileInfo, params: { id: id } })
/**
* @description: Get file percent
*/
export const getFilePercent = (id: number) => {
return defHttp.get<FilePercent>({ url: Api.GetFilePercent, params: { id: id } })
return defHttp.get<number>({ url: Api.GetFilePercent, params: { id: id } })
}
/**

View File

@@ -14,14 +14,11 @@ export interface FileListGroupItem {
*/
export type getFileListModel = FileListGroupItem[]
export interface FilePercent {
percent: number
}
export interface DelFile {
msg: string
}
export interface AnalyzeFile {
code: number
msg: string
}

View File

@@ -1,5 +1,5 @@
import { reactive } from 'vue'
import { getFileList, getFilePercent } from '/@/api/page'
import { getFileList, getFilePercent, getFileInfo } from '/@/api/page'
import { getFileListModel } from '/@/api/page/model/fileListModel'
export const random = (min: number, max: number) =>
@@ -8,9 +8,9 @@ export const random = (min: number, max: number) =>
export function refreshFilePercent(item: any) {
return async () => {
const p = await getFilePercent(item.id)
if (p.percent) {
item.percent = p.percent
if (p.percent < 100) {
if (p) {
item.percent = p
if (p < 100) {
setTimeout(refreshFilePercent(item), 1000)
}
} else item.hassettimeout = false
@@ -62,9 +62,9 @@ async function refreshFileList() {
}
}
export let cardList = reactive(await refreshFileList())
export const cardList = reactive(await refreshFileList())
export let pagination = reactive({
export const pagination = reactive({
current: 1,
total: cardList._cardList.length,
show: true,
@@ -77,17 +77,11 @@ export let pagination = reactive({
export function refreshCardList() {
refreshFileList().then((value) => {
cardList = reactive(value)
pagination = reactive({
current: 1,
total: cardList._cardList.length,
show: true,
pageSize: 10,
onChange: function (page: number, pageSize: number) {
this.current = page
this.pageSize = pageSize
},
})
cardList._cardList = value._cardList
cardList._totalQuestions = value._totalQuestions
cardList._totalSize = value._totalSize
pagination.current = 1
pagination.total = cardList._cardList.length
})
}
@@ -101,3 +95,21 @@ export function deleteFileByID(id: number) {
}
})
}
/**
 * Re-fetch the info of one file and patch every card in cardList whose id
 * matches, in place. The aggregated size and question totals are adjusted by
 * the difference between the card's old values and the fetched ones.
 */
export function refreshFileByID(id: number) {
  getFileInfo(id).then((info) => {
    cardList._cardList.forEach((card: any) => {
      if (card.id != id) {
        return
      }
      // Totals must be adjusted before the card's own fields are overwritten.
      cardList._totalSize = cardList._totalSize - card.size + info.size
      cardList._totalQuestions = cardList._totalQuestions - card.questions + info.questions
      Object.assign(card, {
        title: info.title,
        description: info.description,
        size: info.size,
        questions: info.questions,
        datetime: info.datetime,
        author: info.author,
        percent: info.percent,
      })
    })
  })
}

View File

@@ -110,6 +110,7 @@
refreshFilePercent,
random,
refreshCardList,
refreshFileByID,
} from './data'
import { PageWrapper } from '/@/components/Page'
import { useMessage } from '/@/hooks/web/useMessage'
@@ -120,7 +121,6 @@
import { useI18n } from '/@/hooks/web/useI18n'
import { delFile, analyzeFile } from '/@/api/page'
import { useGo } from '/@/hooks/web/usePage'
import { useTabs } from '/@/hooks/web/useTabs'
const { t } = useI18n()
const { createMessage } = useMessage()
@@ -149,6 +149,11 @@
const msg = await analyzeFile(item.id, true)
if (msg) {
createMessage.success(msg.msg)
if (msg.code == 0) {
item.percent = 100
refreshFileByID(item.id)
return
}
if (!item.hassettimeout && item.percent == 0) {
setTimeout(refreshFilePercent(item), 1000 + random(0, 1000))
item.hassettimeout = true
@@ -173,7 +178,6 @@
},
setup() {
const { hasPermission } = usePermission()
const { refreshPage } = useTabs()
const go = useGo()
function openFile(id: number) {
@@ -182,7 +186,6 @@
async function onChange(_: number[]) {
refreshCardList()
refreshPage()
}
return {