mirror of
https://github.com/fumiama/paper-manager.git
synced 2026-06-05 07:50:23 +08:00
finish 解析
This commit is contained in:
@@ -25,6 +25,7 @@ import (
|
||||
"github.com/corona10/goimagehash"
|
||||
base14 "github.com/fumiama/go-base16384"
|
||||
"github.com/fumiama/go-docx"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/fumiama/paper-manager/backend/utils"
|
||||
)
|
||||
@@ -383,9 +384,10 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
err = FileDB.db.FindFor(FileTableQuestion, &q, "", func() error {
|
||||
r, err := q.GetDuplicateRate(que)
|
||||
if err != nil {
|
||||
logrus.Warnln("[global.AddFile] GetDuplicateRate err:", err)
|
||||
return err
|
||||
}
|
||||
if r < 0.1 {
|
||||
if r < 0.5 {
|
||||
return nil
|
||||
}
|
||||
var buf [8]byte
|
||||
@@ -394,7 +396,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
return nil
|
||||
})
|
||||
FileDB.mu.RUnlock()
|
||||
if err == nil {
|
||||
if err == nil && len(dupmap) > 0 {
|
||||
que.Dup, _ = json.Marshal(dupmap)
|
||||
}
|
||||
w := bytes.NewBuffer(make([]byte, 0, 65536))
|
||||
@@ -484,83 +486,3 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
progress(100)
|
||||
return file, err
|
||||
}
|
||||
|
||||
// QuestionJSON is the struct representation of File.Questions
|
||||
type QuestionJSON struct {
|
||||
Name string `json:"name"` // Name is name or Question ID
|
||||
Points int `json:"points"` // Points is sum of subs' points or self
|
||||
Rate float64 `json:"rate"` // Rate is the avg(non-leaf) or max(leaf) similarity
|
||||
Sub []QuestionJSON `json:"sub,omitempty"`
|
||||
}
|
||||
|
||||
type Question struct {
|
||||
ID int64 // ID is the first 8 bytes of the Plain's md5
|
||||
Path string // Path is the question's docx position
|
||||
Plain string // Plain is the plain text of the question (like markdown format)
|
||||
Images []byte // Images is json of the image dhash in XML, ex. ['rId1': '1234567890abcdef', ...]
|
||||
Vector []byte // Vector is json of {word: freq, ...}
|
||||
Dup []byte // Dup is json of {queid: rate, ...}
|
||||
}
|
||||
|
||||
// GetDuplicateRate calc q & que's dup rate
|
||||
func (q *Question) GetDuplicateRate(que *Question) (float64, error) {
|
||||
v1, v2 := make(map[string]uint8, 64), make(map[string]uint8, 64)
|
||||
m1, m2 := make(map[string]string, 64), make(map[string]string, 64)
|
||||
err := json.Unmarshal(q.Images, &m1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
err = json.Unmarshal(que.Images, &m2)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
err = json.Unmarshal(q.Vector, &v1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
err = json.Unmarshal(que.Vector, &v2)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
imgdsts := uint64(0)
|
||||
for _, dhstr2 := range m2 {
|
||||
d, err := hex.DecodeString(dhstr2)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
dh2 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
|
||||
r := 0
|
||||
for _, dhstr1 := range m1 {
|
||||
d, err := hex.DecodeString(dhstr1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
dh1 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
|
||||
dst, err := dh2.Distance(dh1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if dst > r {
|
||||
r = dst
|
||||
}
|
||||
}
|
||||
imgdsts += uint64(r)
|
||||
}
|
||||
imgdupr := float64(imgdsts) / float64(len(m2)) / 64.0
|
||||
v1space := make([]uint8, 0, len(v1)+len(v2))
|
||||
v2space := make([]uint8, 0, len(v1)+len(v2))
|
||||
for k, v := range v1 {
|
||||
v1space = append(v1space, v)
|
||||
if tv, ok := v2[k]; ok {
|
||||
v2space = append(v2space, tv)
|
||||
delete(v2, k)
|
||||
} else {
|
||||
v2space = append(v2space, 0)
|
||||
}
|
||||
}
|
||||
for _, v := range v2 {
|
||||
v1space = append(v1space, 0)
|
||||
v2space = append(v2space, v)
|
||||
}
|
||||
return utils.Similarity(v1space, v2space) + imgdupr/2.0, nil
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ type List struct {
|
||||
QuesC int // QuesC 总小题数
|
||||
HasntAnalyzed bool // HasntAnalyzed whether file has been analyzed
|
||||
IsTemp bool // IsTemp whether file is temp
|
||||
Path string `db:"Path,UNIQUE"` // Path of file, unique
|
||||
Path string // Path of file, normally unique
|
||||
Desc string // Desc is file's description
|
||||
}
|
||||
|
||||
@@ -89,3 +89,10 @@ func (f *FileDatabase) ListUploadedFile() (lst []*List, err error) {
|
||||
FileDB.mu.RUnlock()
|
||||
return
|
||||
}
|
||||
|
||||
func (f *FileDatabase) GetFileInfo(id int) (lst List, err error) {
|
||||
FileDB.mu.RLock()
|
||||
lst, err = sql.Find[List](&FileDB.db, FileTableList, "WHERE ID="+strconv.Itoa(id))
|
||||
FileDB.mu.RUnlock()
|
||||
return
|
||||
}
|
||||
|
||||
104
backend/global/question.go
Normal file
104
backend/global/question.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package global
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
|
||||
"github.com/corona10/goimagehash"
|
||||
"github.com/fumiama/paper-manager/backend/utils"
|
||||
)
|
||||
|
||||
// QuestionJSON is the struct representation of File.Questions. Nodes nest
// through Sub.
type QuestionJSON struct {
	// Name is name or Question ID.
	Name string `json:"name"`
	// Points is sum of subs' points or self; omitted from the JSON when zero.
	Points int `json:"points,omitempty"`
	// Rate is the avg (non-leaf) or max (leaf) similarity; omitted from the
	// JSON when zero.
	Rate float64 `json:"rate,omitempty"`
	// Sub holds the nested nodes; omitted from the JSON when empty.
	Sub []QuestionJSON `json:"sub,omitempty"`
}
|
||||
|
||||
// Question is one question taken from a docx file. Besides the plain text it
// carries JSON-encoded image hashes and word frequencies, plus the recorded
// duplicate rates against other questions.
type Question struct {
	ID     int64  // ID is the first 8 bytes of the Plain's md5
	Path   string // Path is the question's docx position
	Plain  string // Plain is the plain text of the question (like markdown format)
	Images []byte // Images is json of the image dhash in XML, ex. {"rId1": "1234567890abcdef", ...}
	Vector []byte // Vector is json of {word: freq, ...}
	Dup    []byte // Dup is json of {queid: rate, ...}
}
|
||||
|
||||
// GetDuplicateRate calc q & que's dup rate
|
||||
func (q *Question) GetDuplicateRate(que *Question) (float64, error) {
|
||||
v1, v2 := make(map[string]uint8, 64), make(map[string]uint8, 64)
|
||||
m1, m2 := make(map[string]string, 64), make(map[string]string, 64)
|
||||
if len(q.Images) > 2 {
|
||||
err := json.Unmarshal(q.Images, &m1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
if len(que.Images) > 2 {
|
||||
err := json.Unmarshal(que.Images, &m2)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
if len(q.Vector) > 2 {
|
||||
err := json.Unmarshal(q.Vector, &v1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
if len(que.Vector) > 2 {
|
||||
err := json.Unmarshal(que.Vector, &v2)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
imgdsts := uint64(0)
|
||||
for _, dhstr2 := range m2 {
|
||||
d, err := hex.DecodeString(dhstr2)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
dh2 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
|
||||
r := 0
|
||||
for _, dhstr1 := range m1 {
|
||||
d, err := hex.DecodeString(dhstr1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
dh1 := goimagehash.NewImageHash(binary.LittleEndian.Uint64(d), goimagehash.DHash)
|
||||
dst, err := dh2.Distance(dh1)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if dst > r {
|
||||
r = dst
|
||||
}
|
||||
}
|
||||
imgdsts += uint64(r)
|
||||
}
|
||||
imgdupr := 0.0
|
||||
if len(m2) > 0 {
|
||||
imgdupr = float64(imgdsts) / float64(len(m2)) / 64.0
|
||||
}
|
||||
v1space := make([]uint8, 0, len(v1)+len(v2))
|
||||
v2space := make([]uint8, 0, len(v1)+len(v2))
|
||||
for k, v := range v1 {
|
||||
v1space = append(v1space, v)
|
||||
if tv, ok := v2[k]; ok {
|
||||
v2space = append(v2space, tv)
|
||||
delete(v2, k)
|
||||
} else {
|
||||
v2space = append(v2space, 0)
|
||||
}
|
||||
}
|
||||
for _, v := range v2 {
|
||||
v1space = append(v1space, 0)
|
||||
v2space = append(v2space, v)
|
||||
}
|
||||
if imgdupr > 0 {
|
||||
return (8*utils.Similarity(v1space, v2space) + 2*imgdupr) / 10.0, nil
|
||||
}
|
||||
return utils.Similarity(v1space, v2space), nil
|
||||
}
|
||||
@@ -9,18 +9,18 @@ import (
|
||||
// Regex stores user's config of splitting docx file
|
||||
type Regex struct {
|
||||
ID int // ID is User(ID)
|
||||
Title string // Title default `.*(\d{4})\s*-.*学年.*(\d?).*([中末]?).*([AB]?)\s*卷`
|
||||
Class string // Class default `考试科目:\s*(\S+)\s*`
|
||||
Title string // Title default `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB])\s*卷`
|
||||
Class string // Class default `(考试科目|课程名称):\s*(\S+)\s*`
|
||||
OpenCl string // OpenCl default `考试形式:\s*(\S+)\s*`
|
||||
Date string // Date default `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d+)\s*日`
|
||||
Time string // Time default `考试时长:\s*(\d+)\s*分钟`
|
||||
Rate string // Rate default `成绩构成比例:\s*(.*%)\s*`
|
||||
Rate string // Rate default `(成绩构成比例|课程成绩构成):\s*(.*%)\s*`
|
||||
Major string // Major default `([一二三四五六七八九十]+)、\s*(.*)\s*(.*([空题]?)\s*(\d*).*共\s*(\d+)\s*分.*)`
|
||||
Sub string // Sub default `(\d+)、`
|
||||
}
|
||||
|
||||
func newRegex() (reg Regex) {
|
||||
reg.Title = `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB]?)\s*卷`
|
||||
reg.Title = `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB])\s*卷`
|
||||
reg.Class = `(考试科目|课程名称):\s*(\S+)\s*`
|
||||
reg.OpenCl = `考试形式:\s*(\S+)\s*`
|
||||
reg.Date = `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d*)\s*日`
|
||||
|
||||
@@ -84,6 +84,50 @@ func init() {
|
||||
}
|
||||
writeresult(w, codeSuccess, &result, messageOk, typeSuccess)
|
||||
}}
|
||||
apimap["/api/getFileInfo"] = &apihandler{"GET", func(w http.ResponseWriter, r *http.Request) {
|
||||
token := r.Header.Get("Authorization")
|
||||
user := usertokens.Get(token)
|
||||
if user == nil {
|
||||
writeresult(w, codeError, nil, errInvalidToken.Error(), typeError)
|
||||
return
|
||||
}
|
||||
var err error
|
||||
idstr := r.URL.Query().Get("id")
|
||||
if idstr == "" {
|
||||
writeresult(w, codeError, nil, "empty id", typeError)
|
||||
return
|
||||
}
|
||||
id, err := strconv.Atoi(idstr)
|
||||
if err != nil {
|
||||
writeresult(w, codeError, nil, err.Error(), typeError)
|
||||
return
|
||||
}
|
||||
lst, err := global.FileDB.GetFileInfo(id)
|
||||
if err != nil && err != sql.ErrNullResult {
|
||||
writeresult(w, codeError, nil, err.Error(), typeError)
|
||||
return
|
||||
}
|
||||
result := filelist{
|
||||
ID: id,
|
||||
Desc: lst.Desc,
|
||||
Size: float64(lst.Size) / 1024 / 1024, // MB
|
||||
Ques: lst.QuesC,
|
||||
Auth: lst.UpName,
|
||||
Date: time.Unix(lst.UpTime, 0).Format(chineseYYMMDDLayout),
|
||||
}
|
||||
j := strings.LastIndex(lst.Path, "/")
|
||||
if j <= 0 {
|
||||
result.Title = lst.Path
|
||||
} else {
|
||||
result.Title = lst.Path[j+1:]
|
||||
}
|
||||
if !lst.HasntAnalyzed {
|
||||
result.Per = 100
|
||||
} else {
|
||||
result.Per = analyzeper.Get(id)
|
||||
}
|
||||
writeresult(w, codeSuccess, &result, messageOk, typeSuccess)
|
||||
}}
|
||||
apimap["/api/getFilePercent"] = &apihandler{"GET", func(w http.ResponseWriter, r *http.Request) {
|
||||
token := r.Header.Get("Authorization")
|
||||
user := usertokens.Get(token)
|
||||
@@ -132,8 +176,13 @@ func init() {
|
||||
}
|
||||
ch := make(chan struct{}, 1)
|
||||
type message struct {
|
||||
C int `json:"code"` // C 0 success 1 pending
|
||||
M string `json:"msg"`
|
||||
}
|
||||
if analyzeper.Get(id) > 0 {
|
||||
writeresult(w, codeError, nil, "已在分析!", typeError)
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
_, err = global.FileDB.AddFile(id, reg, istemp, func(u uint) { analyzeper.Set(id, u) })
|
||||
ch <- struct{}{}
|
||||
@@ -141,14 +190,14 @@ func init() {
|
||||
}()
|
||||
select {
|
||||
case <-time.After(time.Second):
|
||||
writeresult(w, codeSuccess, &message{M: "正在分析, 请耐心等待..."}, messageOk, typeSuccess)
|
||||
writeresult(w, codeSuccess, &message{C: 1, M: "正在分析, 请耐心等待..."}, messageOk, typeSuccess)
|
||||
return
|
||||
case <-ch:
|
||||
if err != nil {
|
||||
writeresult(w, codeError, nil, err.Error(), typeError)
|
||||
return
|
||||
}
|
||||
writeresult(w, codeSuccess, &message{M: "分析完成"}, messageOk, typeSuccess)
|
||||
writeresult(w, codeSuccess, &message{C: 0, M: "分析完成"}, messageOk, typeSuccess)
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import { defHttp } from '/@/utils/http/axios'
|
||||
import { getFileListModel, FilePercent, DelFile, AnalyzeFile } from './model/fileListModel'
|
||||
import { getFileListModel, DelFile, AnalyzeFile, FileListGroupItem } from './model/fileListModel'
|
||||
import { DownloadFile, FileStatus } from './model/fileModel'
|
||||
|
||||
enum Api {
|
||||
GetFileList = '/getFileList',
|
||||
GetFileInfo = '/getFileInfo',
|
||||
GetFilePercent = '/getFilePercent',
|
||||
DelFile = '/delFile',
|
||||
AnalyzeFile = '/analyzeFile',
|
||||
@@ -18,11 +19,18 @@ export const getFileList = (count?: number) => {
|
||||
return defHttp.get<getFileListModel>({ url: Api.GetFileList, params: { count: count } })
|
||||
}
|
||||
|
||||
/**
 * @description: Request the info of a single file, selected by its id
 */
export const getFileInfo = (id: number) =>
  defHttp.get<FileListGroupItem>({ url: Api.GetFileInfo, params: { id } })
|
||||
|
||||
/**
|
||||
* @description: Get file percent
|
||||
*/
|
||||
export const getFilePercent = (id: number) => {
|
||||
return defHttp.get<FilePercent>({ url: Api.GetFilePercent, params: { id: id } })
|
||||
return defHttp.get<number>({ url: Api.GetFilePercent, params: { id: id } })
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -14,14 +14,11 @@ export interface FileListGroupItem {
|
||||
*/
|
||||
export type getFileListModel = FileListGroupItem[]
|
||||
|
||||
export interface FilePercent {
|
||||
percent: number
|
||||
}
|
||||
|
||||
export interface DelFile {
|
||||
msg: string
|
||||
}
|
||||
|
||||
export interface AnalyzeFile {
|
||||
code: number
|
||||
msg: string
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { reactive } from 'vue'
|
||||
import { getFileList, getFilePercent } from '/@/api/page'
|
||||
import { getFileList, getFilePercent, getFileInfo } from '/@/api/page'
|
||||
import { getFileListModel } from '/@/api/page/model/fileListModel'
|
||||
|
||||
export const random = (min: number, max: number) =>
|
||||
@@ -8,9 +8,9 @@ export const random = (min: number, max: number) =>
|
||||
export function refreshFilePercent(item: any) {
|
||||
return async () => {
|
||||
const p = await getFilePercent(item.id)
|
||||
if (p.percent) {
|
||||
item.percent = p.percent
|
||||
if (p.percent < 100) {
|
||||
if (p) {
|
||||
item.percent = p
|
||||
if (p < 100) {
|
||||
setTimeout(refreshFilePercent(item), 1000)
|
||||
}
|
||||
} else item.hassettimeout = false
|
||||
@@ -62,9 +62,9 @@ async function refreshFileList() {
|
||||
}
|
||||
}
|
||||
|
||||
export let cardList = reactive(await refreshFileList())
|
||||
export const cardList = reactive(await refreshFileList())
|
||||
|
||||
export let pagination = reactive({
|
||||
export const pagination = reactive({
|
||||
current: 1,
|
||||
total: cardList._cardList.length,
|
||||
show: true,
|
||||
@@ -77,17 +77,11 @@ export let pagination = reactive({
|
||||
|
||||
export function refreshCardList() {
|
||||
refreshFileList().then((value) => {
|
||||
cardList = reactive(value)
|
||||
pagination = reactive({
|
||||
current: 1,
|
||||
total: cardList._cardList.length,
|
||||
show: true,
|
||||
pageSize: 10,
|
||||
onChange: function (page: number, pageSize: number) {
|
||||
this.current = page
|
||||
this.pageSize = pageSize
|
||||
},
|
||||
})
|
||||
cardList._cardList = value._cardList
|
||||
cardList._totalQuestions = value._totalQuestions
|
||||
cardList._totalSize = value._totalSize
|
||||
pagination.current = 1
|
||||
pagination.total = cardList._cardList.length
|
||||
})
|
||||
}
|
||||
|
||||
@@ -101,3 +95,21 @@ export function deleteFileByID(id: number) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Fetch the current info of file `id` and patch every card with that id in
 * place, adjusting the list's running size and question totals by the
 * difference between the old and the new values.
 */
export function refreshFileByID(id: number) {
  getFileInfo(id).then((info) => {
    cardList._cardList.forEach((card: any) => {
      if (!(card.id == id)) {
        return
      }
      // Totals must be adjusted before the card's own figures are overwritten.
      cardList._totalSize = cardList._totalSize - card.size + info.size
      cardList._totalQuestions = cardList._totalQuestions - card.questions + info.questions
      card.title = info.title
      card.description = info.description
      card.size = info.size
      card.questions = info.questions
      card.datetime = info.datetime
      card.author = info.author
      card.percent = info.percent
    })
  })
}
|
||||
|
||||
@@ -110,6 +110,7 @@
|
||||
refreshFilePercent,
|
||||
random,
|
||||
refreshCardList,
|
||||
refreshFileByID,
|
||||
} from './data'
|
||||
import { PageWrapper } from '/@/components/Page'
|
||||
import { useMessage } from '/@/hooks/web/useMessage'
|
||||
@@ -120,7 +121,6 @@
|
||||
import { useI18n } from '/@/hooks/web/useI18n'
|
||||
import { delFile, analyzeFile } from '/@/api/page'
|
||||
import { useGo } from '/@/hooks/web/usePage'
|
||||
import { useTabs } from '/@/hooks/web/useTabs'
|
||||
|
||||
const { t } = useI18n()
|
||||
const { createMessage } = useMessage()
|
||||
@@ -149,6 +149,11 @@
|
||||
const msg = await analyzeFile(item.id, true)
|
||||
if (msg) {
|
||||
createMessage.success(msg.msg)
|
||||
if (msg.code == 0) {
|
||||
item.percent = 100
|
||||
refreshFileByID(item.id)
|
||||
return
|
||||
}
|
||||
if (!item.hassettimeout && item.percent == 0) {
|
||||
setTimeout(refreshFilePercent(item), 1000 + random(0, 1000))
|
||||
item.hassettimeout = true
|
||||
@@ -173,7 +178,6 @@
|
||||
},
|
||||
setup() {
|
||||
const { hasPermission } = usePermission()
|
||||
const { refreshPage } = useTabs()
|
||||
const go = useGo()
|
||||
|
||||
function openFile(id: number) {
|
||||
@@ -182,7 +186,6 @@
|
||||
|
||||
async function onChange(_: number[]) {
|
||||
refreshCardList()
|
||||
refreshPage()
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user