1
0
mirror of https://github.com/fumiama/paper-manager.git synced 2026-06-08 17:40:23 +08:00

finish getFileList getFilePercent and part of analyzeFile

This commit is contained in:
源文雨
2023-04-08 00:30:04 +08:00
parent 773db2e581
commit 2fcb3fd636
15 changed files with 302 additions and 90 deletions

View File

@@ -85,8 +85,8 @@ func init() {
// File stores to paper/Class/2022-2023学年/第一学期/期末/A/xxx.docx
type File struct {
ID uint64 // ID is the first 8 bytes of the original file's md5
ListID int // ListID is the foreign key to List(ID)
ID int64 // ID is the first 8 bytes of the original file's md5
ListID int // ListID is the foreign key to List(ID)
Year StudyYear
Type PaperType
Date uint32 // Date is the yyyymmdd of 考试日期
@@ -115,6 +115,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
if !user.IsFileManager() && !istemp {
return nil, ErrInvalidRole
}
progress(1)
lst, err := sql.Find[List](&FileDB.db, FileTableList, "WHERE ID="+strconv.Itoa(lstid))
if err != nil {
return nil, err
@@ -128,13 +129,14 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
return nil, err
}
defer docf.Close()
progress(2)
h := md5.New()
_, err = io.Copy(h, docf)
if err != nil {
return nil, err
}
var buf [md5.Size]byte
id := binary.LittleEndian.Uint64(h.Sum(buf[:0]))
id := int64(binary.LittleEndian.Uint64(h.Sum(buf[:0])))
_, err = docf.Seek(0, io.SeekStart)
if err != nil {
return nil, err
@@ -144,10 +146,12 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
return nil, err
}
sz := stat.Size()
progress(3)
doc, err := docx.Parse(docf, sz)
if err != nil {
return nil, err
}
progress(5)
doc.Document.Body.DropDrawingOf("NilPicture")
majorre, err := regexp.Compile(reg.Major)
if err != nil {
@@ -157,6 +161,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
if len(docs) < 2 {
return nil, ErrMajorSplitsTooShort
}
progress(9)
// filling File struct
file := &File{
ID: id,
@@ -186,6 +191,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
if err != nil {
return nil, err
}
progress(10)
for _, it := range docs[0].Document.Body.Items {
if p, ok := it.(*docx.Paragraph); ok {
text := p.String()
@@ -206,8 +212,8 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
}
}
class := classre.FindStringSubmatch(text)
if len(class) >= 2 {
file.Class = class[1]
if len(class) >= 3 {
file.Class = class[2]
}
opcl := opclre.FindStringSubmatch(text)
if len(opcl) >= 2 {
@@ -216,7 +222,10 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
date := datere.FindStringSubmatch(text)
if len(date) >= 4 {
y, m, d := date[1], date[2], date[3]
if y != "" && m != "" && d != "" {
if y != "" && m != "" {
if d == "" {
d = "1"
}
yyyy, err := strconv.ParseUint(y, 10, 64)
if err == nil && yyyy > 1600 {
mm, err := strconv.ParseUint(m, 10, 64)
@@ -237,11 +246,12 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
}
}
rate := ratere.FindStringSubmatch(text)
if len(rate) >= 2 {
file.Rate = rate[1]
if len(rate) >= 3 {
file.Rate = rate[2]
}
}
}
progress(19)
if file.Class == "" || strings.Contains(file.Class, "..") {
return nil, ErrEmptyClass
}
@@ -250,12 +260,12 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
filebasepath = PaperFolder + "temp/" + strconv.Itoa(*user.ID) + "/"
} else {
filebasepath = fmt.Sprintf(
PaperFolder+file.Class+"/%v/%v/%v/%v/",
PaperFolder+file.Class+"/%v/%v/%v/%c/",
file.Year, file.Type.FirstSecond(), file.Type.MiddleFinal(), file.Type.AB(),
)
}
lst.Path = filebasepath
err = os.MkdirAll(filebasepath, 0755)
questionpath := filebasepath + "questions/"
err = os.MkdirAll(questionpath, 0755)
if err != nil {
return nil, err
}
@@ -266,7 +276,19 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
return nil, err
}
filequestions := make([]QuestionJSON, 0, len(docs))
lst.QuesC = 0
progress(20)
p := uint(20)
delta := uint(70 / len(docs))
if delta == 0 {
delta = 1
}
for _, majordoc := range docs {
p += delta
if p > 90 {
p = 90
}
progress(p)
majorq := QuestionJSON{}
for _, it := range majordoc.Document.Body.Items {
if p, ok := it.(*docx.Paragraph); ok {
@@ -280,6 +302,10 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
}
}
subdocs := majordoc.SplitByParagraph(docx.SplitDocxByPlainTextRegex(subre))
if len(subdocs) < 2 {
continue
}
subdocs = subdocs[1:]
majorq.Sub = make([]QuestionJSON, 0, len(subdocs))
for _, subdoc := range subdocs {
sb := bytes.NewBuffer(make([]byte, 0, 4096))
@@ -288,7 +314,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
}
m := md5.Sum(sb.Bytes())
que := &Question{
ID: binary.LittleEndian.Uint64(m[:8]),
ID: int64(binary.LittleEndian.Uint64(m[:8])),
Plain: base14.BytesToString(sb.Bytes()),
Images: func() []byte {
m := make(map[string]string)
@@ -340,7 +366,9 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
}
v := make(map[string]uint8, len(words)*2)
for _, word := range words {
v[word]++
if word != "" && word != "\n" && word != " " {
v[word]++
}
}
data, err := json.Marshal(v)
if err != nil {
@@ -361,7 +389,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
return nil
}
var buf [8]byte
binary.LittleEndian.PutUint64(buf[:], q.ID)
binary.LittleEndian.PutUint64(buf[:], uint64(q.ID))
dupmap[hex.EncodeToString(buf[:])] = r
return nil
})
@@ -372,11 +400,11 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
w := bytes.NewBuffer(make([]byte, 0, 65536))
_, err = subdoc.WriteTo(w)
var buf [8]byte
binary.LittleEndian.PutUint64(buf[:], que.ID)
binary.LittleEndian.PutUint64(buf[:], uint64(que.ID))
queidstr := hex.EncodeToString(buf[:])
if err == nil {
m5 := md5.Sum(w.Bytes())
quepath := filebasepath + hex.EncodeToString(m5[:]) + ".docx"
quepath := questionpath + hex.EncodeToString(m5[:]) + ".docx"
f, err := os.Create(quepath)
if err == nil {
_, _ = io.Copy(f, w)
@@ -420,9 +448,29 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
})
}
filequestions = append(filequestions, majorq)
lst.QuesC += len(majorq.Sub)
}
progress(90)
file.Questions, _ = json.Marshal(filequestions)
lst.Path += file.Class + ".docx"
_, err = docf.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
lst.Path = filebasepath + file.Class + ".docx"
lst.HasntAnalyzed = false
lst.Desc = fmt.Sprintf("%s%v%v%v%c卷",
file.Class, file.Year, file.Type.FirstSecond(), file.Type.MiddleFinal(), file.Type.AB(),
)
dstf, err := os.Create(lst.Path)
if err != nil {
return nil, err
}
defer dstf.Close()
_, err = io.Copy(dstf, docf)
if err != nil {
return nil, err
}
progress(95)
FileDB.mu.Lock()
if istemp {
err = FileDB.db.Insert(FileTableTempFile, file)
@@ -433,6 +481,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
}
_ = FileDB.db.Insert(FileTableList, &lst)
FileDB.mu.Unlock()
progress(100)
return file, err
}
@@ -445,7 +494,7 @@ type QuestionJSON struct {
}
type Question struct {
ID uint64 // ID is the first 8 bytes of the Plain's md5
ID int64 // ID is the first 8 bytes of the Plain's md5
Path string // Path is the question's docx position
Plain string // Plain is the plain text of the question (like markdown format)
Images []byte // Images is json of the image dhash in XML, ex. ['rId1': '1234567890abcdef', ...]

View File

@@ -7,6 +7,8 @@ import (
"strconv"
"strings"
"time"
sql "github.com/FloatTech/sqlite"
)
var (
@@ -15,17 +17,21 @@ var (
// List of file path
type List struct {
ID *int // ID is self-inc
Uploader int // Uploader is uid
UpTime int64 // UpTime is upload time (unix timestamp)
Size int64 // Size of the original file
IsTemp bool // IsTemp whether file is temp
Path string // Path of file
ID *int // ID is self-inc
Uploader int // Uploader is uid
UpName string // UpName is uploader's name
UpTime int64 // UpTime is upload time (unix timestamp)
Size int64 // Size of the original file
QuesC int // QuesC 总小题数
HasntAnalyzed bool // HasntAnalyzed whether file has been analyzed
IsTemp bool // IsTemp whether file is temp
Path string `db:"Path,UNIQUE"` // Path of file, unique
Desc string // Desc is file's description
}
// SaveFileToTemp copies the file to PaperFolder/tmp/uploader/name and adds a record to the list.
func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string) (err error) {
_, err = UserDB.GetUserByID(uploader)
func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string) (id int, err error) {
user, err := UserDB.GetUserByID(uploader)
if err != nil {
return
}
@@ -38,12 +44,16 @@ func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string)
if err != nil {
return
}
lst := List{
Uploader: uploader,
UpTime: time.Now().Unix(),
IsTemp: true,
Path: tmpdir + "/" + name,
}
fpath := tmpdir + "/" + name
FileDB.mu.RLock()
lst, _ := sql.Find[List](&FileDB.db, FileTableList, "WHERE Path='"+fpath+"'")
FileDB.mu.RUnlock()
lst.Uploader = uploader
lst.UpName = user.Name
lst.UpTime = time.Now().Unix()
lst.HasntAnalyzed = true
lst.IsTemp = true
lst.Path = fpath
ff, err := os.Create(lst.Path)
if err != nil {
return
@@ -58,5 +68,24 @@ func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string)
FileDB.mu.Lock()
err = FileDB.db.Insert(FileTableList, &lst)
FileDB.mu.Unlock()
if err != nil {
return
}
if lst.ID != nil {
id = *lst.ID
return
}
FileDB.mu.RLock()
err = FileDB.db.Find(FileTableList, &lst, "WHERE Path='"+fpath+"'")
FileDB.mu.RUnlock()
id = *lst.ID
return
}
// ListUploadedFile returns the list records that are either still waiting for
// analysis in the temp area (HasntAnalyzed AND IsTemp) or already analyzed and
// no longer temp (NOT HasntAnalyzed AND NOT IsTemp), ordered by upload time,
// newest first. The query runs under the FileDB read lock.
func (f *FileDatabase) ListUploadedFile() (lst []*List, err error) {
	const query = "WHERE (HasntAnalyzed AND IsTemp) OR (NOT HasntAnalyzed AND NOT IsTemp) ORDER BY UpTime DESC"
	FileDB.mu.RLock()
	files, ferr := sql.FindAll[List](&FileDB.db, FileTableList, query)
	FileDB.mu.RUnlock()
	return files, ferr
}

View File

@@ -20,12 +20,12 @@ type Regex struct {
}
func newRegex() (reg Regex) {
reg.Title = `.*(\d{4})\s*-.*学年.*(\d?).*([中末]?).*([AB]?)\s*卷`
reg.Class = `考试科目:\s*(\S+)\s*`
reg.Title = `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB]?)\s*卷`
reg.Class = `(考试科目|课程名称)\s*(\S+)\s*`
reg.OpenCl = `考试形式:\s*(\S+)\s*`
reg.Date = `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d+)\s*日`
reg.Date = `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d*)\s*日`
reg.Time = `考试时长:\s*(\d+)\s*分钟`
reg.Rate = `成绩构成比例:\s*(.*%)\s*`
reg.Rate = `(成绩构成比例|课程成绩构成)\s*(.*%)\s*`
reg.Major = `([一二三四五六七八九十]+)、\s*(.*)\s*.*([空题]?)\s*(\d*).*共\s*(\d+)\s*分.*`
reg.Sub = `(\d+)、`
return
@@ -77,5 +77,6 @@ func (u *UserDatabase) GetUserRegex(id int) (*Regex, error) {
u.mu.RLock()
_ = u.db.Find(UserTableRegex, &reg, "WHERE ID="+strconv.Itoa(id))
u.mu.RUnlock()
reg.ID = *user.ID
return &reg, nil
}

View File

@@ -95,8 +95,8 @@ type User struct {
Role UserRole
Date int64 // Date is the creating date's unix timestamp
Pswd string
Last int64 // Last is the last password reseting unix timestamp
Name string
Last int64 // Last is the last password reseting unix timestamp
Name string `db:"Name,UNIQUE"`
Nick string
Avtr string // Avtr is the user's avatar, typically a image url
Cont string // Cont is the user's contact, ex. phone number

View File

@@ -1,13 +1,158 @@
package backend
import (
"errors"
"net/http"
"strconv"
"strings"
"time"
sql "github.com/FloatTech/sqlite"
"github.com/FloatTech/ttl"
"github.com/fumiama/paper-manager/backend/global"
"github.com/fumiama/paper-manager/backend/utils"
"github.com/sirupsen/logrus"
)
const (
// chineseYYMMDDLayout is the time layout used to render a file's upload
// time in the getFileList response (year, month and day with Chinese suffixes).
chineseYYMMDDLayout = "2006年01月02日"
)
// analyzeper caches the analysis progress (percent) of a file, keyed by its
// list record ID. It is written by the progress callback handed to AddFile and
// read by getFileList / getFilePercent.
// NOTE(review): presumably entries expire one hour after being set — confirm
// against the ttl package.
var analyzeper = ttl.NewCache[int, uint](time.Hour)
var (
// errNoAnalyzePermission is reported by analyzeFile when a user who is not
// a file manager asks for a permanent (non-temp) analysis.
errNoAnalyzePermission = errors.New("no analyze permission")
)
// filelist is one entry of the JSON array returned by /api/getFileList.
type filelist struct {
// ID is the list record ID.
ID int `json:"id"`
// Title is the part of the stored path after its last "/".
Title string `json:"title"`
// Desc is the description stored with the list record.
Desc string `json:"description"`
// Size is the original file size in MB.
Size float64 `json:"size"`
// Ques is the question count (QuesC) stored with the list record.
Ques int `json:"questions"`
// Auth is the uploader's name.
Auth string `json:"author"`
// Date is the upload time formatted with chineseYYMMDDLayout.
Date string `json:"datetime"`
// Per is the analysis progress: 100 once analyzed, otherwise the value
// currently held in analyzeper.
Per uint `json:"percent"`
}
// init registers the file-related API endpoints in apimap:
//
//   - GET /api/getFileList lists the uploaded files, optionally truncated to
//     the first "count" entries.
//   - GET /api/getFilePercent reports the cached analysis progress of the list
//     record named by the "id" query parameter.
//   - GET /api/analyzeFile starts analyzing the list record named by "id".
//
// Every endpoint requires a valid token in the Authorization header.
func init() {
	apimap["/api/getFileList"] = &apihandler{"GET", func(w http.ResponseWriter, r *http.Request) {
		token := r.Header.Get("Authorization")
		user := usertokens.Get(token)
		if user == nil {
			writeresult(w, codeError, nil, errInvalidToken.Error(), typeError)
			return
		}
		// count <= 0 means "no limit".
		count := -1
		if countstr := r.URL.Query().Get("count"); countstr != "" {
			n, err := strconv.Atoi(countstr)
			if err != nil {
				writeresult(w, codeError, nil, err.Error(), typeError)
				return
			}
			count = n
		}
		lst, err := global.FileDB.ListUploadedFile()
		// An empty table is not an error: it simply yields an empty list.
		if err != nil && err != sql.ErrNullResult {
			writeresult(w, codeError, nil, err.Error(), typeError)
			return
		}
		if count > 0 && len(lst) > count {
			lst = lst[:count]
		}
		result := make([]filelist, len(lst))
		for i, v := range lst {
			item := &result[i]
			item.ID = *v.ID
			// Title is the base name of the stored path. A slash at index 0
			// is a separator too, so only "no slash at all" keeps the whole path.
			item.Title = v.Path
			if j := strings.LastIndex(v.Path, "/"); j >= 0 {
				item.Title = v.Path[j+1:]
			}
			item.Desc = v.Desc
			item.Size = float64(v.Size) / 1024 / 1024 // MB
			item.Ques = v.QuesC
			item.Auth = v.UpName
			item.Date = time.Unix(v.UpTime, 0).Format(chineseYYMMDDLayout)
			// Analyzed files are complete; otherwise report the cached progress.
			item.Per = 100
			if v.HasntAnalyzed {
				item.Per = analyzeper.Get(*v.ID)
			}
		}
		writeresult(w, codeSuccess, &result, messageOk, typeSuccess)
	}}
	apimap["/api/getFilePercent"] = &apihandler{"GET", func(w http.ResponseWriter, r *http.Request) {
		token := r.Header.Get("Authorization")
		user := usertokens.Get(token)
		if user == nil {
			writeresult(w, codeError, nil, errInvalidToken.Error(), typeError)
			return
		}
		idstr := r.URL.Query().Get("id")
		if idstr == "" {
			writeresult(w, codeError, nil, "empty id", typeError)
			return
		}
		id, err := strconv.Atoi(idstr)
		if err != nil {
			writeresult(w, codeError, nil, err.Error(), typeError)
			return
		}
		writeresult(w, codeSuccess, analyzeper.Get(id), messageOk, typeSuccess)
	}}
	apimap["/api/analyzeFile"] = &apihandler{"GET", func(w http.ResponseWriter, r *http.Request) {
		token := r.Header.Get("Authorization")
		user := usertokens.Get(token)
		if user == nil {
			writeresult(w, codeError, nil, errInvalidToken.Error(), typeError)
			return
		}
		// Analysis is temporary unless the caller explicitly asks for permanent.
		istemp := r.URL.Query().Get("permanent") != "true"
		if !user.IsFileManager() && !istemp {
			writeresult(w, codeError, nil, errNoAnalyzePermission.Error(), typeError)
			return
		}
		idstr := r.URL.Query().Get("id")
		if idstr == "" {
			writeresult(w, codeError, nil, "empty id", typeError)
			return
		}
		id, err := strconv.Atoi(idstr)
		if err != nil {
			writeresult(w, codeError, nil, err.Error(), typeError)
			return
		}
		reg, err := global.UserDB.GetUserRegex(*user.ID)
		if err != nil {
			writeresult(w, codeError, nil, err.Error(), typeError)
			return
		}
		type message struct {
			M string `json:"msg"`
		}
		// The result travels through a buffered channel so the goroutine never
		// blocks (even after the handler has returned) and never writes to a
		// variable owned by the handler.
		done := make(chan error, 1)
		go func() {
			_, aerr := global.FileDB.AddFile(id, reg, istemp, func(u uint) { analyzeper.Set(id, u) })
			done <- aerr
		}()
		// Wait at most one second for a quick result; otherwise answer right
		// away and let the analysis continue in the background, publishing its
		// progress to analyzeper.
		// NOTE(review): an error occurring after the timeout is dropped.
		select {
		case <-time.After(time.Second):
			writeresult(w, codeSuccess, &message{M: "正在分析, 请耐心等待..."}, messageOk, typeSuccess)
			return
		case aerr := <-done:
			if aerr != nil {
				writeresult(w, codeError, nil, aerr.Error(), typeError)
				return
			}
			writeresult(w, codeSuccess, &message{M: "分析完成"}, messageOk, typeSuccess)
		}
	}}
}
// PaperHandler serves protected contents in global.FileFolder
func PaperHandler(w http.ResponseWriter, r *http.Request) {
if !utils.IsMethod("GET", w, r) {

View File

@@ -114,12 +114,12 @@ func UploadHandler(w http.ResponseWriter, r *http.Request) {
writeresult(w, codeError, nil, "invalid filename", typeError)
return
}
err = global.FileDB.SaveFileToTemp(*user.ID, ff, fn)
id, err := global.FileDB.SaveFileToTemp(*user.ID, ff, fn)
if err != nil {
writeresult(w, codeError, nil, err.Error(), typeError)
return
}
writeresult(w, codeSuccess, "上传"+fn+"成功", messageOk, typeSuccess)
writeresult(w, codeSuccess, id, messageOk, typeSuccess)
return
}
if err != http.ErrMissingFile {