mirror of
https://github.com/fumiama/paper-manager.git
synced 2026-06-10 10:50:23 +08:00
finish getFileList getFilePercent and part of analyzeFile
This commit is contained in:
@@ -85,8 +85,8 @@ func init() {
|
||||
|
||||
// File stores to paper/Class/2022-2023学年/第一学期/期末/A/xxx.docx
|
||||
type File struct {
|
||||
ID uint64 // ID is the first 8 bytes of the original file's md5
|
||||
ListID int // ListID is the foreign key to List(ID)
|
||||
ID int64 // ID is the first 8 bytes of the original file's md5
|
||||
ListID int // ListID is the foreign key to List(ID)
|
||||
Year StudyYear
|
||||
Type PaperType
|
||||
Date uint32 // Date is the yyyymmdd of 考试日期
|
||||
@@ -115,6 +115,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
if !user.IsFileManager() && !istemp {
|
||||
return nil, ErrInvalidRole
|
||||
}
|
||||
progress(1)
|
||||
lst, err := sql.Find[List](&FileDB.db, FileTableList, "WHERE ID="+strconv.Itoa(lstid))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -128,13 +129,14 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
return nil, err
|
||||
}
|
||||
defer docf.Close()
|
||||
progress(2)
|
||||
h := md5.New()
|
||||
_, err = io.Copy(h, docf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var buf [md5.Size]byte
|
||||
id := binary.LittleEndian.Uint64(h.Sum(buf[:0]))
|
||||
id := int64(binary.LittleEndian.Uint64(h.Sum(buf[:0])))
|
||||
_, err = docf.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -144,10 +146,12 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
return nil, err
|
||||
}
|
||||
sz := stat.Size()
|
||||
progress(3)
|
||||
doc, err := docx.Parse(docf, sz)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
progress(5)
|
||||
doc.Document.Body.DropDrawingOf("NilPicture")
|
||||
majorre, err := regexp.Compile(reg.Major)
|
||||
if err != nil {
|
||||
@@ -157,6 +161,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
if len(docs) < 2 {
|
||||
return nil, ErrMajorSplitsTooShort
|
||||
}
|
||||
progress(9)
|
||||
// filling File struct
|
||||
file := &File{
|
||||
ID: id,
|
||||
@@ -186,6 +191,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
progress(10)
|
||||
for _, it := range docs[0].Document.Body.Items {
|
||||
if p, ok := it.(*docx.Paragraph); ok {
|
||||
text := p.String()
|
||||
@@ -206,8 +212,8 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
}
|
||||
}
|
||||
class := classre.FindStringSubmatch(text)
|
||||
if len(class) >= 2 {
|
||||
file.Class = class[1]
|
||||
if len(class) >= 3 {
|
||||
file.Class = class[2]
|
||||
}
|
||||
opcl := opclre.FindStringSubmatch(text)
|
||||
if len(opcl) >= 2 {
|
||||
@@ -216,7 +222,10 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
date := datere.FindStringSubmatch(text)
|
||||
if len(date) >= 4 {
|
||||
y, m, d := date[1], date[2], date[3]
|
||||
if y != "" && m != "" && d != "" {
|
||||
if y != "" && m != "" {
|
||||
if d == "" {
|
||||
d = "1"
|
||||
}
|
||||
yyyy, err := strconv.ParseUint(y, 10, 64)
|
||||
if err == nil && yyyy > 1600 {
|
||||
mm, err := strconv.ParseUint(m, 10, 64)
|
||||
@@ -237,11 +246,12 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
}
|
||||
}
|
||||
rate := ratere.FindStringSubmatch(text)
|
||||
if len(rate) >= 2 {
|
||||
file.Rate = rate[1]
|
||||
if len(rate) >= 3 {
|
||||
file.Rate = rate[2]
|
||||
}
|
||||
}
|
||||
}
|
||||
progress(19)
|
||||
if file.Class == "" || strings.Contains(file.Class, "..") {
|
||||
return nil, ErrEmptyClass
|
||||
}
|
||||
@@ -250,12 +260,12 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
filebasepath = PaperFolder + "temp/" + strconv.Itoa(*user.ID) + "/"
|
||||
} else {
|
||||
filebasepath = fmt.Sprintf(
|
||||
PaperFolder+file.Class+"/%v/%v/%v/%v/",
|
||||
PaperFolder+file.Class+"/%v/%v/%v/%c/",
|
||||
file.Year, file.Type.FirstSecond(), file.Type.MiddleFinal(), file.Type.AB(),
|
||||
)
|
||||
}
|
||||
lst.Path = filebasepath
|
||||
err = os.MkdirAll(filebasepath, 0755)
|
||||
questionpath := filebasepath + "questions/"
|
||||
err = os.MkdirAll(questionpath, 0755)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -266,7 +276,19 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
return nil, err
|
||||
}
|
||||
filequestions := make([]QuestionJSON, 0, len(docs))
|
||||
lst.QuesC = 0
|
||||
progress(20)
|
||||
p := uint(20)
|
||||
delta := uint(70 / len(docs))
|
||||
if delta == 0 {
|
||||
delta = 1
|
||||
}
|
||||
for _, majordoc := range docs {
|
||||
p += delta
|
||||
if p > 90 {
|
||||
p = 90
|
||||
}
|
||||
progress(p)
|
||||
majorq := QuestionJSON{}
|
||||
for _, it := range majordoc.Document.Body.Items {
|
||||
if p, ok := it.(*docx.Paragraph); ok {
|
||||
@@ -280,6 +302,10 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
}
|
||||
}
|
||||
subdocs := majordoc.SplitByParagraph(docx.SplitDocxByPlainTextRegex(subre))
|
||||
if len(subdocs) < 2 {
|
||||
continue
|
||||
}
|
||||
subdocs = subdocs[1:]
|
||||
majorq.Sub = make([]QuestionJSON, 0, len(subdocs))
|
||||
for _, subdoc := range subdocs {
|
||||
sb := bytes.NewBuffer(make([]byte, 0, 4096))
|
||||
@@ -288,7 +314,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
}
|
||||
m := md5.Sum(sb.Bytes())
|
||||
que := &Question{
|
||||
ID: binary.LittleEndian.Uint64(m[:8]),
|
||||
ID: int64(binary.LittleEndian.Uint64(m[:8])),
|
||||
Plain: base14.BytesToString(sb.Bytes()),
|
||||
Images: func() []byte {
|
||||
m := make(map[string]string)
|
||||
@@ -340,7 +366,9 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
}
|
||||
v := make(map[string]uint8, len(words)*2)
|
||||
for _, word := range words {
|
||||
v[word]++
|
||||
if word != "" && word != "\n" && word != " " {
|
||||
v[word]++
|
||||
}
|
||||
}
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
@@ -361,7 +389,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
return nil
|
||||
}
|
||||
var buf [8]byte
|
||||
binary.LittleEndian.PutUint64(buf[:], q.ID)
|
||||
binary.LittleEndian.PutUint64(buf[:], uint64(q.ID))
|
||||
dupmap[hex.EncodeToString(buf[:])] = r
|
||||
return nil
|
||||
})
|
||||
@@ -372,11 +400,11 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
w := bytes.NewBuffer(make([]byte, 0, 65536))
|
||||
_, err = subdoc.WriteTo(w)
|
||||
var buf [8]byte
|
||||
binary.LittleEndian.PutUint64(buf[:], que.ID)
|
||||
binary.LittleEndian.PutUint64(buf[:], uint64(que.ID))
|
||||
queidstr := hex.EncodeToString(buf[:])
|
||||
if err == nil {
|
||||
m5 := md5.Sum(w.Bytes())
|
||||
quepath := filebasepath + hex.EncodeToString(m5[:]) + ".docx"
|
||||
quepath := questionpath + hex.EncodeToString(m5[:]) + ".docx"
|
||||
f, err := os.Create(quepath)
|
||||
if err == nil {
|
||||
_, _ = io.Copy(f, w)
|
||||
@@ -420,9 +448,29 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
})
|
||||
}
|
||||
filequestions = append(filequestions, majorq)
|
||||
lst.QuesC += len(majorq.Sub)
|
||||
}
|
||||
progress(90)
|
||||
file.Questions, _ = json.Marshal(filequestions)
|
||||
lst.Path += file.Class + ".docx"
|
||||
_, err = docf.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
lst.Path = filebasepath + file.Class + ".docx"
|
||||
lst.HasntAnalyzed = false
|
||||
lst.Desc = fmt.Sprintf("%s%v%v%v%c卷",
|
||||
file.Class, file.Year, file.Type.FirstSecond(), file.Type.MiddleFinal(), file.Type.AB(),
|
||||
)
|
||||
dstf, err := os.Create(lst.Path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer dstf.Close()
|
||||
_, err = io.Copy(dstf, docf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
progress(95)
|
||||
FileDB.mu.Lock()
|
||||
if istemp {
|
||||
err = FileDB.db.Insert(FileTableTempFile, file)
|
||||
@@ -433,6 +481,7 @@ func (f *FileDatabase) AddFile(lstid int, reg *Regex, istemp bool, progress func
|
||||
}
|
||||
_ = FileDB.db.Insert(FileTableList, &lst)
|
||||
FileDB.mu.Unlock()
|
||||
progress(100)
|
||||
return file, err
|
||||
}
|
||||
|
||||
@@ -445,7 +494,7 @@ type QuestionJSON struct {
|
||||
}
|
||||
|
||||
type Question struct {
|
||||
ID uint64 // ID is the first 8 bytes of the Plain's md5
|
||||
ID int64 // ID is the first 8 bytes of the Plain's md5
|
||||
Path string // Path is the question's docx position
|
||||
Plain string // Plain is the plain text of the question (like markdown format)
|
||||
Images []byte // Images is json of the image dhash in XML, ex. ['rId1': '1234567890abcdef', ...]
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
sql "github.com/FloatTech/sqlite"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -15,17 +17,21 @@ var (
|
||||
|
||||
// List of file path
|
||||
type List struct {
|
||||
ID *int // ID is self-inc
|
||||
Uploader int // Uploader is uid
|
||||
UpTime int64 // UpTime is upload time (unix timestamp)
|
||||
Size int64 // Size of the original file
|
||||
IsTemp bool // IsTemp whether file is temp
|
||||
Path string // Path of file
|
||||
ID *int // ID is self-inc
|
||||
Uploader int // Uploader is uid
|
||||
UpName string // UpName is uploader's name
|
||||
UpTime int64 // UpTime is upload time (unix timestamp)
|
||||
Size int64 // Size of the original file
|
||||
QuesC int // QuesC 总小题数
|
||||
HasntAnalyzed bool // HasntAnalyzed whether file has been analyzed
|
||||
IsTemp bool // IsTemp whether file is temp
|
||||
Path string `db:"Path,UNIQUE"` // Path of file, unique
|
||||
Desc string // Desc is file's description
|
||||
}
|
||||
|
||||
// SaveFileToTemp copies file to PaperFolder/tmp/uploader/name and adds a record to the list.
|
||||
func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string) (err error) {
|
||||
_, err = UserDB.GetUserByID(uploader)
|
||||
func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string) (id int, err error) {
|
||||
user, err := UserDB.GetUserByID(uploader)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@@ -38,12 +44,16 @@ func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
lst := List{
|
||||
Uploader: uploader,
|
||||
UpTime: time.Now().Unix(),
|
||||
IsTemp: true,
|
||||
Path: tmpdir + "/" + name,
|
||||
}
|
||||
fpath := tmpdir + "/" + name
|
||||
FileDB.mu.RLock()
|
||||
lst, _ := sql.Find[List](&FileDB.db, FileTableList, "WHERE Path='"+fpath+"'")
|
||||
FileDB.mu.RUnlock()
|
||||
lst.Uploader = uploader
|
||||
lst.UpName = user.Name
|
||||
lst.UpTime = time.Now().Unix()
|
||||
lst.HasntAnalyzed = true
|
||||
lst.IsTemp = true
|
||||
lst.Path = fpath
|
||||
ff, err := os.Create(lst.Path)
|
||||
if err != nil {
|
||||
return
|
||||
@@ -58,5 +68,24 @@ func (f *FileDatabase) SaveFileToTemp(uploader int, file io.Reader, name string)
|
||||
FileDB.mu.Lock()
|
||||
err = FileDB.db.Insert(FileTableList, &lst)
|
||||
FileDB.mu.Unlock()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if lst.ID != nil {
|
||||
id = *lst.ID
|
||||
return
|
||||
}
|
||||
FileDB.mu.RLock()
|
||||
err = FileDB.db.Find(FileTableList, &lst, "WHERE Path='"+fpath+"'")
|
||||
FileDB.mu.RUnlock()
|
||||
id = *lst.ID
|
||||
return
|
||||
}
|
||||
|
||||
// ListUploadedFile will select all file that HasntAnalyzed && IsTemp or !HasntAnalyzed && !IsTemp
|
||||
func (f *FileDatabase) ListUploadedFile() (lst []*List, err error) {
|
||||
FileDB.mu.RLock()
|
||||
lst, err = sql.FindAll[List](&FileDB.db, FileTableList, "WHERE (HasntAnalyzed AND IsTemp) OR (NOT HasntAnalyzed AND NOT IsTemp) ORDER BY UpTime DESC")
|
||||
FileDB.mu.RUnlock()
|
||||
return
|
||||
}
|
||||
|
||||
@@ -20,12 +20,12 @@ type Regex struct {
|
||||
}
|
||||
|
||||
func newRegex() (reg Regex) {
|
||||
reg.Title = `.*(\d{4})\s*-.*学年.*(\d?).*([中末]?).*([AB]?)\s*卷`
|
||||
reg.Class = `考试科目:\s*(\S+)\s*`
|
||||
reg.Title = `.*(\d{4})\s*-.*学年.*(\d).*([中末]).*([AB]?)\s*卷`
|
||||
reg.Class = `(考试科目|课程名称):\s*(\S+)\s*`
|
||||
reg.OpenCl = `考试形式:\s*(\S+)\s*`
|
||||
reg.Date = `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d+)\s*日`
|
||||
reg.Date = `考试日期:\s*(\d+)\s*年\s*(\d+)\s*月\s*(\d*)\s*日`
|
||||
reg.Time = `考试时长:\s*(\d+)\s*分钟`
|
||||
reg.Rate = `成绩构成比例:\s*(.*%)\s*`
|
||||
reg.Rate = `(成绩构成比例|课程成绩构成):\s*(.*%)\s*`
|
||||
reg.Major = `([一二三四五六七八九十]+)、\s*(.*)\s*(.*([空题]?)\s*(\d*).*共\s*(\d+)\s*分.*)`
|
||||
reg.Sub = `(\d+)、`
|
||||
return
|
||||
@@ -77,5 +77,6 @@ func (u *UserDatabase) GetUserRegex(id int) (*Regex, error) {
|
||||
u.mu.RLock()
|
||||
_ = u.db.Find(UserTableRegex, &reg, "WHERE ID="+strconv.Itoa(id))
|
||||
u.mu.RUnlock()
|
||||
reg.ID = *user.ID
|
||||
return &reg, nil
|
||||
}
|
||||
|
||||
@@ -95,8 +95,8 @@ type User struct {
|
||||
Role UserRole
|
||||
Date int64 // Date is the creating date's unix timestamp
|
||||
Pswd string
|
||||
Last int64 // Last is the last password reseting unix timestamp
|
||||
Name string
|
||||
Last int64 // Last is the last password reseting unix timestamp
|
||||
Name string `db:"Name,UNIQUE"`
|
||||
Nick string
|
||||
Avtr string // Avtr is the user's avatar, typically a image url
|
||||
Cont string // Cont is the user's contact, ex. phone number
|
||||
|
||||
Reference in New Issue
Block a user