1
0
mirror of https://github.com/fumiama/paper-manager.git synced 2026-06-05 07:50:23 +08:00

add cosine similarity & 完善 PaperType

This commit is contained in:
源文雨
2023-03-22 20:54:54 +08:00
parent 5aeca6d5fa
commit 0756b36633
3 changed files with 91 additions and 1 deletions

View File

@@ -26,6 +26,17 @@ func (pt PaperType) AB() byte {
}
}
// SetAB returns a copy of pt whose A/B paper field (the lowest 4 bits,
// presumably the bits read by AB; confirm against the getter) is replaced
// according to x: 'A' stores 1 and 'B' stores 2. Any other value resets the
// field to 0. All other bits of pt are preserved.
//
// The old field value is cleared before the new one is written, so calling
// SetAB on a value that already carries a paper letter cannot produce a
// mixed code such as 1|2 == 3.
func (pt PaperType) SetAB(x byte) PaperType {
	const mask PaperType = 0x000f
	var n PaperType
	switch x {
	case 'A':
		n = 1
	case 'B':
		n = 2
	}
	// Clear the field first; a plain OR would merge old and new codes.
	return (pt &^ mask) | n
}
// MiddleFinal default 平时
func (pt PaperType) MiddleFinal() string {
switch (pt & 0xf0) >> 4 {
@@ -38,6 +49,17 @@ func (pt PaperType) MiddleFinal() string {
}
}
// SetMiddleFinal returns a copy of pt whose exam-kind field (bits 4-7, the
// bits read by MiddleFinal) is replaced according to x: "期中" stores 1 and
// "期末" stores 2. Any other string resets the field to 0, the default.
// All other bits of pt are preserved.
//
// The old field value is cleared before the new one is written, so the
// field can be changed or reset on a value that already has it set.
func (pt PaperType) SetMiddleFinal(x string) PaperType {
	const mask PaperType = 0x00f0
	var n PaperType
	switch x {
	case "期中":
		n = 1 << 4
	case "期末":
		n = 2 << 4
	}
	// Clear the field first; a plain OR would merge old and new codes.
	return (pt &^ mask) | n
}
// FirstSecond default is 年度
func (pt PaperType) FirstSecond() string {
switch (pt & 0x0f00) >> 8 {
@@ -50,6 +72,17 @@ func (pt PaperType) FirstSecond() string {
}
}
// SetFirstSecond returns a copy of pt whose term field (bits 8-11, the bits
// read by FirstSecond) is replaced according to x: "第1学期" stores 1 and
// "第2学期" stores 2. Any other string resets the field to 0, the default.
// All other bits of pt are preserved.
//
// The old field value is cleared before the new one is written, so the
// field can be changed or reset on a value that already has it set.
func (pt PaperType) SetFirstSecond(x string) PaperType {
	const mask PaperType = 0x0f00
	var n PaperType
	switch x {
	case "第1学期":
		n = 1 << 8
	case "第2学期":
		n = 2 << 8
	}
	// Clear the field first; a plain OR would merge old and new codes.
	return (pt &^ mask) | n
}
// OpenClose default 闭卷
func (pt PaperType) OpenClose() string {
switch (pt & 0xf000) >> 12 {
@@ -64,6 +97,19 @@ func (pt PaperType) OpenClose() string {
}
}
// SetOpenClose returns a copy of pt whose open/closed-book field (bits
// 12-15, the bits read by OpenClose) is replaced according to x: "开卷"
// stores 1, "一页纸开卷" stores 2 and "闭卷" stores 3. Any other string
// resets the field to 0. All other bits of pt are preserved.
//
// The old field value is cleared before the new one is written; a plain OR
// would turn, for example, 1 followed by 2 into 3 and silently report "闭卷".
func (pt PaperType) SetOpenClose(x string) PaperType {
	const mask PaperType = 0xf000
	var n PaperType
	switch x {
	case "开卷":
		n = 1 << 12
	case "一页纸开卷":
		n = 2 << 12
	case "闭卷":
		n = 3 << 12
	}
	return (pt &^ mask) | n
}
// StudyYear is an academic (school) year, stored as a uint16.
type StudyYear uint16
@@ -82,6 +128,10 @@ func init() {
if err != nil {
panic(err)
}
err = FileDB.db.Create(FileTableQuestion, &Question{})
if err != nil {
panic(err)
}
err = FileDB.db.Close()
if err != nil {
panic(err)
@@ -114,6 +164,7 @@ func (f *FileDatabase) AddFile() {}
// QuestionJSON is one node of a question tree in its JSON form. Sub holds the
// nested nodes and is omitted from the encoded output when empty.
type QuestionJSON struct {
Name string `json:"name"` // Name is the question's name or its Question ID
Points int `json:"points"` // Points is the node's own points, or the sum of its subs' points
Rate float64 `json:"rate"` // Rate is the similarity: the average for a non-leaf node, the maximum for a leaf
Sub []QuestionJSON `json:"sub,omitempty"` // Sub lists the nested sub-questions, if any
}
@@ -122,12 +173,13 @@ type Question struct {
Plain string // Plain is the plain text of the question (like markdown format)
XML []byte // XML is the OpenXML bytes of the question
Images []byte // Images is json of the image paths in XML, ex. ['md5.jpg', 'md5.png', ...]
Vector []byte // Vector is json of {word: rate, ...} freq
Dup []byte // Dup is json of Duplication struct
}
// Duplication is the struct representation of Question.Dup.
//
// It is a recursive record: To holds nested Duplication entries and is
// omitted from the encoded JSON when empty.
type Duplication struct {
	// ID is a hex string rather than a number, because JSON numbers are
	// only precise up to 53 bits.
	ID string `json:"id"`
	// Rate is the similarity: the average for a non-leaf entry, the
	// maximum for a leaf entry.
	Rate float64 `json:"rate"`
	// To lists the nested entries (presumably the questions this one is
	// similar to; confirm against the code that fills it).
	To []Duplication `json:"to,omitempty"`
}

19
backend/utils/cosimi.go Normal file
View File

@@ -0,0 +1,19 @@
// Adapted from https://github.com/kabychow/go-cosinesimilarity
package utils
import "math"
// Similarity returns the cosine similarity of the byte vectors x and y,
// that is dot(x, y) / (|x| * |y|).
//
// x and y are expected to have the same length. If they differ, only the
// common prefix (the first min(len(x), len(y)) components of each) is
// compared, instead of panicking with an index-out-of-range error when y is
// the shorter one. The result is 0 when either compared vector has zero
// magnitude, which includes empty input.
func Similarity(x, y []uint8) float64 {
	n := len(x)
	if len(y) < n {
		n = len(y)
	}
	// Reslicing both operands to the same length also lets the compiler
	// drop the per-element bounds check on y[i].
	x, y = x[:n], y[:n]

	// Sums are accumulated as integers so they are exact; each term is at
	// most 255*255, far below the uint64 range for any realistic length.
	var dot, xx, yy uint64
	for i, xv := range x {
		a, b := uint64(xv), uint64(y[i])
		dot += a * b
		xx += a * a
		yy += b * b
	}
	if xx == 0 || yy == 0 {
		return 0.0
	}
	return float64(dot) / (math.Sqrt(float64(xx)) * math.Sqrt(float64(yy)))
}

View File

@@ -0,0 +1,19 @@
package utils
import (
"math"
"testing"
)
// TestSimilarity checks Similarity against precomputed cosine values for two
// pairs of three-component vectors, allowing an absolute error of 1e-6. Each
// computed value is logged before it is checked.
func TestSimilarity(t *testing.T) {
	cases := []struct {
		x, y []uint8
		want float64
	}{
		{x: []uint8{1, 2, 3}, y: []uint8{1, 3, 4}, want: 0.9958705948858224},
		{x: []uint8{3, 2, 1}, y: []uint8{1, 3, 4}, want: 0.6813851438692469},
	}
	for _, c := range cases {
		got := Similarity(c.x, c.y)
		t.Log(got)
		if delta := math.Abs(got - c.want); delta > 1e-6 {
			t.Fail()
		}
	}
}