diff --git a/.gitignore b/.gitignore index 196db6b..249a408 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ docxlib .vscode/ *.docx -/*.xml \ No newline at end of file +/*.xml +.DS_Store diff --git a/apidrawing.go b/apidrawing.go index bc8d3c4..a094811 100644 --- a/apidrawing.go +++ b/apidrawing.go @@ -1,7 +1,95 @@ package docxlib -// AddDrawing adds drawing to paragraph -func (p *Paragraph) AddDrawing(pic []byte) *Run { - //TODO: finish add drawing - return nil +import ( + "bytes" + "fmt" + "math/rand" + "os" + "strconv" + "sync/atomic" + + "github.com/fumiama/imgsz" +) + +// AddInlineDrawing adds inline drawing to paragraph +func (p *Paragraph) AddInlineDrawing(pic []byte) (*Run, error) { + sz, format, err := imgsz.DecodeSize(bytes.NewReader(pic)) + if err != nil { + return nil, err + } + id := strconv.Itoa(int(atomic.AddUintptr(&p.file.imageId, 1))) + rId := p.file.addImage(Media{Name: "image" + id + "." + format, Data: pic}) + w, h := sz.Width, sz.Height + if float64(w)/float64(h) > 1.2 { + h = A4_EMU_MAX_WIDTH * h / w + w = A4_EMU_MAX_WIDTH + } else { + h = A4_EMU_MAX_WIDTH * h / w / 2 + w = A4_EMU_MAX_WIDTH / 2 + } + d := &Drawing{ + Inline: &WPInline{ + AnchorID: fmt.Sprintf("%08X", rand.Uint32()), + EditID: fmt.Sprintf("%08X", rand.Uint32()), + + Extent: &WPExtent{ + CX: w, + CY: h, + }, + EffectExtent: &WPEffectExtent{}, + DocPr: &WPDocPr{ + ID: id, + Name: "图片 " + id, + }, + CNvGraphicFramePr: &WPCNvGraphicFramePr{ + Locks: &AGraphicFrameLocks{ + NoChangeAspect: 1, + }, + }, + Graphic: &AGraphic{ + GraphicData: &AGraphicData{ + URI: XMLNS_PICTURE, + Pic: &PICPic{ + NonVisualPicProperties: &PICNonVisualPicProperties{ + NonVisualDrawingProperties: PICNonVisualDrawingProperties{ + ID: id, + }, + }, + BlipFill: &PICBlipFill{ + Blip: ABlip{ + Embed: rId, + Cstate: "print", + }, + }, + SpPr: &PICSpPr{ + Xfrm: AXfrm{ + Ext: AExt{ + CX: w, + CY: h, + }, + }, + PrstGeom: APrstGeom{ + Prst: "rect", + }, + }, + }, + }, + }, + }, + } + run := &Run{ + Drawing: d, + RunProperties: &RunProperties{}, + } + p.Children = append(p.Children, ParagraphChild{Run: run}) + return run, nil +} + +// AddInlineDrawingFrom adds drawing from file to paragraph +func (p *Paragraph) AddInlineDrawingFrom(file string) (*Run, error) { + data, err := os.ReadFile(file) + if err != nil { + return nil, err + } + return p.AddInlineDrawing(data) } diff --git a/apilink.go b/apilink.go index c72506e..d9807b9 100644 --- a/apilink.go +++ b/apilink.go @@ -1,28 +1,9 @@ package docxlib -import ( - "strconv" - "sync/atomic" -) - const ( HYPERLINK_STYLE = "a1" ) -// when adding an hyperlink we need to store a reference in the relationship field -func (f *Docx) addLinkRelation(link string) string { - rel := &Relationship{ - ID: "rId" + strconv.Itoa(int(atomic.AddUintptr(&f.rId, 1))), - Type: REL_HYPERLINK, - Target: link, - TargetMode: REL_TARGETMODE, - } - - f.DocRelation.Relationships = append(f.DocRelation.Relationships, rel) - - return rel.ID -} - // AddLink adds an hyperlink to paragraph func (p *Paragraph) AddLink(text string, link string) *Hyperlink { rId := p.file.addLinkRelation(link) diff --git a/apitext.go b/apitext.go index e5b411c..c556aa6 100644 --- a/apitext.go +++ b/apitext.go @@ -1,7 +1,5 @@ package docxlib -import "strings" - // AddText adds text to paragraph func (p *Paragraph) AddText(text string) *Run { t := &Text{ @@ -17,30 +15,3 @@ func (p *Paragraph) AddText(text string) *Run { return run } - -func (p *Paragraph) String() string { - sb := strings.Builder{} - for _, c := range p.Children { - switch { - case c.Link != nil: - id := c.Link.ID - text := c.Link.Run.InstrText - link, err := p.file.Refer(id) - sb.WriteString(text) - sb.WriteByte('(') - if err != nil { - sb.WriteString(id) - } else { - sb.WriteString(link) - } - sb.WriteByte(')') - case c.Run != nil: - sb.WriteString("run") //TODO: implement - case c.Properties != nil: - sb.WriteString("prop") //TODO: implement - default: - continue - } - } - return sb.String() -} diff --git a/cmd/getstructure/main.go b/cmd/getstructure/main.go index f491e1d..210b54c 100644 --- a/cmd/getstructure/main.go +++ b/cmd/getstructure/main.go @@ -39,13 +39,13 @@ func main() { fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text) } if child.Run.Drawing != nil { - fmt.Printf("\tWe've found a new run with the drawing ->%s\n", child.Run.Drawing.Inline.DistT) // TODO: replace to refid + fmt.Printf("\tWe've found a new run with the drawing ->%d\n", child.Run.Drawing.Inline.DistT) // TODO: replace to refid } } if child.Link != nil { id := child.Link.ID text := child.Link.Run.InstrText - link, err := doc.Refer(id) + link, err := doc.ReferHref(id) if err != nil { fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text) } else { diff --git a/cmd/main/main.go b/cmd/main/main.go index ee67fab..4e557bd 100644 --- a/cmd/main/main.go +++ b/cmd/main/main.go @@ -31,6 +31,17 @@ func main() { nextPara := w.AddParagraph() nextPara.AddLink("google", `http://google.com`) + para3 := w.AddParagraph() + // add text + para3.AddText("直接粘贴 inline") + + para4 := w.AddParagraph() + para4.AddInlineDrawingFrom("testdata/fumiama.JPG") + para4.AddInlineDrawingFrom("testdata/fumiama2x.webp") + + para5 := w.AddParagraph() + para5.AddInlineDrawingFrom("testdata/fumiamayoko.png") + f, err := os.Create(*fileLocation) if err != nil { panic(err) @@ -66,13 +77,13 @@ func main() { fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text) } if child.Run.Drawing != nil { - fmt.Printf("\tWe've found a new run with the drawing ->%s\n", child.Run.Drawing.Inline.DistT) // TODO: replace to refid + fmt.Printf("\tWe've found a new run with the drawing ->%d\n", child.Run.Drawing.Inline.DistT) // TODO: replace to refid } } if child.Link != nil { id := child.Link.ID text := child.Link.Run.InstrText - link, err := doc.Refer(id) + link, err := doc.ReferHref(id) if err != nil { fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text) } else { @@ -83,7 +94,7 @@ func main() { } fmt.Print("End of paragraph\n\n") } - f, err = os.Create("tmp.docx") + f, err = os.Create("unmarshal_" + *fileLocation) if err != nil { panic(err) } diff --git a/docxlib.go b/docxlib.go index 1633741..62bdba0 100644 --- a/docxlib.go +++ b/docxlib.go @@ -18,7 +18,11 @@ type Docx struct { Document Document DocRelation Relationships - rId uintptr + media []Media + mediaNameIdx map[string]int + + rId uintptr + imageId uintptr buf *bytes.Buffer isbufempty bool @@ -93,15 +97,3 @@ func (f *Docx) Read(p []byte) (n int, err error) { f.isbufempty = false return f.buf.Read(p) } - -// Refer gets the url for a reference -func (f *Docx) Refer(id string) (href string, err error) { - for _, a := range f.DocRelation.Relationships { - if a.ID == id { - href = a.Target - return - } - } - err = ErrRefIDNotFound - return -} diff --git a/empty.go b/empty.go index 6eb928d..27c68d8 100644 --- a/empty.go +++ b/empty.go @@ -42,8 +42,10 @@ func newEmptyFile() *Docx { }, }, }, - rId: 3, - buf: bytes.NewBuffer(make([]byte, 0, 1024*1024*4)), + media: make([]Media, 0, 64), + mediaNameIdx: make(map[string]int, 64), + rId: 3, + buf: bytes.NewBuffer(make([]byte, 0, 1024*1024*4)), } docx.Document.file = docx return docx diff --git a/files.go b/fs.go similarity index 100% rename from files.go rename to fs.go diff --git a/go.mod b/go.mod index 8257b64..0e1e5c6 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/fumiama/docxlib go 1.16 + +require github.com/fumiama/imgsz v0.0.2 diff --git a/go.sum b/go.sum index e69de29..ac04f3a 100644 --- a/go.sum +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/fumiama/imgsz v0.0.2 h1:fAkC0FnIscdKOXwAxlyw3EUba5NzxZdSxGaq3Uyfxak= +github.com/fumiama/imgsz v0.0.2/go.mod h1:dR71mI3I2O5u6+PCpd47M9TZptzP+39tRBcbdIkoqM4= diff --git a/image.go b/image.go new file mode 100644 index 0000000..6466769 --- /dev/null +++ b/image.go @@ -0,0 +1,7 @@ +package docxlib + +// addImage add image to docx and return its rId +func (f *Docx) addImage(m Media) string { + f.addMedia(m) + return f.addImageRelation(m) +} diff --git a/link.go b/link.go new file mode 100644 index 0000000..c8feaac --- /dev/null +++ b/link.go @@ -0,0 +1,51 @@ +package docxlib + +import ( + "strconv" + "sync/atomic" +) + +// when adding an hyperlink we need to store a reference in the relationship field +// +// this func is not thread-safe +func (f *Docx) addLinkRelation(link string) string { + rel := &Relationship{ + ID: "rId" + strconv.Itoa(int(atomic.AddUintptr(&f.rId, 1))), + Type: REL_HYPERLINK, + Target: link, + TargetMode: REL_TARGETMODE, + } + + f.DocRelation.Relationships = append(f.DocRelation.Relationships, rel) + + return rel.ID +} + +// when adding an image we need to store a reference in the relationship field +// +// this func is not thread-safe +func (f *Docx) addImageRelation(m Media) string { + rel := &Relationship{ + ID: "rId" + strconv.Itoa(int(atomic.AddUintptr(&f.rId, 1))), + Type: REL_IMAGE, + Target: "media/" + m.Name, + } + + f.DocRelation.Relationships = append(f.DocRelation.Relationships, rel) + + return rel.ID +} + +// ReferHref gets the url for a reference +func (f *Docx) ReferHref(id string) (href string, err error) { + f.DocRelation.mu.RLock() + defer f.DocRelation.mu.RUnlock() + for _, a := range f.DocRelation.Relationships { + if a.ID == id { + href = a.Target + return + } + } + err = ErrRefIDNotFound + return +} diff --git a/media.go b/media.go new file mode 100644 index 0000000..5e75511 --- /dev/null +++ b/media.go @@ -0,0 +1,29 @@ +package docxlib + +const MEDIA_FOLDER = `word/media/` + +// Media is in word/media +type Media struct { + Name string // Name is for word/media/Name + Data []byte // Data is data of this media +} + +// String is the full path of the media +func (m *Media) String() string { + return MEDIA_FOLDER + m.Name +} + +// Media get media struct pointer (or nil on notfound) by name +func (f *Docx) Media(name string) *Media { + i, ok := f.mediaNameIdx[name] + if !ok { + return nil + } + return &f.media[i] +} + +// addMedia append the media to docx's media list +func (f *Docx) addMedia(m Media) { + f.mediaNameIdx[m.Name] = len(f.media) + f.media = append(f.media, m) +} diff --git a/pack.go b/pack.go index 2bbe513..bd7efbb 100644 --- a/pack.go +++ b/pack.go @@ -2,6 +2,7 @@ package docxlib import ( "archive/zip" + "bytes" "encoding/xml" "io" "os" @@ -27,9 +28,13 @@ func (f *Docx) pack(zipWriter *zip.Writer) (err error) { return } } - files["word/_rels/document.xml.rels"] = marshaller{data: f.DocRelation} + files["word/_rels/document.xml.rels"] = marshaller{data: &f.DocRelation} files["word/document.xml"] = marshaller{data: f.Document} + for _, m := range f.media { + files[m.String()] = bytes.NewReader(m.Data) + } + for path, r := range files { w, err := zipWriter.Create(path) if err != nil { diff --git a/structdoc.go b/structdoc.go index a6d0ccc..846be91 100644 --- a/structdoc.go +++ b/structdoc.go @@ -10,6 +10,8 @@ const ( XMLNS_R = `http://schemas.openxmlformats.org/officeDocument/2006/relationships` XMLNS_WP = `http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing` XMLNS_WP14 = `http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing` + + XMLNS_PICTURE = `http://schemas.openxmlformats.org/drawingml/2006/picture` ) func getAtt(atts []xml.Attr, name string) string { @@ -48,6 +50,9 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: diff --git a/structdoc_test.go b/structdoc_test.go index c45d153..2f6b514 100644 --- a/structdoc_test.go +++ b/structdoc_test.go @@ -2,6 +2,7 @@ package docxlib import ( "encoding/xml" + "hash/crc64" "io" "os" "testing" @@ -113,7 +114,7 @@ const drawing_doc = ` - + @@ -177,7 +178,7 @@ const drawing_doc = ` - + @@ -223,7 +224,7 @@ const drawing_doc = ` - + @@ -290,7 +291,7 @@ const drawing_doc = ` - + @@ -545,21 +546,8 @@ func TestUnmarshalDrawingStructure(t *testing.T) { if child.Run != nil && child.Run.Drawing != nil { t.Log("fild drawing at aragraph", i, ", child", j) if child.Run.Drawing.Inline != nil { - tail := "-mock-inline-p" + string(rune('0'+i)) + "-c" + string(rune('0'+j)) anchor := "mock-anchor-p" + string(rune('0'+i)) + "-c" + string(rune('0'+j)) edit := "mock-edit-p" + string(rune('0'+i)) + "-c" + string(rune('0'+j)) - if "T"+tail != child.Run.Drawing.Inline.DistT { - t.Fatal("expect", "T"+tail, "but got", child.Run.Drawing.Inline.DistT) - } - if "B"+tail != child.Run.Drawing.Inline.DistB { - t.Fatal("expect", "B"+tail, "but got", child.Run.Drawing.Inline.DistB) - } - if "L"+tail != child.Run.Drawing.Inline.DistL { - t.Fatal("expect", "L"+tail, "but got", child.Run.Drawing.Inline.DistL) - } - if "R"+tail != child.Run.Drawing.Inline.DistR { - t.Fatal("expect", "R"+tail, "but got", child.Run.Drawing.Inline.DistR) - } if anchor != child.Run.Drawing.Inline.AnchorID { t.Fatal("expect", anchor, "but got", child.Run.Drawing.Inline.AnchorID) } @@ -577,7 +565,6 @@ func TestUnmarshalDrawingStructure(t *testing.T) { } } } - t.Fail() } func TestMarshalDrawingStructure(t *testing.T) { @@ -588,14 +575,13 @@ func TestMarshalDrawingStructure(t *testing.T) { para1.AddText("直接粘贴 inline") para2 := w.AddParagraph() - para2.AddText("test font size and color").Size("44").Color("ff0000") - para2.AddText("test font size and color").Size("44").Color("ff0000") - para2.AddText("test font size and color").Size("44").Color("ff0000") + para2.AddInlineDrawingFrom("testdata/fumiama.JPG") + para2.AddInlineDrawingFrom("testdata/fumiama2x.webp") - nextPara := w.AddParagraph() - nextPara.AddLink("google", `http://google.com`) + para3 := w.AddParagraph() + para3.AddInlineDrawingFrom("testdata/fumiamayoko.png") - f, err := os.Create("test.xml") + f, err := os.Create("TestMarshalDrawingStructure_Marshal.xml") if err != nil { t.Fatal(err) } @@ -613,7 +599,7 @@ func TestMarshalDrawingStructure(t *testing.T) { if err != nil { t.Fatal(err) } - f1, err := os.Create("test1.xml") + f1, err := os.Create("TestMarshalDrawingStructure_Unmarshal.xml") if err != nil { t.Fatal(err) } @@ -622,5 +608,30 @@ func TestMarshalDrawingStructure(t *testing.T) { if err != nil { t.Fatal(err) } - t.Fail() + _, err = f.Seek(0, io.SeekStart) + if err != nil { + t.Fatal(err) + } + _, err = f1.Seek(0, io.SeekStart) + if err != nil { + t.Fatal(err) + } + h := crc64.New(crc64.MakeTable(crc64.ECMA)) + _, err = io.Copy(h, f) + if err != nil { + t.Fatal(err) + } + md51 := h.Sum64() + h.Reset() + _, err = io.Copy(h, f1) + if err != nil { + t.Fatal(err) + } + md52 := h.Sum64() + if md51 != md52 { + t.Fail() + } /* else { + _ = os.Remove("TestMarshalDrawingStructure_Marshal.xml") + _ = os.Remove("TestMarshalDrawingStructure_Unmarshal.xml") + }*/ } diff --git a/structdrawing.go b/structdrawing.go index c1f6f6a..7e0be4a 100644 --- a/structdrawing.go +++ b/structdrawing.go @@ -3,6 +3,12 @@ package docxlib import ( "encoding/xml" "io" + "strconv" +) + +const ( + // A4_EMU_MAX_WIDTH is the max display width of an A4 paper + A4_EMU_MAX_WIDTH = 5274310 ) // Drawing element contains photos @@ -17,16 +23,31 @@ func (r *Drawing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: switch tt.Name.Local { case "inline": r.Inline = new(WPInline) - r.Inline.DistT = getAtt(tt.Attr, "distT") - r.Inline.DistB = getAtt(tt.Attr, "distB") - r.Inline.DistL = getAtt(tt.Attr, "distL") - r.Inline.DistR = getAtt(tt.Attr, "distR") + r.Inline.DistT, err = strconv.Atoi(getAtt(tt.Attr, "distT")) + if err != nil { + return err + } + r.Inline.DistB, err = strconv.Atoi(getAtt(tt.Attr, "distB")) + if err != nil { + return err + } + r.Inline.DistL, err = strconv.Atoi(getAtt(tt.Attr, "distL")) + if err != nil { + return err + } + r.Inline.DistR, err = strconv.Atoi(getAtt(tt.Attr, "distR")) + if err != nil { + return err + } r.Inline.AnchorID = getAtt(tt.Attr, "anchorId") r.Inline.EditID = getAtt(tt.Attr, "editId") d.DecodeElement(r.Inline, &start) @@ -43,17 +64,18 @@ func (r *Drawing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { // WPInline wp:inline type WPInline struct { XMLName xml.Name `xml:"wp:inline,omitempty"` - DistT string `xml:"distT,attr"` - DistB string `xml:"distB,attr"` - DistL string `xml:"distL,attr"` - DistR string `xml:"distR,attr"` - AnchorID string `xml:"wp14:anchorId,attr"` - EditID string `xml:"wp14:editId,attr"` + DistT int `xml:"distT,attr"` + DistB int `xml:"distB,attr"` + DistL int `xml:"distL,attr"` + DistR int `xml:"distR,attr"` + AnchorID string `xml:"wp14:anchorId,attr,omitempty"` + EditID string `xml:"wp14:editId,attr,omitempty"` - Extent *WPExtent - EffectExtent *WPEffectExtent - DocPr *WPDocPr - Graphic *AGraphic + Extent *WPExtent + EffectExtent *WPEffectExtent + DocPr *WPDocPr + CNvGraphicFramePr *WPCNvGraphicFramePr + Graphic *AGraphic } func (r *WPInline) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { @@ -62,26 +84,51 @@ func (r *WPInline) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: switch tt.Name.Local { case "extent": r.Extent = new(WPExtent) - r.Extent.CX = getAtt(tt.Attr, "cx") - r.Extent.CY = getAtt(tt.Attr, "cy") + r.Extent.CX, err = strconv.Atoi(getAtt(tt.Attr, "cx")) + if err != nil { + return err + } + r.Extent.CY, err = strconv.Atoi(getAtt(tt.Attr, "cy")) + if err != nil { + return err + } case "effectExtent": r.EffectExtent = new(WPEffectExtent) - r.EffectExtent.L = getAtt(tt.Attr, "l") - r.EffectExtent.T = getAtt(tt.Attr, "t") - r.EffectExtent.R = getAtt(tt.Attr, "r") - r.EffectExtent.B = getAtt(tt.Attr, "b") + r.EffectExtent.L, err = strconv.Atoi(getAtt(tt.Attr, "l")) + if err != nil { + return err + } + r.EffectExtent.T, err = strconv.Atoi(getAtt(tt.Attr, "t")) + if err != nil { + return err + } + r.EffectExtent.R, err = strconv.Atoi(getAtt(tt.Attr, "r")) + if err != nil { + return err + } + r.EffectExtent.B, err = strconv.Atoi(getAtt(tt.Attr, "b")) + if err != nil { + return err + } case "docPr": r.DocPr = new(WPDocPr) r.DocPr.ID = getAtt(tt.Attr, "id") r.DocPr.Name = getAtt(tt.Attr, "name") r.DocPr.Macro = getAtt(tt.Attr, "macro") r.DocPr.Hidden = getAtt(tt.Attr, "hidden") + case "cNvGraphicFramePr": + var value WPCNvGraphicFramePr + d.DecodeElement(&value, &start) + r.CNvGraphicFramePr = &value case "graphic": var value AGraphic d.DecodeElement(&value, &start) @@ -97,19 +144,21 @@ func (r *WPInline) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { } // WPExtent represents the extent of a drawing in a Word document. +// +// CX CY 's unit is English Metric Units, which is 1/914400 inch type WPExtent struct { XMLName xml.Name `xml:"wp:extent,omitempty"` - CX string `xml:"cx,attr"` - CY string `xml:"cy,attr"` + CX int `xml:"cx,attr"` + CY int `xml:"cy,attr"` } // WPEffectExtent represents the effect extent of a drawing in a Word document. type WPEffectExtent struct { XMLName xml.Name `xml:"wp:effectExtent,omitempty"` - L string `xml:"l,attr"` - T string `xml:"t,attr"` - R string `xml:"r,attr"` - B string `xml:"b,attr"` + L int `xml:"l,attr"` + T int `xml:"t,attr"` + R int `xml:"r,attr"` + B int `xml:"b,attr"` } // WPDocPr represents the document properties of a drawing in a Word document. @@ -121,6 +170,48 @@ type WPDocPr struct { Hidden string `xml:"hidden,attr,omitempty"` } +// WPCNvGraphicFramePr represents the non-visual properties of a graphic frame. +type WPCNvGraphicFramePr struct { + XMLName xml.Name `xml:"wp:cNvGraphicFramePr,omitempty"` + Locks *AGraphicFrameLocks +} + +func (w *WPCNvGraphicFramePr) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + switch tt := t.(type) { + case xml.StartElement: + switch tt.Name.Local { + case "graphicFrameLocks": + var value AGraphicFrameLocks + d.DecodeElement(&value, &start) + value.NoChangeAspect, err = strconv.Atoi(getAtt(tt.Attr, "noChangeAspect")) + if err != nil { + return err + } + w.Locks = &value + default: + continue + } + } + + } + return nil +} + +// AGraphicFrameLocks represents the locks applied to a graphic frame. +type AGraphicFrameLocks struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main graphicFrameLocks,omitempty"` + NoChangeAspect int `xml:"noChangeAspect,attr"` +} + // AGraphic represents a graphic in a Word document. type AGraphic struct { XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main graphic,omitempty"` @@ -133,6 +224,9 @@ func (a *AGraphic) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: @@ -164,6 +258,9 @@ func (a *AGraphicData) UnmarshalXML(d *xml.Decoder, start xml.StartElement) erro if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: @@ -186,7 +283,7 @@ type PICPic struct { XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/picture pic,omitempty"` NonVisualPicProperties *PICNonVisualPicProperties BlipFill *PICBlipFill - // is unecessary + SpPr *PICSpPr } func (p *PICPic) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { @@ -195,6 +292,9 @@ func (p *PICPic) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: @@ -207,6 +307,10 @@ func (p *PICPic) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { var value PICBlipFill d.DecodeElement(&value, &start) p.BlipFill = &value + case "spPr": + var value PICSpPr + d.DecodeElement(&value, &start) + p.SpPr = &value default: continue } @@ -228,6 +332,9 @@ func (p *PICNonVisualPicProperties) UnmarshalXML(d *xml.Decoder, start xml.Start if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: @@ -253,6 +360,7 @@ type PICNonVisualDrawingProperties struct { type PICBlipFill struct { XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/picture blipFill,omitempty"` Blip ABlip + Stretch AStretch } func (p *PICBlipFill) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { @@ -261,12 +369,18 @@ func (p *PICBlipFill) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: switch tt.Name.Local { case "blip": p.Blip.Embed = getAtt(tt.Attr, "embed") + p.Blip.Cstate = getAtt(tt.Attr, "cstate") + case "stretch": + d.DecodeElement(&p.Stretch, &start) default: continue } @@ -280,4 +394,163 @@ func (p *PICBlipFill) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error type ABlip struct { XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main blip,omitempty"` Embed string `xml:"r:embed,attr"` + Cstate string `xml:"cstate,attr"` +} + +// AStretch ... +type AStretch struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main stretch,omitempty"` + FillRect AFillRect +} + +// AFillRect ... +type AFillRect struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main fillRect,omitempty"` +} + +// PICSpPr is a struct representing the element in OpenXML, +// which describes the shape properties for a picture. +type PICSpPr struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/picture spPr,omitempty"` + Xfrm AXfrm + PrstGeom APrstGeom +} + +func (p *PICSpPr) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + switch tt := t.(type) { + case xml.StartElement: + switch tt.Name.Local { + case "xfrm": + d.DecodeElement(&p.Xfrm, &start) + case "prstGeom": + d.DecodeElement(&p.PrstGeom, &start) + p.PrstGeom.Prst = getAtt(tt.Attr, "prst") + default: + continue + } + } + + } + return nil +} + +// AXfrm is a struct representing the element in OpenXML, +// which describes the position and size of a shape. +type AXfrm struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main xfrm,omitempty"` + Off AOff + Ext AExt +} + +func (a *AXfrm) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + switch tt := t.(type) { + case xml.StartElement: + switch tt.Name.Local { + case "off": + a.Off.X, err = strconv.Atoi(getAtt(tt.Attr, "x")) + if err != nil { + return err + } + a.Off.Y, err = strconv.Atoi(getAtt(tt.Attr, "y")) + if err != nil { + return err + } + case "ext": + a.Ext.CX, err = strconv.Atoi(getAtt(tt.Attr, "cx")) + if err != nil { + return err + } + a.Ext.CY, err = strconv.Atoi(getAtt(tt.Attr, "cy")) + if err != nil { + return err + } + default: + continue + } + } + + } + return nil +} + +// AOff is a struct representing the element in OpenXML, +// which describes the offset of a shape from its original position. +type AOff struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main off,omitempty"` + X int `xml:"x,attr"` + Y int `xml:"y,attr"` +} + +// AExt is a struct representing the element in OpenXML, +// which describes the size of a shape. +type AExt struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main ext,omitempty"` + CX int `xml:"cx,attr"` + CY int `xml:"cy,attr"` +} + +// APrstGeom is a struct representing the element in OpenXML, +// which describes the preset shape geometry for a shape. +type APrstGeom struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main prstGeom,omitempty"` + Prst string `xml:"prst,attr"` + AvLst AAvLst +} + +// AAvLst is a struct representing the element in OpenXML, +// which describes the adjustments to the shape's preset geometry. +type AAvLst struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main avLst,omitempty"` + RawXML string `xml:",innerxml"` +} + +func (a *AAvLst) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { + var content []byte + + if content, err = xml.Marshal(start); err != nil { + return err + } + + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if end, ok := t.(xml.EndElement); ok && end == start.End() { + break + } + + b, err := xml.Marshal(t) + if err != nil { + return err + } + + content = append(content, b...) + } + + a.RawXML = BytesToString(content) + + return nil } diff --git a/structlink.go b/structlink.go index 7f015b5..0f63296 100644 --- a/structlink.go +++ b/structlink.go @@ -18,6 +18,9 @@ func (r *Hyperlink) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: diff --git a/structpara.go b/structpara.go index 0aa74af..0ac96cc 100644 --- a/structpara.go +++ b/structpara.go @@ -3,6 +3,7 @@ package docxlib import ( "encoding/xml" "io" + "strings" ) type ParagraphChild struct { @@ -18,6 +19,33 @@ type Paragraph struct { file *Docx } +func (p *Paragraph) String() string { + sb := strings.Builder{} + for _, c := range p.Children { + switch { + case c.Link != nil: + id := c.Link.ID + text := c.Link.Run.InstrText + link, err := p.file.ReferHref(id) + sb.WriteString(text) + sb.WriteByte('(') + if err != nil { + sb.WriteString(id) + } else { + sb.WriteString(link) + } + sb.WriteByte(')') + case c.Run != nil: + sb.WriteString("run") //TODO: implement + case c.Properties != nil: + sb.WriteString("prop") //TODO: implement + default: + continue + } + } + return sb.String() +} + func (p *Paragraph) MarshalXML(e *xml.Encoder, start xml.StartElement) error { err := e.EncodeToken(start) if err != nil { @@ -48,6 +76,9 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: var elem ParagraphChild diff --git a/structrel.go b/structrel.go index 411632b..e43616b 100644 --- a/structrel.go +++ b/structrel.go @@ -1,15 +1,20 @@ package docxlib -import "encoding/xml" +import ( + "encoding/xml" + "sync" +) const ( XMLNS_REL = `http://schemas.openxmlformats.org/package/2006/relationships` REL_HYPERLINK = `http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink` + REL_IMAGE = `http://schemas.openxmlformats.org/officeDocument/2006/relationships/image` REL_TARGETMODE = "External" ) type Relationships struct { + mu sync.RWMutex XMLName xml.Name `xml:"Relationships"` Xmlns string `xml:"xmlns,attr"` Relationships []*Relationship `xml:"Relationship"` diff --git a/structrun.go b/structrun.go index e6d7745..f02a82c 100644 --- a/structrun.go +++ b/structrun.go @@ -21,6 +21,9 @@ func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: @@ -67,6 +70,9 @@ func (r *RunProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElement) err if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.StartElement: diff --git a/structtext.go b/structtext.go index b0c8782..87be9fd 100644 --- a/structtext.go +++ b/structtext.go @@ -7,7 +7,7 @@ import ( // Text object contains the actual text type Text struct { - XMLName xml.Name `xml:"w:t"` + XMLName xml.Name `xml:"w:t,omitempty"` XMLSpace string `xml:"xml:space,attr,omitempty"` Text string `xml:",chardata"` } @@ -18,6 +18,9 @@ func (r *Text) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err == io.EOF { break } + if err != nil { + return err + } switch tt := t.(type) { case xml.CharData: diff --git a/testdata/fumiama.JPG b/testdata/fumiama.JPG new file mode 100644 index 0000000..bca1ca0 Binary files /dev/null and b/testdata/fumiama.JPG differ diff --git a/testdata/fumiama2x.webp b/testdata/fumiama2x.webp new file mode 100644 index 0000000..ec689c6 Binary files /dev/null and b/testdata/fumiama2x.webp differ diff --git a/testdata/fumiamayoko.png b/testdata/fumiamayoko.png new file mode 100644 index 0000000..7ca373b Binary files /dev/null and b/testdata/fumiamayoko.png differ diff --git a/unpack.go b/unpack.go index 8b8d019..c7b6581 100644 --- a/unpack.go +++ b/unpack.go @@ -4,6 +4,8 @@ import ( "archive/zip" "bytes" "encoding/xml" + "io" + "strings" ) // unpack receives a zip file (word documents are a zip with multiple xml inside) @@ -13,6 +15,7 @@ import ( // 2. Relationships func unpack(zipReader *zip.Reader) (docx *Docx, err error) { docx = new(Docx) + docx.mediaNameIdx = make(map[string]int, 64) for _, f := range zipReader.File { if f.Name == "word/_rels/document.xml.rels" { err = docx.parseDocRelation(f) @@ -26,6 +29,10 @@ func unpack(zipReader *zip.Reader) (docx *Docx, err error) { return } } + err = docx.checkAndParseMedia(f) + if err != nil { + return + } } docx.buf = bytes.NewBuffer(make([]byte, 0, 1024*1024*4)) return @@ -61,6 +68,24 @@ func (f *Docx) parseDocRelation(file *zip.File) error { defer zf.Close() f.DocRelation.Xmlns = XMLNS_R - //TODO: find last rId + //TODO: find last rId & imageId return xml.NewDecoder(zf).Decode(&f.DocRelation) } + +func (f *Docx) checkAndParseMedia(file *zip.File) error { + if !strings.HasPrefix(file.Name, MEDIA_FOLDER) { + return nil + } + name := file.Name[len(MEDIA_FOLDER):] + zf, err := file.Open() + if err != nil { + return err + } + data, err := io.ReadAll(zf) + if err != nil { + return err + } + f.mediaNameIdx[name] = len(f.media) + f.media = append(f.media, Media{Name: name, Data: data}) + return zf.Close() +}