From 517198ded46ca281febe2d84368a0c9482126753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Wed, 15 Feb 2023 17:23:01 +0800 Subject: [PATCH] fix: recursive paragraph unmarshalling --- empty.go | 4 ++- structdoc.go | 9 ++++++- structdoc_test.go | 16 +++++++----- structdrawing.go | 64 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 8 deletions(-) diff --git a/empty.go b/empty.go index 57feef5..6eb928d 100644 --- a/empty.go +++ b/empty.go @@ -6,7 +6,7 @@ import ( ) func newEmptyFile() *Docx { - return &Docx{ + docx := &Docx{ Document: Document{ XMLName: xml.Name{ Space: "w", @@ -45,4 +45,6 @@ func newEmptyFile() *Docx { rId: 3, buf: bytes.NewBuffer(make([]byte, 0, 1024*1024*4)), } + docx.Document.file = docx + return docx } diff --git a/structdoc.go b/structdoc.go index d68b0b3..a6d0ccc 100644 --- a/structdoc.go +++ b/structdoc.go @@ -33,6 +33,8 @@ type Document struct { XMLWP string `xml:"xmlns:wp,attr,omitempty"` // cannot be unmarshalled in XMLWP14 string `xml:"xmlns:wp14,attr,omitempty"` // cannot be unmarshalled in Body *Body + + file *Docx } func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { @@ -50,11 +52,16 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error switch tt := t.(type) { case xml.StartElement: switch tt.Name.Local { + case "body": case "p": var value Paragraph d.DecodeElement(&value, &start) - doc.Body.Paragraphs = append(doc.Body.Paragraphs, &value) + if len(value.Children) > 0 { + value.file = doc.file + doc.Body.Paragraphs = append(doc.Body.Paragraphs, &value) + } default: + d.Skip() continue } } diff --git a/structdoc_test.go b/structdoc_test.go index 8da73ec..2d13029 100644 --- a/structdoc_test.go +++ b/structdoc_test.go @@ -11,19 +11,19 @@ const decoded_doc_1 = `Table of Contents TOC \h \z \t "Heading 1,2,S6,1,S0,1,S1,1,S2,1,S3,1,S4,1,S5,1" Holy Grail [xref:bRJduW6hNR] PAGEREF _Toc420414504 \h 21.What is your name? [xref:TH7u7QDqhD] PAGEREF _Toc420414505 \h 22.What is your quest? [xref:bC62HkFATC] PAGEREF _Toc420414506 \h 23.What is your favourite colour? [xref:I3TphuHX6N] PAGEREF _Toc420414507 \h 2Holy Grail [ FORMTEXT xref:bRJduW6hNR]What is your name? [ FORMTEXT xref:TH7u7QDqhD]My name is Sir Launcelot of Camelot.What is your quest? [ FORMTEXT xref:bC62HkFATC]To seek the Holy Grail[or a grail shaped beacon]. What is your favourite colour? [ FORMTEXT xref:I3TphuHX6N]Blue.How many paragraphs here then?` func TestUnmarshalPlainStructure(t *testing.T) { - doc := Document{ - XMLW: XMLNS_W, - XMLR: XMLNS_R, - XMLWP: XMLNS_WP, - XMLName: xml.Name{Space: XMLNS_W, Local: "document"}} testCases := []struct { content string numParagraphs int }{ {decoded_doc_1, 5}, - {decoded_doc_2, 19}, + {decoded_doc_2, 14}, } for _, tc := range testCases { + doc := Document{ + XMLW: XMLNS_W, + XMLR: XMLNS_R, + XMLWP: XMLNS_WP, + XMLName: xml.Name{Space: XMLNS_W, Local: "document"}} err := xml.Unmarshal(StringToBytes(tc.content), &doc) if err != nil { t.Fatal(err) @@ -566,10 +566,14 @@ func TestUnmarshalDrawingStructure(t *testing.T) { if edit != child.Run.Drawing.Inline.EditID { t.Fatal("expect", edit, "but got", child.Run.Drawing.Inline.EditID) } + if child.Run.Drawing.Inline.Graphic != nil && child.Run.Drawing.Inline.Graphic.GraphicData != nil { + t.Log(child.Run.Drawing.Inline.Graphic.GraphicData.URI) + } } } } } + t.Fail() } func TestMarshalDrawingStructure(t *testing.T) { diff --git a/structdrawing.go b/structdrawing.go index 44c2a47..005b220 100644 --- a/structdrawing.go +++ b/structdrawing.go @@ -53,6 +53,7 @@ type WPInline struct { Extent *WPExtent EffectExtent *WPEffectExtent DocPr *WPDocPr + Graphic *AGraphic } func (r *WPInline) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { @@ -66,18 +67,25 @@ func (r *WPInline) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { case xml.StartElement: switch tt.Name.Local { case "extent": + r.Extent = new(WPExtent) r.Extent.CX = getAtt(tt.Attr, "cx") r.Extent.CY = getAtt(tt.Attr, "cy") case "effectExtent": + r.EffectExtent = new(WPEffectExtent) r.EffectExtent.L = getAtt(tt.Attr, "l") r.EffectExtent.T = getAtt(tt.Attr, "t") r.EffectExtent.R = getAtt(tt.Attr, "r") r.EffectExtent.B = getAtt(tt.Attr, "b") case "docPr": + r.DocPr = new(WPDocPr) r.DocPr.ID = getAtt(tt.Attr, "id") r.DocPr.Name = getAtt(tt.Attr, "name") r.DocPr.Macro = getAtt(tt.Attr, "macro") r.DocPr.Hidden = getAtt(tt.Attr, "hidden") + case "graphic": + var value AGraphic + d.DecodeElement(&value, &start) + r.Graphic = &value default: continue } @@ -119,8 +127,64 @@ type AGraphic struct { GraphicData *AGraphicData } +func (a *AGraphic) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { + break + } + + switch tt := t.(type) { + case xml.StartElement: + switch tt.Name.Local { + case "graphicData": + var value AGraphicData + d.DecodeElement(&value, &start) + value.URI = getAtt(tt.Attr, "uri") + a.GraphicData = &value + default: + continue + } + } + + } + return nil +} + // AGraphicData represents the data of a graphic in a Word document. type AGraphicData struct { XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main graphicData,omitempty"` URI string `xml:"uri,attr"` + Pic PICPic +} + +// PICPic represents a picture in a Word document. +type PICPic struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/picture pic,omitempty"` + NonVisualPicProperties PICNonVisualPicProperties + BlipFill PICBlipFill +} + +// PICNonVisualPicProperties represents the non-visual properties of a picture in a Word document. +type PICNonVisualPicProperties struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/picture nvPicPr,omitempty"` + NonVisualDrawingProperties PICNonVisualDrawingProperties +} + +// PICNonVisualDrawingProperties represents the non-visual drawing properties of a picture in a Word document. +type PICNonVisualDrawingProperties struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/picture cNvPr,omitempty"` + ID string `xml:"id,attr"` +} + +// PICBlipFill represents the blip fill of a picture in a Word document. +type PICBlipFill struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/picture blipFill,omitempty"` + Blip ABlip +} + +// ABlip represents the blip of a picture in a Word document. +type ABlip struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/main blip,omitempty"` + Embed string `xml:"r:embed,attr"` }