From fbaed74afa8526cf1c6a42eeb6d415bd8b067557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Thu, 23 Feb 2023 17:41:45 +0800 Subject: [PATCH] feat: add table structures --- structdoc.go | 5 + structdrawing.go | 79 +++++++- structlink.go | 5 + structpara.go | 28 +++ structrun.go | 8 + structtable.go | 473 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 596 insertions(+), 2 deletions(-) create mode 100644 structtable.go diff --git a/structdoc.go b/structdoc.go index 9ef0179..7afdf83 100644 --- a/structdoc.go +++ b/structdoc.go @@ -99,6 +99,11 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error if err != nil && !strings.HasPrefix(err.Error(), "expected") { return err } + continue + } + err = d.Skip() // skip unsupported tags + if err != nil { + return err } } diff --git a/structdrawing.go b/structdrawing.go index b4164d1..4abeafc 100644 --- a/structdrawing.go +++ b/structdrawing.go @@ -52,6 +52,10 @@ func (r *Drawing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { return err } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -61,7 +65,14 @@ func (r *Drawing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { } -// WPInline wp:inline +// WPInline is an element that represents an inline image within a text paragraph. +// +// It contains information about the image's size and position, +// as well as any non-visual properties associated with the image. +// The element can contain child elements such as to specify +// the dimensions of the image and to specify the non-visual +// properties of the image. Inline images are often used in documents where the images +// are meant to be treated as part of the text flow, such as in a newsletter or a product brochure. type WPInline struct { XMLName xml.Name `xml:"wp:inline,omitempty"` DistT int64 `xml:"distT,attr"` @@ -166,6 +177,10 @@ func (r *WPInline) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err err } r.Graphic = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -316,6 +331,10 @@ func (w *WPCNvGraphicFramePr) UnmarshalXML(d *xml.Decoder, start xml.StartElemen } w.Locks = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -367,6 +386,10 @@ func (a *AGraphic) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { value.URI = getAtt(tt.Attr, "uri") a.GraphicData = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -404,6 +427,10 @@ func (a *AGraphicData) UnmarshalXML(d *xml.Decoder, start xml.StartElement) erro value.XMLPIC = getAtt(tt.Attr, "pic") a.Pic = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -456,6 +483,10 @@ func (p *PICPic) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { } p.SpPr = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -493,6 +524,10 @@ func (p *PICNonVisualPicProperties) UnmarshalXML(d *xml.Decoder, start xml.Start return err } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -530,6 +565,10 @@ func (p *PicCNvPicPr) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error } p.Locks = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -581,6 +620,10 @@ func (p *PICBlipFill) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error return err } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -628,6 +671,10 @@ func (a *ABlip) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { } a.AlphaModFix = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -686,6 +733,10 @@ func (p *PICSpPr) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { } p.PrstGeom.Prst = getAtt(tt.Attr, "prst") default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -758,6 +809,10 @@ func (a *AXfrm) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) return err } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -831,7 +886,15 @@ func (a *AAvLst) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error return nil } -// WPAnchor wp:anchor +// WPAnchor is an element that represents an anchored object in a Word document. +// +// It allows for the positioning of a drawing object relative to a specific location +// in the text of the document. The element contains child elements that +// specify the dimensions and position of the anchored object, as well as the non-visual +// properties of the object. The element can contain the element, +// which contains the non-visual properties of the anchored object, such as its ID and name, +// as well as the element, which specifies the visual properties of the object, +// such as its shape and fill. type WPAnchor struct { XMLName xml.Name `xml:"wp:anchor,omitempty"` DistT int64 `xml:"distT,attr"` @@ -984,6 +1047,10 @@ func (r *WPAnchor) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err err return err } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -1030,6 +1097,10 @@ func (r *WPPositionH) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error return err } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -1069,6 +1140,10 @@ func (r *WPPositionV) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error return err } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } diff --git a/structlink.go b/structlink.go index 2267eaf..c5e8e97 100644 --- a/structlink.go +++ b/structlink.go @@ -30,6 +30,11 @@ func (r *Hyperlink) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if err != nil && !strings.HasPrefix(err.Error(), "expected") { return err } + continue + } + err = d.Skip() // skip unsupported tags + if err != nil { + return err } } diff --git a/structpara.go b/structpara.go index 046c7b1..a89c642 100644 --- a/structpara.go +++ b/structpara.go @@ -27,6 +27,10 @@ func (p *ParagraphProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElemen case "jc": p.Justification = &Justification{Val: getAtt(tt.Attr, "val")} default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -39,6 +43,12 @@ func (p *ParagraphProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElemen // Paragraph type Paragraph struct { // XMLName xml.Name `xml:"w:p,omitempty"` + + RsidR string `xml:"w:rsidR,attr,omitempty"` + RsidRPr string `xml:"w:rsidRPr,attr,omitempty"` + RsidRDefault string `xml:"w:rsidRDefault,attr,omitempty"` + RsidP string `xml:"w:rsidP,attr,omitempty"` + Properties *ParagraphProperties Children []interface{} // Children will generate an unnecessary tag ... and we skip it by a self-defined xml.Marshaler @@ -95,6 +105,20 @@ func (p *Paragraph) MarshalXML(e *xml.Encoder, start xml.StartElement) error { // UnmarshalXML ... func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for _, attr := range start.Attr { + switch attr.Name.Local { + case "rsidR": + p.RsidR = attr.Value + case "rsidRPr": + p.RsidRPr = attr.Value + case "rsidRDefault": + p.RsidRDefault = attr.Value + case "rsidP": + p.RsidP = attr.Value + default: + // ignore other attributes + } + } children := make([]interface{}, 0, 64) for { t, err := d.Token() @@ -145,6 +169,10 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { p.Properties = &value continue default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } children = append(children, elem) diff --git a/structrun.go b/structrun.go index b9a96b7..098e8af 100644 --- a/structrun.go +++ b/structrun.go @@ -74,6 +74,10 @@ func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { }{}) } default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } @@ -128,6 +132,10 @@ func (r *RunProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElement) err value.Val = getAtt(tt.Attr, "val") r.Style = &value default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } continue } } diff --git a/structtable.go b/structtable.go new file mode 100644 index 0000000..abeb83f --- /dev/null +++ b/structtable.go @@ -0,0 +1,473 @@ +package docxlib + +import ( + "encoding/xml" + "io" + "strconv" + "strings" + "sync" +) + +// WTable represents a table within a Word document. +type WTable struct { + XMLName xml.Name `xml:"w:tbl,omitempty"` + TableProperties *WTableProperties + TableGrid *WTableGrid + TableRows []*WTableRow +} + +// WTableProperties is an element that represents the properties of a table in Word document. +type WTableProperties struct { + XMLName xml.Name `xml:"w:tblPr,omitempty"` + Style *WTableStyle + Width *WTableWidth + Look *WTableLook +} + +// UnmarshalXML implements the xml.Unmarshaler interface. +func (t *WTableProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + token, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + if tt, ok := token.(xml.StartElement); ok { + switch tt.Name.Local { + case "tblStyle": + t.Style = new(WTableStyle) + err = d.DecodeElement(t.Style, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + case "tblW": + t.Width = new(WTableWidth) + err = d.DecodeElement(t.Width, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + case "tblLook": + t.Look = new(WTableLook) + err = d.DecodeElement(t.Look, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } + continue + } + } + } + return nil +} + +// WTableStyle represents the style of a table in a Word document. +type WTableStyle struct { + XMLName xml.Name `xml:"w:tblStyle,omitempty"` + Val string `xml:"w:val,attr"` +} + +// UnmarshalXML ... +func (t *WTableStyle) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { + for _, attr := range start.Attr { + if attr.Value == "" { + continue + } + switch attr.Name.Local { + case "val": + t.Val = attr.Value + default: + // ignore other attributes + } + } + // Consume the end element + _, err = d.Token() + if err != nil { + return + } + return nil +} + +// WTableWidth represents the width of a table in a Word document. +type WTableWidth struct { + XMLName xml.Name `xml:"w:tblW,omitempty"` + W int64 `xml:"w:w,attr"` + Type string `xml:"w:type,attr"` +} + +// UnmarshalXML ... +func (t *WTableWidth) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { + for _, attr := range start.Attr { + if attr.Value == "" { + continue + } + switch attr.Name.Local { + case "w": + t.W, err = strconv.ParseInt(attr.Value, 10, 64) + if err != nil { + return + } + case "type": + t.Type = attr.Value + default: + // ignore other attributes + } + } + // Consume the end element + _, err = d.Token() + if err != nil { + return + } + return nil +} + +// WTableLook represents the look of a table in a Word document. +type WTableLook struct { + XMLName xml.Name `xml:"w:tblLook,omitempty"` + Val string `xml:"w:val,attr"` + FirstRow int `xml:"w:firstRow,attr"` + LastRow int `xml:"w:lastRow,attr"` + FirstCol int `xml:"w:firstColumn,attr"` + LastCol int `xml:"w:lastColumn,attr"` + NoHBand int `xml:"w:noHBand,attr"` + NoVBand int `xml:"w:noVBand,attr"` +} + +// UnmarshalXML ... +func (t *WTableLook) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for _, attr := range start.Attr { + if attr.Value == "" { + continue + } + switch attr.Name.Local { + case "val": + t.Val = attr.Value + case "firstRow": + t.FirstRow = int(attr.Value[0] - '0') + case "lastRow": + t.LastRow = int(attr.Value[0] - '0') + case "firstColumn": + t.FirstCol = int(attr.Value[0] - '0') + case "lastColumn": + t.LastCol = int(attr.Value[0] - '0') + case "noHBand": + t.NoHBand = int(attr.Value[0] - '0') + case "noVBand": + t.NoVBand = int(attr.Value[0] - '0') + default: + // ignore other attributes + } + } + // Consume the end element + _, err := d.Token() + if err != nil { + return err + } + return nil +} + +// WTableGrid is a structure that represents the table grid of a Word document. +type WTableGrid struct { + XMLName xml.Name `xml:"w:tblGrid,omitempty"` + GridCols []*WGridCol `xml:"w:gridCol,omitempty"` +} + +// UnmarshalXML ... +func (t *WTableGrid) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if el, ok := tok.(xml.StartElement); ok { + switch el.Name.Local { + case "gridCol": + var gc WGridCol + err := d.DecodeElement(&gc, &el) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + t.GridCols = append(t.GridCols, &gc) + default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } + continue + } + } + } + return nil +} + +// WGridCol is a structure that represents a table grid column of a Word document. +type WGridCol struct { + XMLName xml.Name `xml:"w:gridCol,omitempty"` + W int64 `xml:"w,attr"` +} + +// UnmarshalXML ... +func (g *WGridCol) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { + for _, attr := range start.Attr { + if attr.Value == "" { + continue + } + switch attr.Name.Local { + case "w": + g.W, err = strconv.ParseInt(attr.Value, 10, 64) + if err != nil { + return + } + default: + // ignore other attributes + } + } + // Consume the end element + _, err = d.Token() + if err != nil { + return + } + return nil +} + +// WTableRow represents a row within a table. +type WTableRow struct { + XMLName xml.Name `xml:"w:tr,omitempty"` + RsidR string `xml:"w:rsidR,attr"` + RsidTr string `xml:"w:rsidTr,attr"` + TableRowProperties *WTableRowProperties + TableCells []*WTableCell +} + +// UnmarshalXML ... +func (w *WTableRow) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for _, attr := range start.Attr { + switch attr.Name.Local { + case "rsidR": + w.RsidR = attr.Value + case "rsidTr": + w.RsidTr = attr.Value + default: + // ignore other attributes + } + } + + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if tt, ok := t.(xml.StartElement); ok { + switch tt.Name.Local { + case "trPr": + w.TableRowProperties = new(WTableRowProperties) + err = d.DecodeElement(w.TableRowProperties, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + case "tc": + var value WTableCell + err = d.DecodeElement(&value, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + w.TableCells = append(w.TableCells, &value) + default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } + continue + } + } + } + return nil +} + +// WTableRowProperties represents the properties of a row within a table. +type WTableRowProperties struct { + XMLName xml.Name `xml:"w:trPr,omitempty"` + TrHeight *WTrHeight +} + +// UnmarshalXML ... +func (t *WTableRowProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if elem, ok := tok.(xml.StartElement); ok { + switch elem.Name.Local { + case "trHeight": + th := new(WTrHeight) + for _, attr := range elem.Attr { + if attr.Name.Local == "val" { + th.Val, err = strconv.Atoi(attr.Value) + if err != nil { + return err + } + break + } + } + t.TrHeight = th + err = d.Skip() + if err != nil { + return err + } + default: + err = d.Skip() + if err != nil { + return err + } + } + } + } + return nil +} + +// WTrHeight represents the height of a row within a table. +type WTrHeight struct { + XMLName xml.Name `xml:"w:trHeight,omitempty"` + Val int `xml:"w:val,attr"` +} + +// WTableCell represents a cell within a table. +type WTableCell struct { + mu sync.Mutex + + XMLName xml.Name `xml:"w:tc,omitempty"` + TcPr *WTcPr + Paragraphs []Paragraph `xml:"w:p,omitempty"` + + file *Docx +} + +// UnmarshalXML ... +func (r *WTableCell) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if tt, ok := t.(xml.StartElement); ok { + switch tt.Name.Local { + case "p": + var value Paragraph + err = d.DecodeElement(&value, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + if len(value.Children) > 0 { + value.file = r.file + r.mu.Lock() + r.Paragraphs = append(r.Paragraphs, value) + r.mu.Unlock() + } + case "tcPr": + var value WTcPr + err = d.DecodeElement(&value, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + r.TcPr = &value + default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } + continue + } + } + } + return nil +} + +// WTcPr represents the properties of a table cell. +type WTcPr struct { + XMLName xml.Name `xml:"w:tcPr,omitempty"` + TableCellWidth *WTableCellWidth `xml:"w:tcW,omitempty"` + GridSpan *WGridSpan `xml:"w:gridSpan,omitempty"` + VAlign *WVerticalAlignment `xml:"w:vAlign,omitempty"` +} + +// UnmarshalXML ... +func (r *WTcPr) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if tt, ok := t.(xml.StartElement); ok { + switch tt.Name.Local { + case "tcW": + r.TableCellWidth = new(WTableCellWidth) + r.TableCellWidth.W, err = strconv.ParseInt(getAtt(tt.Attr, "w"), 10, 64) + if err != nil { + return err + } + r.TableCellWidth.Type = getAtt(tt.Attr, "type") + case "gridSpan": + r.GridSpan = new(WGridSpan) + r.GridSpan.Val, err = strconv.Atoi(getAtt(tt.Attr, "val")) + if err != nil { + return err + } + case "vAlign": + r.VAlign = new(WVerticalAlignment) + r.VAlign.Val = getAtt(tt.Attr, "val") + default: + err = d.Skip() // skip unsupported tags + if err != nil { + return err + } + continue + } + } + } + return nil +} + +// WTableCellWidth represents the width of a table cell. +type WTableCellWidth struct { + XMLName xml.Name `xml:"w:tcW,omitempty"` + W int64 `xml:"w,attr"` + Type string `xml:"type,attr"` +} + +// WGridSpan represents the number of grid columns this cell should span. +type WGridSpan struct { + XMLName xml.Name `xml:"w:gridSpan,omitempty"` + Val int `xml:"val,attr"` +} + +// WVerticalAlignment represents the vertical alignment of the content of a cell. +type WVerticalAlignment struct { + XMLName xml.Name `xml:"w:vAlign,omitempty"` + Val string `xml:"val,attr"` +}