From 45efdb33782f2a1a40c1cae501509afc7210ea9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 10 Mar 2023 00:43:36 +0800 Subject: [PATCH] add more attrs --- cmd/main/main.go | 17 +++++++++++++ structeffects.go | 63 +++++++++++++++++++++++++++++++++++++++++++++--- structpara.go | 8 ++++++ structrun.go | 8 ++++++ structtable.go | 6 ++--- structtext.go | 60 ++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 154 insertions(+), 8 deletions(-) diff --git a/cmd/main/main.go b/cmd/main/main.go index bea06f1..04dd303 100644 --- a/cmd/main/main.go +++ b/cmd/main/main.go @@ -38,6 +38,7 @@ func main() { clean := flag.Bool("c", false, "clean mode (keep text and picture only)") unm := flag.Bool("u", false, "lease unmarshalled file") splitre := flag.String("s", "", "split file into many docxs by matching regex") + droppp := flag.Bool("p", false, "drop all paragraph properties") flag.Parse() var w *docx.Docx if !*analyzeOnly { @@ -180,6 +181,22 @@ func main() { if *clean { doc.Document.Body.DropDrawingOf("NilPicture") } + if *droppp { + for _, it := range doc.Document.Body.Items { + switch o := it.(type) { + case *docx.Paragraph: // printable + o.Properties = nil + case *docx.Table: // printable + for _, tr := range o.TableRows { + for _, tc := range tr.TableCells { + for _, p := range tc.Paragraphs { + p.Properties = nil + } + } + } + } + } + } if *unm { i := strings.LastIndex(*fileLocation, "/") name := (*fileLocation)[:i+1] + "unmarshal_" + (*fileLocation)[i+1:] diff --git a/structeffects.go b/structeffects.go index c372f48..0302330 100644 --- a/structeffects.go +++ b/structeffects.go @@ -276,14 +276,33 @@ func (r *NonVisualProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElemen type Spacing struct { XMLName xml.Name `xml:"w:spacing,omitempty"` - Line int `xml:"w:line,attr"` - LineRule string `xml:"w:lineRule,attr"` + Val int `xml:"w:val,attr,omitempty"` + + BeforeLines int 
`xml:"w:beforeLines,attr,omitempty"` + Before int `xml:"w:before,attr,omitempty"` + Line int `xml:"w:line,attr,omitempty"` + LineRule string `xml:"w:lineRule,attr,omitempty"` } // UnmarshalXML ... func (s *Spacing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { for _, attr := range start.Attr { switch attr.Name.Local { + case "val": + s.Val, err = strconv.Atoi(attr.Value) + if err != nil { + return + } + case "beforeLines": + s.BeforeLines, err = strconv.Atoi(attr.Value) + if err != nil { + return + } + case "before": + s.Before, err = strconv.Atoi(attr.Value) + if err != nil { + return + } case "line": s.Line, err = strconv.Atoi(attr.Value) if err != nil { @@ -304,14 +323,34 @@ func (s *Spacing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err erro type Ind struct { XMLName xml.Name `xml:"w:ind,omitempty"` - FirstLineChars int `xml:"w:firstLineChars,attr"` - FirstLine int `xml:"w:firstLine,attr"` + LeftChars int `xml:"w:leftChars,attr,omitempty"` + Left int `xml:"w:left,attr,omitempty"` + FirstLineChars int `xml:"w:firstLineChars,attr,omitempty"` + FirstLine int `xml:"w:firstLine,attr,omitempty"` + HangingChars int `xml:"w:hangingChars,attr,omitempty"` + Hanging int `xml:"w:hanging,attr,omitempty"` } // UnmarshalXML ... 
func (i *Ind) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { for _, attr := range start.Attr { switch attr.Name.Local { + case "leftChars": + if attr.Value == "" { + continue + } + i.LeftChars, err = strconv.Atoi(attr.Value) + if err != nil { + return + } + case "left": + if attr.Value == "" { + continue + } + i.Left, err = strconv.Atoi(attr.Value) + if err != nil { + return + } case "firstLineChars": if attr.Value == "" { continue @@ -328,6 +367,22 @@ func (i *Ind) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) { if err != nil { return } + case "hangingChars": + if attr.Value == "" { + continue + } + i.HangingChars, err = strconv.Atoi(attr.Value) + if err != nil { + return + } + case "hanging": + if attr.Value == "" { + continue + } + i.Hanging, err = strconv.Atoi(attr.Value) + if err != nil { + return + } default: // ignore other attributes } diff --git a/structpara.go b/structpara.go index 0cd8a98..70c0148 100644 --- a/structpara.go +++ b/structpara.go @@ -31,6 +31,7 @@ import ( // ParagraphProperties type ParagraphProperties struct { XMLName xml.Name `xml:"w:pPr,omitempty"` + Tabs *Tabs Spacing *Spacing Ind *Ind Justification *Justification @@ -58,6 +59,13 @@ func (p *ParagraphProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElemen } if tt, ok := t.(xml.StartElement); ok { switch tt.Name.Local { + case "tabs": + var value Tabs + err = d.DecodeElement(&value, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + p.Tabs = &value case "spacing": var value Spacing err = d.DecodeElement(&value, &tt) diff --git a/structrun.go b/structrun.go index 2b4e75e..2973243 100644 --- a/structrun.go +++ b/structrun.go @@ -205,6 +205,7 @@ type RunProperties struct { Color *Color Size *Size SizeCs *SizeCs + Spacing *Spacing RunStyle *RunStyle Style *Style Shade *Shade @@ -255,6 +256,13 @@ func (r *RunProperties) UnmarshalXML(d *xml.Decoder, start xml.StartElement) err var value Size value.Val = 
getAtt(tt.Attr, "val") r.Size = &value + case "spacing": + var value Spacing + err = d.DecodeElement(&value, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + r.Spacing = &value case "szCs": var value SizeCs value.Val = getAtt(tt.Attr, "val") diff --git a/structtable.go b/structtable.go index 601bf0d..1c03594 100644 --- a/structtable.go +++ b/structtable.go @@ -773,9 +773,9 @@ func (w *WTableBorders) UnmarshalXML(d *xml.Decoder, start xml.StartElement) err // WTableBorder is a structure representing a single border of a Word table. type WTableBorder struct { - Val string `xml:"w:val,attr"` - Size int `xml:"w:sz,attr"` - Space int `xml:"w:space,attr"` + Val string `xml:"w:val,attr,omitempty"` + Size int `xml:"w:sz,attr,omitempty"` + Space int `xml:"w:space,attr,omitempty"` Color string `xml:"w:color,attr,omitempty"` } diff --git a/structtext.go b/structtext.go index d36c188..abea6fe 100644 --- a/structtext.go +++ b/structtext.go @@ -24,11 +24,69 @@ import ( "encoding/xml" "io" "reflect" + "strconv" + "strings" ) +// Tabs ... +type Tabs struct { + XMLName xml.Name `xml:"w:tabs,omitempty"` + Tabs []*Tab +} + +// UnmarshalXML ... +func (tb *Tabs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if tt, ok := t.(xml.StartElement); ok { + if tt.Name.Local == "tab" { + var value Tab + err := d.DecodeElement(&value, &tt) + if err != nil && !strings.HasPrefix(err.Error(), "expected") { + return err + } + tb.Tabs = append(tb.Tabs, &value) + } + } + } + + return nil +} + // Tab is the literal tab type Tab struct { - XMLName xml.Name `xml:"w:tab,omitempty"` + XMLName xml.Name `xml:"w:tab,omitempty"` + Val string `xml:"w:val,attr,omitempty"` + Position int `xml:"w:pos,attr,omitempty"` +} + +// UnmarshalXML ... 
+func (t *Tab) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	var err error
+	for _, attr := range start.Attr {
+		switch attr.Name.Local {
+		case "val":
+			t.Val = attr.Value
+		case "pos":
+			if attr.Value == "" {
+				continue // an empty pos attribute leaves Position at its zero value
+			}
+			t.Position, err = strconv.Atoi(attr.Value)
+			if err != nil {
+				return err
+			}
+		}
+	}
+	// Skip to the matching end tag instead of reading exactly one token, so a
+	// tab written as <w:tab ...></w:tab> (not self-closing) is fully consumed.
+	return d.Skip()
 }
 
 // BarterRabbet is