1
0
mirror of https://github.com/fumiama/go-docx.git synced 2026-06-12 11:40:28 +08:00

add KeepElements & DropDrawingOf

This commit is contained in:
源文雨
2023-03-09 20:50:39 +08:00
parent 54e7323f63
commit b68d555c67
7 changed files with 275 additions and 13 deletions

View File

@@ -32,12 +32,12 @@ func (f *Docx) AddParagraph() *Paragraph {
// AddParagraph adds a new paragraph // AddParagraph adds a new paragraph
func (c *WTableCell) AddParagraph() *Paragraph { func (c *WTableCell) AddParagraph() *Paragraph {
c.Paragraphs = append(c.Paragraphs, Paragraph{ c.Paragraphs = append(c.Paragraphs, &Paragraph{
Children: make([]interface{}, 0, 64), Children: make([]interface{}, 0, 64),
file: c.file, file: c.file,
}) })
return &c.Paragraphs[len(c.Paragraphs)-1] return c.Paragraphs[len(c.Paragraphs)-1]
} }
// Justification allows to set para's horizontal alignment // Justification allows to set para's horizontal alignment

View File

@@ -33,6 +33,7 @@ import (
func main() { func main() {
fileLocation := flag.String("f", "new-file.docx", "file location") fileLocation := flag.String("f", "new-file.docx", "file location")
analyzeOnly := flag.Bool("a", false, "analyze file only") analyzeOnly := flag.Bool("a", false, "analyze file only")
clean := flag.Bool("c", false, "clean mode (keep text and picture only)")
unm := flag.Bool("u", false, "lease unmarshalled file") unm := flag.Bool("u", false, "lease unmarshalled file")
flag.Parse() flag.Parse()
var w *docx.Docx var w *docx.Docx
@@ -173,6 +174,9 @@ func main() {
if err != nil { if err != nil {
panic(err) panic(err)
} }
if *clean {
doc.Document.Body.DropDrawingOf("NilPicture")
}
if *unm { if *unm {
i := strings.LastIndex(*fileLocation, "/") i := strings.LastIndex(*fileLocation, "/")
name := (*fileLocation)[:i+1] + "unmarshal_" + (*fileLocation)[i+1:] name := (*fileLocation)[:i+1] + "unmarshal_" + (*fileLocation)[i+1:]
@@ -191,9 +195,11 @@ func main() {
} }
fmt.Println("Plain text:") fmt.Println("Plain text:")
for _, it := range doc.Document.Body.Items { for _, it := range doc.Document.Body.Items {
switch it.(type) { switch o := it.(type) {
case *docx.Paragraph, *docx.Table: // printable case *docx.Paragraph: // printable
fmt.Println(it) fmt.Println(o.String())
case *docx.Table: // printable
fmt.Println(o.String())
} }
} }
fmt.Println("End of main") fmt.Println("End of main")

View File

@@ -23,7 +23,10 @@ package docx
import ( import (
"encoding/xml" "encoding/xml"
"io" "io"
"reflect"
"regexp"
"strings" "strings"
"unsafe"
) )
//nolint:revive,stylecheck //nolint:revive,stylecheck
@@ -74,19 +77,19 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
switch tt.Name.Local { switch tt.Name.Local {
case "p": case "p":
var value Paragraph var value Paragraph
value.file = b.file
err = d.DecodeElement(&value, &tt) err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") { if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err return err
} }
value.file = b.file
b.Items = append(b.Items, &value) b.Items = append(b.Items, &value)
case "tbl": case "tbl":
var value Table var value Table
value.file = b.file
err = d.DecodeElement(&value, &tt) err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") { if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err return err
} }
value.file = b.file
b.Items = append(b.Items, &value) b.Items = append(b.Items, &value)
default: default:
err = d.Skip() // skip unsupported tags err = d.Skip() // skip unsupported tags
@@ -99,6 +102,51 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
return nil return nil
} }
// KeepElements keeps the named elements and removes all others.
//
// Each name is compared against the dynamic type of an item as rendered by
// reflect (pointer types include the leading "*").
//
// names: *docx.Paragraph *docx.Table
//
// Nil items never match a name and are therefore removed. Calling
// KeepElements without any name removes every item.
func (b *Body) KeepElements(name ...string) {
	wanted := make(map[string]struct{}, len(name))
	for _, n := range name {
		wanted[n] = struct{}{}
	}
	kept := make([]interface{}, 0, len(b.Items))
	for _, item := range b.Items {
		if item == nil {
			// reflect.TypeOf(nil) is nil, so there is no type name to look up.
			continue
		}
		if _, ok := wanted[reflect.TypeOf(item).String()]; ok {
			kept = append(kept, item)
		}
	}
	b.Items = kept
}
// DropDrawingOf drops all matched drawings in the body.
//
// It calls the *Paragraph method named "Drop"+name on every paragraph that is
// a direct item of the body and on every paragraph found in the cells of the
// body's tables. If *Paragraph has no such method, the body is left unchanged.
//
// name: Canvas, Shape, Group, ShapeAndCanvas, ShapeAndCanvasAndGroup, NilPicture
func (b *Body) DropDrawingOf(name string) {
	// The method depends only on the type, so resolve it once up front and
	// let reflect report a missing method instead of inspecting the internal
	// layout of reflect.Value.
	m, ok := reflect.TypeOf((*Paragraph)(nil)).MethodByName("Drop" + name)
	if !ok {
		return
	}
	drop := func(p *Paragraph) {
		if p == nil {
			return
		}
		// m.Func takes the receiver as its first argument.
		_ = m.Func.Call([]reflect.Value{reflect.ValueOf(p)})
	}
	for _, item := range b.Items {
		switch o := item.(type) {
		case *Paragraph:
			drop(o)
		case *Table:
			for _, tr := range o.TableRows {
				for _, tc := range tr.TableCells {
					for _, p := range tc.Paragraphs {
						drop(p)
					}
				}
			}
		}
	}
}
// Document <w:document> // Document <w:document>
type Document struct { type Document struct {
XMLName xml.Name `xml:"w:document"` XMLName xml.Name `xml:"w:document"`
@@ -150,6 +198,13 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
// ParagraphSplitRule check whether the paragraph is a separator or not // ParagraphSplitRule check whether the paragraph is a separator or not
type ParagraphSplitRule func(*Paragraph) bool type ParagraphSplitRule func(*Paragraph) bool
// SplitDocxByPlainTextRegex returns a ParagraphSplitRule that reports whether
// re matches the text produced by the paragraph's String method.
func SplitDocxByPlainTextRegex(re *regexp.Regexp) ParagraphSplitRule {
	matches := func(para *Paragraph) bool {
		text := para.String()
		return re.MatchString(text)
	}
	return matches
}
// SplitByParagraph splits a doc to many docs by using a matched paragraph // SplitByParagraph splits a doc to many docs by using a matched paragraph
// as the separator. // as the separator.
// //
@@ -263,10 +318,11 @@ func (t *Table) copymedia(to *Docx) (nt Table) {
ntr.file = to ntr.file = to
for _, tc := range tr.TableCells { for _, tc := range tr.TableCells {
ntc := *tc ntc := *tc
ntc.Paragraphs = make([]Paragraph, 0, len(tc.Paragraphs)) ntc.Paragraphs = make([]*Paragraph, 0, len(tc.Paragraphs))
ntc.file = to ntc.file = to
for _, p := range tc.Paragraphs { for _, p := range tc.Paragraphs {
ntc.Paragraphs = append(ntc.Paragraphs, p.copymedia(to)) np := p.copymedia(to)
ntc.Paragraphs = append(ntc.Paragraphs, &np)
} }
ntr.TableCells = append(ntr.TableCells, &ntc) ntr.TableCells = append(ntr.TableCells, &ntc)
} }

View File

@@ -23,6 +23,7 @@ package docx
import ( import (
"encoding/xml" "encoding/xml"
"io" "io"
"reflect"
"strconv" "strconv"
"strings" "strings"
) )
@@ -296,3 +297,183 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
p.Children = children p.Children = children
return nil return nil
} }
// KeepElements keeps the named elements and removes all others.
//
// Each name is compared against the dynamic type of a child as rendered by
// reflect (pointer types include the leading "*").
//
// names: *docx.Hyperlink *docx.Run *docx.RunProperties
//
// Nil children never match a name and are therefore removed. Calling
// KeepElements without any name removes every child.
func (p *Paragraph) KeepElements(name ...string) {
	wanted := make(map[string]struct{}, len(name))
	for _, n := range name {
		wanted[n] = struct{}{}
	}
	kept := make([]interface{}, 0, len(p.Children))
	for _, item := range p.Children {
		if item == nil {
			// reflect.TypeOf(nil) is nil, so there is no type name to look up.
			continue
		}
		if _, ok := wanted[reflect.TypeOf(item).String()]; ok {
			kept = append(kept, item)
		}
	}
	p.Children = kept
}
// dropDrawings rebuilds the child list of every *Run that is a direct child of
// the paragraph, leaving out each *Drawing for which drop reports true.
// Paragraph children that are not runs are ignored, and run children that are
// not drawings are always kept. Every run receives a newly allocated Children
// slice, even when nothing is removed.
func (p *Paragraph) dropDrawings(drop func(d *Drawing) bool) {
	for _, pc := range p.Children {
		r, ok := pc.(*Run)
		if !ok {
			continue
		}
		kept := make([]interface{}, 0, len(r.Children))
		for _, rc := range r.Children {
			if d, ok := rc.(*Drawing); ok && drop(d) {
				continue
			}
			kept = append(kept, rc)
		}
		r.Children = kept
	}
}

// DropCanvas drops all canvases in paragraph.
//
// A drawing is removed when its inline or anchor graphic data has a non-nil
// Canvas.
func (p *Paragraph) DropCanvas() {
	p.dropDrawings(func(d *Drawing) bool {
		if in := d.Inline; in != nil && in.Graphic != nil && in.Graphic.GraphicData != nil {
			if in.Graphic.GraphicData.Canvas != nil {
				return true
			}
		}
		if an := d.Anchor; an != nil && an.Graphic != nil && an.Graphic.GraphicData != nil {
			if an.Graphic.GraphicData.Canvas != nil {
				return true
			}
		}
		return false
	})
}

// DropShape drops all shapes in paragraph.
//
// A drawing is removed when its inline or anchor graphic data has a non-nil
// Shape.
func (p *Paragraph) DropShape() {
	p.dropDrawings(func(d *Drawing) bool {
		if in := d.Inline; in != nil && in.Graphic != nil && in.Graphic.GraphicData != nil {
			if in.Graphic.GraphicData.Shape != nil {
				return true
			}
		}
		if an := d.Anchor; an != nil && an.Graphic != nil && an.Graphic.GraphicData != nil {
			if an.Graphic.GraphicData.Shape != nil {
				return true
			}
		}
		return false
	})
}

// DropGroup drops all groups in paragraph.
//
// A drawing is removed when its inline or anchor graphic data has a non-nil
// Group.
func (p *Paragraph) DropGroup() {
	p.dropDrawings(func(d *Drawing) bool {
		if in := d.Inline; in != nil && in.Graphic != nil && in.Graphic.GraphicData != nil {
			if in.Graphic.GraphicData.Group != nil {
				return true
			}
		}
		if an := d.Anchor; an != nil && an.Graphic != nil && an.Graphic.GraphicData != nil {
			if an.Graphic.GraphicData.Group != nil {
				return true
			}
		}
		return false
	})
}

// DropShapeAndCanvas drops all shapes and canvases in paragraph.
//
// A drawing is removed when its inline or anchor graphic data has a non-nil
// Shape or Canvas.
func (p *Paragraph) DropShapeAndCanvas() {
	p.dropDrawings(func(d *Drawing) bool {
		if in := d.Inline; in != nil && in.Graphic != nil && in.Graphic.GraphicData != nil {
			gd := in.Graphic.GraphicData
			if gd.Shape != nil || gd.Canvas != nil {
				return true
			}
		}
		if an := d.Anchor; an != nil && an.Graphic != nil && an.Graphic.GraphicData != nil {
			gd := an.Graphic.GraphicData
			if gd.Shape != nil || gd.Canvas != nil {
				return true
			}
		}
		return false
	})
}

// DropShapeAndCanvasAndGroup drops all shapes, canvases and groups in paragraph.
//
// A drawing is removed when its inline or anchor graphic data has a non-nil
// Shape, Canvas or Group.
func (p *Paragraph) DropShapeAndCanvasAndGroup() {
	p.dropDrawings(func(d *Drawing) bool {
		if in := d.Inline; in != nil && in.Graphic != nil && in.Graphic.GraphicData != nil {
			gd := in.Graphic.GraphicData
			if gd.Shape != nil || gd.Canvas != nil || gd.Group != nil {
				return true
			}
		}
		if an := d.Anchor; an != nil && an.Graphic != nil && an.Graphic.GraphicData != nil {
			gd := an.Graphic.GraphicData
			if gd.Shape != nil || gd.Canvas != nil || gd.Group != nil {
				return true
			}
		}
		return false
	})
}

// DropNilPicture drops all drawings with nil picture in paragraph.
//
// A drawing is removed when it has neither an inline nor an anchor part, or
// when a part that is present does not lead to a non-nil Pic (its Graphic,
// GraphicData or Pic is nil).
func (p *Paragraph) DropNilPicture() {
	p.dropDrawings(func(d *Drawing) bool {
		if d.Inline == nil && d.Anchor == nil {
			return true
		}
		if in := d.Inline; in != nil {
			if in.Graphic == nil || in.Graphic.GraphicData == nil || in.Graphic.GraphicData.Pic == nil {
				return true
			}
		}
		if an := d.Anchor; an != nil {
			if an.Graphic == nil || an.Graphic.GraphicData == nil || an.Graphic.GraphicData.Pic == nil {
				return true
			}
		}
		return false
	})
}

View File

@@ -23,6 +23,7 @@ package docx
import ( import (
"encoding/xml" "encoding/xml"
"io" "io"
"reflect"
"strconv" "strconv"
"strings" "strings"
) )
@@ -175,6 +176,24 @@ func (r *Run) parse(d *xml.Decoder, tt xml.StartElement) (child interface{}, err
return return
} }
// KeepElements keeps the named elements and removes all others.
//
// Each name is compared against the dynamic type of a child as rendered by
// reflect (pointer types include the leading "*").
//
// names: *docx.Text *docx.Drawing *docx.Tab *docx.BarterRabbet
//
// Nil children never match a name and are therefore removed. Calling
// KeepElements without any name removes every child.
func (r *Run) KeepElements(name ...string) {
	wanted := make(map[string]struct{}, len(name))
	for _, n := range name {
		wanted[n] = struct{}{}
	}
	kept := make([]interface{}, 0, len(r.Children))
	for _, item := range r.Children {
		if item == nil {
			// reflect.TypeOf(nil) is nil, so there is no type name to look up.
			continue
		}
		if _, ok := wanted[reflect.TypeOf(item).String()]; ok {
			kept = append(kept, item)
		}
	}
	r.Children = kept
}
// RunProperties encapsulates visual properties of a run // RunProperties encapsulates visual properties of a run
type RunProperties struct { type RunProperties struct {
XMLName xml.Name `xml:"w:rPr,omitempty"` XMLName xml.Name `xml:"w:rPr,omitempty"`

View File

@@ -600,11 +600,11 @@ func (c *WTextBoxContent) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e
switch tt.Name.Local { switch tt.Name.Local {
case "p": case "p":
var value Paragraph var value Paragraph
value.file = c.file
err = d.DecodeElement(&value, &tt) err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") { if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err return err
} }
value.file = c.file
c.Paragraphs = append(c.Paragraphs, value) c.Paragraphs = append(c.Paragraphs, value)
default: default:
err = d.Skip() // skip unsupported tags err = d.Skip() // skip unsupported tags

View File

@@ -536,7 +536,7 @@ type WTableRowHeight struct {
type WTableCell struct { type WTableCell struct {
XMLName xml.Name `xml:"w:tc,omitempty"` XMLName xml.Name `xml:"w:tc,omitempty"`
TableCellProperties *WTableCellProperties TableCellProperties *WTableCellProperties
Paragraphs []Paragraph `xml:"w:p,omitempty"` Paragraphs []*Paragraph `xml:"w:p,omitempty"`
file *Docx file *Docx
} }
@@ -556,12 +556,12 @@ func (c *WTableCell) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
switch tt.Name.Local { switch tt.Name.Local {
case "p": case "p":
var value Paragraph var value Paragraph
value.file = c.file
err = d.DecodeElement(&value, &tt) err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") { if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err return err
} }
value.file = c.file c.Paragraphs = append(c.Paragraphs, &value)
c.Paragraphs = append(c.Paragraphs, value)
case "tcPr": case "tcPr":
var value WTableCellProperties var value WTableCellProperties
err = d.DecodeElement(&value, &tt) err = d.DecodeElement(&value, &tt)