mirror of
https://github.com/fumiama/go-docx.git
synced 2026-06-27 14:40:24 +08:00
add KeepElements & DropDrawingOf
This commit is contained in:
@@ -32,12 +32,12 @@ func (f *Docx) AddParagraph() *Paragraph {
|
||||
|
||||
// AddParagraph adds a new paragraph
|
||||
func (c *WTableCell) AddParagraph() *Paragraph {
|
||||
c.Paragraphs = append(c.Paragraphs, Paragraph{
|
||||
c.Paragraphs = append(c.Paragraphs, &Paragraph{
|
||||
Children: make([]interface{}, 0, 64),
|
||||
file: c.file,
|
||||
})
|
||||
|
||||
return &c.Paragraphs[len(c.Paragraphs)-1]
|
||||
return c.Paragraphs[len(c.Paragraphs)-1]
|
||||
}
|
||||
|
||||
// Justification allows setting the paragraph's horizontal alignment
|
||||
|
||||
@@ -33,6 +33,7 @@ import (
|
||||
func main() {
|
||||
fileLocation := flag.String("f", "new-file.docx", "file location")
|
||||
analyzeOnly := flag.Bool("a", false, "analyze file only")
|
||||
clean := flag.Bool("c", false, "clean mode (keep text and picture only)")
|
||||
unm := flag.Bool("u", false, "lease unmarshalled file")
|
||||
flag.Parse()
|
||||
var w *docx.Docx
|
||||
@@ -173,6 +174,9 @@ func main() {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if *clean {
|
||||
doc.Document.Body.DropDrawingOf("NilPicture")
|
||||
}
|
||||
if *unm {
|
||||
i := strings.LastIndex(*fileLocation, "/")
|
||||
name := (*fileLocation)[:i+1] + "unmarshal_" + (*fileLocation)[i+1:]
|
||||
@@ -191,9 +195,11 @@ func main() {
|
||||
}
|
||||
fmt.Println("Plain text:")
|
||||
for _, it := range doc.Document.Body.Items {
|
||||
switch it.(type) {
|
||||
case *docx.Paragraph, *docx.Table: // printable
|
||||
fmt.Println(it)
|
||||
switch o := it.(type) {
|
||||
case *docx.Paragraph: // printable
|
||||
fmt.Println(o.String())
|
||||
case *docx.Table: // printable
|
||||
fmt.Println(o.String())
|
||||
}
|
||||
}
|
||||
fmt.Println("End of main")
|
||||
|
||||
64
structdoc.go
64
structdoc.go
@@ -23,7 +23,10 @@ package docx
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
//nolint:revive,stylecheck
|
||||
@@ -74,19 +77,19 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
switch tt.Name.Local {
|
||||
case "p":
|
||||
var value Paragraph
|
||||
value.file = b.file
|
||||
err = d.DecodeElement(&value, &tt)
|
||||
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
|
||||
return err
|
||||
}
|
||||
value.file = b.file
|
||||
b.Items = append(b.Items, &value)
|
||||
case "tbl":
|
||||
var value Table
|
||||
value.file = b.file
|
||||
err = d.DecodeElement(&value, &tt)
|
||||
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
|
||||
return err
|
||||
}
|
||||
value.file = b.file
|
||||
b.Items = append(b.Items, &value)
|
||||
default:
|
||||
err = d.Skip() // skip unsupported tags
|
||||
@@ -99,6 +102,51 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// KeepElements keep named elems amd removes others
|
||||
//
|
||||
// names: *docx.Paragraph *docx.Table
|
||||
func (b *Body) KeepElements(name ...string) {
|
||||
items := make([]interface{}, 0, len(b.Items))
|
||||
namemap := make(map[string]struct{}, len(name)*2)
|
||||
for _, n := range name {
|
||||
namemap[n] = struct{}{}
|
||||
}
|
||||
for _, item := range b.Items {
|
||||
_, ok := namemap[reflect.ValueOf(item).Type().String()]
|
||||
if ok {
|
||||
items = append(items, item)
|
||||
}
|
||||
}
|
||||
b.Items = items
|
||||
}
|
||||
|
||||
// DropDrawingOf drops all matched drawing in body
|
||||
// name: Canvas, Shape, Group, ShapeAndCanvas, ShapeAndCanvasAndGroup, NilPicture
|
||||
func (b *Body) DropDrawingOf(name string) {
|
||||
for _, item := range b.Items {
|
||||
switch o := item.(type) {
|
||||
case *Paragraph:
|
||||
f := reflect.ValueOf(o).MethodByName("Drop" + name)
|
||||
if *(*uintptr)(unsafe.Pointer(&f)) == 0 {
|
||||
continue
|
||||
}
|
||||
_ = f.Call(nil)
|
||||
case *Table:
|
||||
for _, tr := range o.TableRows {
|
||||
for _, tc := range tr.TableCells {
|
||||
for _, p := range tc.Paragraphs {
|
||||
f := reflect.ValueOf(p).MethodByName("Drop" + name)
|
||||
if *(*uintptr)(unsafe.Pointer(&f)) == 0 {
|
||||
continue
|
||||
}
|
||||
_ = f.Call(nil)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Document <w:document>
|
||||
type Document struct {
|
||||
XMLName xml.Name `xml:"w:document"`
|
||||
@@ -150,6 +198,13 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
|
||||
// ParagraphSplitRule checks whether the paragraph is a separator or not
|
||||
type ParagraphSplitRule func(*Paragraph) bool
|
||||
|
||||
// SplitDocxByPlainTextRegex matches p.String()
|
||||
func SplitDocxByPlainTextRegex(re *regexp.Regexp) ParagraphSplitRule {
|
||||
return func(p *Paragraph) bool {
|
||||
return re.MatchString(p.String())
|
||||
}
|
||||
}
|
||||
|
||||
// SplitByParagraph splits a doc to many docs by using a matched paragraph
|
||||
// as the separator.
|
||||
//
|
||||
@@ -263,10 +318,11 @@ func (t *Table) copymedia(to *Docx) (nt Table) {
|
||||
ntr.file = to
|
||||
for _, tc := range tr.TableCells {
|
||||
ntc := *tc
|
||||
ntc.Paragraphs = make([]Paragraph, 0, len(tc.Paragraphs))
|
||||
ntc.Paragraphs = make([]*Paragraph, 0, len(tc.Paragraphs))
|
||||
ntc.file = to
|
||||
for _, p := range tc.Paragraphs {
|
||||
ntc.Paragraphs = append(ntc.Paragraphs, p.copymedia(to))
|
||||
np := p.copymedia(to)
|
||||
ntc.Paragraphs = append(ntc.Paragraphs, &np)
|
||||
}
|
||||
ntr.TableCells = append(ntr.TableCells, &ntc)
|
||||
}
|
||||
|
||||
181
structpara.go
181
structpara.go
@@ -23,6 +23,7 @@ package docx
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
@@ -296,3 +297,183 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
p.Children = children
|
||||
return nil
|
||||
}
|
||||
|
||||
// KeepElements keep named elems amd removes others
|
||||
//
|
||||
// names: *docx.Hyperlink *docx.Run *docx.RunProperties
|
||||
func (p *Paragraph) KeepElements(name ...string) {
|
||||
items := make([]interface{}, 0, len(p.Children))
|
||||
namemap := make(map[string]struct{}, len(name)*2)
|
||||
for _, n := range name {
|
||||
namemap[n] = struct{}{}
|
||||
}
|
||||
for _, item := range p.Children {
|
||||
_, ok := namemap[reflect.ValueOf(item).Type().String()]
|
||||
if ok {
|
||||
items = append(items, item)
|
||||
}
|
||||
}
|
||||
p.Children = items
|
||||
}
|
||||
|
||||
// DropCanvas drops all canvases in paragraph
|
||||
func (p *Paragraph) DropCanvas() {
|
||||
for _, pc := range p.Children {
|
||||
if r, ok := pc.(*Run); ok {
|
||||
nrc := make([]interface{}, 0, len(r.Children))
|
||||
for _, rc := range r.Children {
|
||||
if d, ok := rc.(*Drawing); ok {
|
||||
if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData != nil {
|
||||
if d.Inline.Graphic.GraphicData.Canvas != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData != nil {
|
||||
if d.Anchor.Graphic.GraphicData.Canvas != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
nrc = append(nrc, rc)
|
||||
}
|
||||
r.Children = nrc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DropShape drops all shapes in paragraph
|
||||
func (p *Paragraph) DropShape() {
|
||||
for _, pc := range p.Children {
|
||||
if r, ok := pc.(*Run); ok {
|
||||
nrc := make([]interface{}, 0, len(r.Children))
|
||||
for _, rc := range r.Children {
|
||||
if d, ok := rc.(*Drawing); ok {
|
||||
if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData != nil {
|
||||
if d.Inline.Graphic.GraphicData.Shape != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData != nil {
|
||||
if d.Anchor.Graphic.GraphicData.Shape != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
nrc = append(nrc, rc)
|
||||
}
|
||||
r.Children = nrc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DropGroup drops all groups in paragraph
|
||||
func (p *Paragraph) DropGroup() {
|
||||
for _, pc := range p.Children {
|
||||
if r, ok := pc.(*Run); ok {
|
||||
nrc := make([]interface{}, 0, len(r.Children))
|
||||
for _, rc := range r.Children {
|
||||
if d, ok := rc.(*Drawing); ok {
|
||||
if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData != nil {
|
||||
if d.Inline.Graphic.GraphicData.Group != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData != nil {
|
||||
if d.Anchor.Graphic.GraphicData.Group != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
nrc = append(nrc, rc)
|
||||
}
|
||||
r.Children = nrc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DropShapeAndCanvas drops all shapes and canvases in paragraph
|
||||
func (p *Paragraph) DropShapeAndCanvas() {
|
||||
for _, pc := range p.Children {
|
||||
if r, ok := pc.(*Run); ok {
|
||||
nrc := make([]interface{}, 0, len(r.Children))
|
||||
for _, rc := range r.Children {
|
||||
if d, ok := rc.(*Drawing); ok {
|
||||
if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData != nil {
|
||||
if d.Inline.Graphic.GraphicData.Shape != nil || d.Inline.Graphic.GraphicData.Canvas != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData != nil {
|
||||
if d.Anchor.Graphic.GraphicData.Shape != nil || d.Anchor.Graphic.GraphicData.Canvas != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
nrc = append(nrc, rc)
|
||||
}
|
||||
r.Children = nrc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DropShapeAndCanvasAndGroup drops all shapes, canvases and groups in paragraph
|
||||
func (p *Paragraph) DropShapeAndCanvasAndGroup() {
|
||||
for _, pc := range p.Children {
|
||||
if r, ok := pc.(*Run); ok {
|
||||
nrc := make([]interface{}, 0, len(r.Children))
|
||||
for _, rc := range r.Children {
|
||||
if d, ok := rc.(*Drawing); ok {
|
||||
if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData != nil {
|
||||
if d.Inline.Graphic.GraphicData.Shape != nil || d.Inline.Graphic.GraphicData.Canvas != nil || d.Inline.Graphic.GraphicData.Group != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData != nil {
|
||||
if d.Anchor.Graphic.GraphicData.Shape != nil || d.Anchor.Graphic.GraphicData.Canvas != nil || d.Anchor.Graphic.GraphicData.Group != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
nrc = append(nrc, rc)
|
||||
}
|
||||
r.Children = nrc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DropNilPicture drops all drawings with nil picture in paragraph
|
||||
func (p *Paragraph) DropNilPicture() {
|
||||
for _, pc := range p.Children {
|
||||
if r, ok := pc.(*Run); ok {
|
||||
nrc := make([]interface{}, 0, len(r.Children))
|
||||
for _, rc := range r.Children {
|
||||
if d, ok := rc.(*Drawing); ok {
|
||||
if d.Inline == nil && d.Anchor == nil {
|
||||
continue
|
||||
}
|
||||
if (d.Inline != nil && d.Inline.Graphic == nil) || (d.Anchor != nil && d.Anchor.Graphic == nil) {
|
||||
continue
|
||||
}
|
||||
if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData == nil {
|
||||
continue
|
||||
}
|
||||
if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData == nil {
|
||||
continue
|
||||
}
|
||||
if d.Inline != nil && d.Inline.Graphic != nil && d.Inline.Graphic.GraphicData != nil {
|
||||
if d.Inline.Graphic.GraphicData.Pic == nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if d.Anchor != nil && d.Anchor.Graphic != nil && d.Anchor.Graphic.GraphicData != nil {
|
||||
if d.Anchor.Graphic.GraphicData.Pic == nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
nrc = append(nrc, rc)
|
||||
}
|
||||
r.Children = nrc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
19
structrun.go
19
structrun.go
@@ -23,6 +23,7 @@ package docx
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
@@ -175,6 +176,24 @@ func (r *Run) parse(d *xml.Decoder, tt xml.StartElement) (child interface{}, err
|
||||
return
|
||||
}
|
||||
|
||||
// KeepElements keep named elems amd removes others
|
||||
//
|
||||
// names: *docx.Text *docx.Drawing *docx.Tab *docx.BarterRabbet
|
||||
func (r *Run) KeepElements(name ...string) {
|
||||
items := make([]interface{}, 0, len(r.Children))
|
||||
namemap := make(map[string]struct{}, len(name)*2)
|
||||
for _, n := range name {
|
||||
namemap[n] = struct{}{}
|
||||
}
|
||||
for _, item := range r.Children {
|
||||
_, ok := namemap[reflect.ValueOf(item).Type().String()]
|
||||
if ok {
|
||||
items = append(items, item)
|
||||
}
|
||||
}
|
||||
r.Children = items
|
||||
}
|
||||
|
||||
// RunProperties encapsulates visual properties of a run
|
||||
type RunProperties struct {
|
||||
XMLName xml.Name `xml:"w:rPr,omitempty"`
|
||||
|
||||
@@ -600,11 +600,11 @@ func (c *WTextBoxContent) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e
|
||||
switch tt.Name.Local {
|
||||
case "p":
|
||||
var value Paragraph
|
||||
value.file = c.file
|
||||
err = d.DecodeElement(&value, &tt)
|
||||
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
|
||||
return err
|
||||
}
|
||||
value.file = c.file
|
||||
c.Paragraphs = append(c.Paragraphs, value)
|
||||
default:
|
||||
err = d.Skip() // skip unsupported tags
|
||||
|
||||
@@ -536,7 +536,7 @@ type WTableRowHeight struct {
|
||||
type WTableCell struct {
|
||||
XMLName xml.Name `xml:"w:tc,omitempty"`
|
||||
TableCellProperties *WTableCellProperties
|
||||
Paragraphs []Paragraph `xml:"w:p,omitempty"`
|
||||
Paragraphs []*Paragraph `xml:"w:p,omitempty"`
|
||||
|
||||
file *Docx
|
||||
}
|
||||
@@ -556,12 +556,12 @@ func (c *WTableCell) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
|
||||
switch tt.Name.Local {
|
||||
case "p":
|
||||
var value Paragraph
|
||||
value.file = c.file
|
||||
err = d.DecodeElement(&value, &tt)
|
||||
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
|
||||
return err
|
||||
}
|
||||
value.file = c.file
|
||||
c.Paragraphs = append(c.Paragraphs, value)
|
||||
c.Paragraphs = append(c.Paragraphs, &value)
|
||||
case "tcPr":
|
||||
var value WTableCellProperties
|
||||
err = d.DecodeElement(&value, &tt)
|
||||
|
||||
Reference in New Issue
Block a user