1
0
mirror of https://github.com/fumiama/go-docx.git synced 2026-06-04 23:30:25 +08:00
Files
go-docx/structdoc.go
mabiao0525 d74423910a feat: A3PageSize (#22)
* feat: A3PageSize

* feat: A3PageSize2

* feat: A3PageSize3

* feat: A3PageSize3

* feat: A3PageSize4

* Update structsect.go

---------

Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com>
2024-03-19 07:49:12 +00:00

372 lines
9.9 KiB
Go

/*
Copyright (c) 2020 gingfrederik
Copyright (c) 2021 Gonzalo Fernandez-Victorio
Copyright (c) 2021 Basement Crowd Ltd (https://www.basementcrowd.com)
Copyright (c) 2023 Fumiama Minamoto (源文雨)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package docx
import (
"encoding/xml"
"io"
"reflect"
"regexp"
"strings"
)
// XML namespace URIs used by this package.
//
// SplitByParagraph assigns XMLNS_W, XMLNS_R, XMLNS_WP, XMLNS_WPS, XMLNS_WPC and
// XMLNS_WPG to the xmlns:w, xmlns:r, xmlns:wp, xmlns:wps, xmlns:wpc and
// xmlns:wpg attributes of every Document it creates.
//
//nolint:revive,stylecheck
const (
// XMLNS_W is the WordprocessingML main namespace.
XMLNS_W = `http://schemas.openxmlformats.org/wordprocessingml/2006/main`
// XMLNS_R is the officeDocument relationships namespace.
XMLNS_R = `http://schemas.openxmlformats.org/officeDocument/2006/relationships`
// XMLNS_WP is the DrawingML wordprocessingDrawing namespace.
XMLNS_WP = `http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing`
// XMLNS_WPS is the Word 2010 wordprocessingShape namespace.
XMLNS_WPS = `http://schemas.microsoft.com/office/word/2010/wordprocessingShape`
// XMLNS_WPC is the Word 2010 wordprocessingCanvas namespace.
XMLNS_WPC = `http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas`
// XMLNS_WPG is the Word 2010 wordprocessingGroup namespace.
XMLNS_WPG = `http://schemas.microsoft.com/office/word/2010/wordprocessingGroup`
// XMLNS_MC is the markup-compatibility namespace.
XMLNS_MC = `http://schemas.openxmlformats.org/markup-compatibility/2006`
// XMLNS_WP14 = `http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing`
// XMLNS_O is the Microsoft Office (legacy "office") namespace.
XMLNS_O = `urn:schemas-microsoft-com:office:office`
// XMLNS_V is the VML namespace.
XMLNS_V = `urn:schemas-microsoft-com:vml`
// XMLNS_PICTURE is the DrawingML picture namespace.
XMLNS_PICTURE = `http://schemas.openxmlformats.org/drawingml/2006/picture`
)
// getAtt returns the value of the first attribute in atts whose local name
// equals name. The namespace of the attribute is not considered. It returns
// the empty string when no attribute matches.
func getAtt(atts []xml.Attr, name string) string {
	for i := range atts {
		if atts[i].Name.Local != name {
			continue
		}
		return atts[i].Value
	}
	return ""
}
// Body <w:body>
//
// Items holds the decoded children of the body in document order;
// Body.UnmarshalXML stores *Paragraph, *Table and *SectPr values in it.
// file is the Docx the body belongs to and is handed to every paragraph and
// table decoded from the body.
type Body struct {
Items []interface{}
file *Docx
}
// UnmarshalXML decodes the children of <w:body> into b.Items.
//
// <p>, <tbl> and <sectPr> elements are decoded, in document order, into
// *Paragraph, *Table and *SectPr values; paragraphs and tables receive b.file
// before they are decoded. Any other start element is skipped together with
// its content. A decoding error whose message starts with "expected" is
// ignored and the element is still appended; every other error is returned.
func (b *Body) UnmarshalXML(d *xml.Decoder, _ xml.StartElement) error {
	for {
		tok, err := d.Token()
		switch {
		case err == io.EOF:
			// The decoder reports EOF once the body element is consumed.
			return nil
		case err != nil:
			return err
		}

		start, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		var item interface{}
		switch start.Name.Local {
		case "p":
			para := new(Paragraph)
			para.file = b.file
			item = para
		case "tbl":
			table := new(Table)
			table.file = b.file
			item = table
		case "sectPr":
			item = new(SectPr)
		default:
			// skip unsupported tags
			if err = d.Skip(); err != nil {
				return err
			}
			continue
		}

		err = d.DecodeElement(item, &start)
		if err != nil && !strings.HasPrefix(err.Error(), "expected") {
			return err
		}
		b.Items = append(b.Items, item)
	}
}
// KeepElements keeps the body items whose type is listed in name and removes
// all others, preserving the order of the remaining items.
//
// Each name is compared with the reflect type string of the item, for
// example "*docx.Paragraph" or "*docx.Table". A nil item has no type, never
// matches and is therefore removed. Calling KeepElements without any name
// removes every item.
func (b *Body) KeepElements(name ...string) {
	wanted := make(map[string]struct{}, len(name))
	for _, n := range name {
		wanted[n] = struct{}{}
	}

	kept := make([]interface{}, 0, len(b.Items))
	for _, item := range b.Items {
		typ := reflect.TypeOf(item)
		if typ == nil {
			// Items is exported, so a nil entry is possible; asking a nil
			// interface for its type name would panic.
			continue
		}
		if _, ok := wanted[typ.String()]; ok {
			kept = append(kept, item)
		}
	}
	b.Items = kept
}
// DropDrawingOf drops all matched drawings in the body.
//
// name selects the method "Drop"+name, which is looked up by reflection on
// every top-level paragraph and on every paragraph inside the cells of every
// top-level table, and called without arguments. Paragraphs that have no such
// method are left untouched.
//
// name: Canvas, Shape, Group, ShapeAndCanvas, ShapeAndCanvasAndGroup, NilPicture
func (b *Body) DropDrawingOf(name string) {
	method := "Drop" + name
	// drop calls the selected method on p when p has it.
	drop := func(p *Paragraph) {
		if m := reflect.ValueOf(p).MethodByName(method); m.IsValid() {
			_ = m.Call(nil)
		}
	}

	for _, item := range b.Items {
		switch elem := item.(type) {
		case *Paragraph:
			drop(elem)
		case *Table:
			for _, row := range elem.TableRows {
				for _, cell := range row.TableCells {
					for _, para := range cell.Paragraphs {
						drop(para)
					}
				}
			}
		}
	}
}
// Document <w:document>
//
// The XML* fields carry the namespace declarations written as attributes of
// the element when marshalling. Document.UnmarshalXML only decodes the body
// and leaves all of them untouched.
type Document struct {
XMLName xml.Name `xml:"w:document"`
XMLW string `xml:"xmlns:w,attr"` // xmlns:w declaration; not populated when unmarshalling
XMLR string `xml:"xmlns:r,attr,omitempty"` // xmlns:r declaration; not populated when unmarshalling
XMLWP string `xml:"xmlns:wp,attr,omitempty"` // xmlns:wp declaration; not populated when unmarshalling
XMLWPS string `xml:"xmlns:wps,attr,omitempty"` // xmlns:wps declaration; not populated when unmarshalling
XMLWPC string `xml:"xmlns:wpc,attr,omitempty"` // xmlns:wpc declaration; not populated when unmarshalling
XMLWPG string `xml:"xmlns:wpg,attr,omitempty"` // xmlns:wpg declaration; not populated when unmarshalling
// XMLMC string `xml:"xmlns:mc,attr,omitempty"` // cannot be unmarshalled in
// XMLWP14 string `xml:"xmlns:wp14,attr,omitempty"` // cannot be unmarshalled in
// XMLO string `xml:"xmlns:o,attr,omitempty"` // cannot be unmarshalled in
// XMLV string `xml:"xmlns:v,attr,omitempty"` // cannot be unmarshalled in
// MCIgnorable string `xml:"mc:Ignorable,attr,omitempty"`
// Body is the <w:body> child holding the document content.
Body Body `xml:"w:body"`
}
// UnmarshalXML decodes a <w:document> element.
//
// Only <body> children are decoded (into doc.Body); every other child element
// is skipped together with its content. A decoding error whose message starts
// with "expected" is ignored; every other error is returned.
func (doc *Document) UnmarshalXML(d *xml.Decoder, _ xml.StartElement) error {
	for {
		tok, err := d.Token()
		if err != nil {
			if err == io.EOF {
				// The decoder reports EOF once the element is consumed.
				return nil
			}
			return err
		}

		el, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		if el.Name.Local != "body" {
			// skip unsupported tags
			if err = d.Skip(); err != nil {
				return err
			}
			continue
		}

		err = d.DecodeElement(&doc.Body, &el)
		if err != nil && !strings.HasPrefix(err.Error(), "expected") {
			return err
		}
	}
}
// ParagraphSplitRule reports whether the given paragraph is a separator.
//
// SplitByParagraph calls the rule on the top-level paragraphs of the body and
// begins a new document at a paragraph for which it returns true, unless the
// document currently being filled is still empty.
type ParagraphSplitRule func(*Paragraph) bool
// SplitDocxByPlainTextRegex returns a rule that treats a paragraph as a
// separator when re matches the text returned by p.String().
func SplitDocxByPlainTextRegex(re *regexp.Regexp) ParagraphSplitRule {
	matches := func(p *Paragraph) bool {
		text := p.String()
		return re.MatchString(text)
	}
	return matches
}
// SplitByParagraph splits the body of f into several new documents, using the
// top-level paragraphs accepted by separator as cut points.
//
// A matching paragraph becomes the first item of the document it opens; items
// in front of the first match form a document of their own. Paragraphs and
// tables are copied with copymedia so that they belong to the new document,
// every other item is placed into the new body as-is. Each new document
// shares the template data of f and starts with three relationships
// (styles.xml, theme/theme1.xml, fontTable.xml). An empty body yields no
// documents.
func (f *Docx) SplitByParagraph(separator ParagraphSplitRule) (docs []*Docx) {
	// blank builds an empty document that carries over the base data of f.
	blank := func() *Docx {
		d := new(Docx)
		d.mediaNameIdx = make(map[string]int, 64)
		d.slowIDs = make(map[string]uintptr, 64)
		d.template, d.tmplfs, d.tmpfslst = f.template, f.tmplfs, f.tmpfslst

		// The mc, o, v and wp14 namespace declarations stay disabled.
		root := &d.Document
		root.XMLW = XMLNS_W
		root.XMLR = XMLNS_R
		root.XMLWP = XMLNS_WP
		root.XMLWPS = XMLNS_WPS
		root.XMLWPC = XMLNS_WPC
		root.XMLWPG = XMLNS_WPG
		root.XMLName.Space, root.XMLName.Local = XMLNS_W, "document"
		root.Body.file = d

		d.docRelation = Relationships{
			Xmlns: XMLNS_REL,
			Relationship: []Relationship{
				{
					ID:     "rId1",
					Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles`,
					Target: "styles.xml",
				},
				{
					ID:     "rId2",
					Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme`,
					Target: "theme/theme1.xml",
				},
				{
					ID:     "rId3",
					Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable`,
					Target: "fontTable.xml",
				},
			},
		}
		d.rID = 3
		return d
	}

	rest := f.Document.Body.Items
	for len(rest) > 0 {
		cur := blank()
		body := &cur.Document.Body
		// cut is the index in rest at which the next document starts.
		cut := len(rest)

	scan:
		for i, item := range rest {
			switch o := item.(type) {
			case *Paragraph:
				// A separator only cuts once the current document has
				// content, so it always ends up as a first item.
				if separator(o) && len(body.Items) > 0 {
					cut = i
					break scan
				}
				np := o.copymedia(cur)
				body.Items = append(body.Items, &np)
			case *Table:
				nt := o.copymedia(cur)
				body.Items = append(body.Items, &nt)
			default:
				body.Items = append(body.Items, o)
			}
		}

		if len(body.Items) > 0 {
			docs = append(docs, cur)
		}
		rest = rest[cut:]
	}
	return
}
// copymedia returns a copy of r that belongs to the document to.
//
// The copy gets its own Children slice: drawings are replaced by the result
// of their own copymedia(to), every other child is shared with r.
func (r *Run) copymedia(to *Docx) *Run {
	dup := new(Run)
	*dup = *r
	dup.file = to
	dup.Children = make([]interface{}, len(r.Children))
	for i, child := range r.Children {
		if drawing, isDrawing := child.(*Drawing); isDrawing {
			dup.Children[i] = drawing.copymedia(to)
			continue
		}
		dup.Children[i] = child
	}
	return dup
}
// copymedia returns a copy of p that belongs to the document to.
//
// The copy gets its own Children slice. Runs are copied with their own
// copymedia. For a hyperlink the target is resolved through the document p
// belongs to and registered in to as a new link relation; the copy holds the
// new relation ID and a copy of the run. A hyperlink whose target cannot be
// resolved is dropped from the copy. Every other child is shared with p.
func (p *Paragraph) copymedia(to *Docx) (np Paragraph) {
	np = *p
	np.file = to
	np.Children = make([]interface{}, 0, len(p.Children))
	for _, child := range p.Children {
		switch c := child.(type) {
		case *Run:
			np.Children = append(np.Children, c.copymedia(to))
		case *Hyperlink:
			target, err := p.file.ReferTarget(c.ID)
			if err != nil {
				// Unresolvable link: leave it out of the copy.
				continue
			}
			rid := to.addLinkRelation(target)
			link := &Hyperlink{
				ID:  rid,
				Run: *c.Run.copymedia(to),
			}
			np.Children = append(np.Children, link)
		default:
			np.Children = append(np.Children, child)
		}
	}
	return
}
// copymedia returns a copy of t that belongs to the document to.
//
// Rows and cells are copied one level at a time and re-parented to to; the
// paragraphs of every cell are copied with their own copymedia. t itself and
// the rows, cells and paragraphs it points to are not modified.
func (t *Table) copymedia(to *Docx) (nt Table) {
	nt = *t
	nt.file = to

	rows := make([]*WTableRow, len(t.TableRows))
	for i, srcRow := range t.TableRows {
		row := *srcRow
		row.file = to

		cells := make([]*WTableCell, len(srcRow.TableCells))
		for j, srcCell := range srcRow.TableCells {
			cell := *srcCell
			cell.file = to

			paras := make([]*Paragraph, len(srcCell.Paragraphs))
			for k, srcPara := range srcCell.Paragraphs {
				para := srcPara.copymedia(to)
				paras[k] = &para
			}
			cell.Paragraphs = paras
			cells[j] = &cell
		}
		row.TableCells = cells
		rows[i] = &row
	}
	nt.TableRows = rows
	return
}
// AppendFile appends all body items of af to the body of f.
//
// Paragraphs and tables are copied with copymedia so that the appended
// copies belong to f; every other item is appended as-is and stays shared
// with af. af itself is not modified.
func (f *Docx) AppendFile(af *Docx) {
	body := &f.Document.Body
	for _, item := range af.Document.Body.Items {
		var copied interface{}
		switch src := item.(type) {
		case *Paragraph:
			dup := src.copymedia(f)
			copied = &dup
		case *Table:
			dup := src.copymedia(f)
			copied = &dup
		default:
			copied = src
		}
		body.Items = append(body.Items, copied)
	}
}