mirror of
https://github.com/fumiama/go-docx.git
synced 2026-06-04 23:30:25 +08:00
add SplitByParagraph
This commit is contained in:
2
.github/workflows/pull.yml
vendored
2
.github/workflows/pull.yml
vendored
@@ -8,7 +8,7 @@ jobs:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@master
|
||||
with:
|
||||
go-version: 1.19
|
||||
go-version: '1.20'
|
||||
|
||||
- name: Check out code into the Go module directory
|
||||
uses: actions/checkout@master
|
||||
|
||||
2
.github/workflows/push.yml
vendored
2
.github/workflows/push.yml
vendored
@@ -8,7 +8,7 @@ jobs:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@master
|
||||
with:
|
||||
go-version: 1.19
|
||||
go-version: '1.20'
|
||||
|
||||
- name: Check out code into the Go module directory
|
||||
uses: actions/checkout@master
|
||||
|
||||
@@ -59,7 +59,7 @@ run:
|
||||
tests: false
|
||||
skip-dirs:
|
||||
- order
|
||||
go: '1.19'
|
||||
go: '1.20'
|
||||
|
||||
# output configuration options
|
||||
output:
|
||||
|
||||
10
apitable.go
10
apitable.go
@@ -23,7 +23,7 @@ package docx
|
||||
// AddTable add a new table to body by col*row
|
||||
//
|
||||
// unit: twips (1/20 point)
|
||||
func (f *Docx) AddTable(row int, col int) *WTable {
|
||||
func (f *Docx) AddTable(row int, col int) *Table {
|
||||
trs := make([]*WTableRow, row)
|
||||
for i := 0; i < row; i++ {
|
||||
cells := make([]*WTableCell, col)
|
||||
@@ -40,7 +40,7 @@ func (f *Docx) AddTable(row int, col int) *WTable {
|
||||
TableCells: cells,
|
||||
}
|
||||
}
|
||||
tbl := &WTable{
|
||||
tbl := &Table{
|
||||
TableProperties: &WTableProperties{
|
||||
Width: &WTableWidth{Type: "auto"},
|
||||
TableBorders: &WTableBorders{
|
||||
@@ -65,7 +65,7 @@ func (f *Docx) AddTable(row int, col int) *WTable {
|
||||
// AddTableTwips add a new table to body by height and width
|
||||
//
|
||||
// unit: twips (1/20 point)
|
||||
func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *WTable {
|
||||
func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *Table {
|
||||
grids := make([]*WGridCol, len(colWidths))
|
||||
trs := make([]*WTableRow, len(rowHeights))
|
||||
for i, w := range colWidths {
|
||||
@@ -95,7 +95,7 @@ func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *WTable {
|
||||
}
|
||||
}
|
||||
}
|
||||
tbl := &WTable{
|
||||
tbl := &Table{
|
||||
TableProperties: &WTableProperties{
|
||||
Width: &WTableWidth{Type: "auto"},
|
||||
TableBorders: &WTableBorders{
|
||||
@@ -127,7 +127,7 @@ func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *WTable {
|
||||
// end:右对齐。
|
||||
// both:两端对齐。
|
||||
// distribute:分散对齐。
|
||||
func (t *WTable) Justification(val string) *WTable {
|
||||
func (t *Table) Justification(val string) *Table {
|
||||
if t.TableProperties.Justification == nil {
|
||||
t.TableProperties.Justification = &Justification{Val: val}
|
||||
return t
|
||||
|
||||
@@ -192,7 +192,7 @@ func main() {
|
||||
fmt.Println("Plain text:")
|
||||
for _, it := range doc.Document.Body.Items {
|
||||
switch it.(type) {
|
||||
case *docx.Paragraph, *docx.WTable: // printable
|
||||
case *docx.Paragraph, *docx.Table: // printable
|
||||
fmt.Println(it)
|
||||
}
|
||||
}
|
||||
|
||||
19
docx.go
19
docx.go
@@ -24,7 +24,6 @@ package docx
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"io"
|
||||
"io/fs"
|
||||
"sync"
|
||||
@@ -50,9 +49,6 @@ type Docx struct {
|
||||
tmplfs fs.FS
|
||||
tmpfslst []string
|
||||
|
||||
buf *bytes.Buffer
|
||||
isbufempty bool
|
||||
|
||||
io.Reader
|
||||
io.WriterTo
|
||||
}
|
||||
@@ -109,20 +105,9 @@ func (f *Docx) WriteTo(writer io.Writer) (_ int64, err error) {
|
||||
return 0, f.pack(zipWriter)
|
||||
}
|
||||
|
||||
// Read allows to save a docx to buf
|
||||
// Read is a fake function and cannot be used
|
||||
func (f *Docx) Read(p []byte) (n int, err error) {
|
||||
if !f.isbufempty {
|
||||
n, err = f.buf.Read(p)
|
||||
if err == io.EOF {
|
||||
f.buf.Reset()
|
||||
f.isbufempty = true
|
||||
return
|
||||
}
|
||||
}
|
||||
zipWriter := zip.NewWriter(f.buf)
|
||||
defer zipWriter.Close()
|
||||
f.isbufempty = false
|
||||
return f.buf.Read(p)
|
||||
panic("fake stub!")
|
||||
}
|
||||
|
||||
// UseTemplate will replace template files
|
||||
|
||||
2
empty.go
2
empty.go
@@ -21,7 +21,6 @@
|
||||
package docx
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
)
|
||||
|
||||
@@ -71,7 +70,6 @@ func newEmptyA4File() *Docx {
|
||||
slowIDs: make(map[string]uintptr, 64),
|
||||
template: "a4",
|
||||
tmpfslst: A4TemplateFilesList,
|
||||
buf: bytes.NewBuffer(make([]byte, 0, 1024*1024)),
|
||||
}
|
||||
docx.Document.Body.file = docx
|
||||
return docx
|
||||
|
||||
2
go.mod
2
go.mod
@@ -1,5 +1,5 @@
|
||||
module github.com/fumiama/go-docx
|
||||
|
||||
go 1.18
|
||||
go 1.20
|
||||
|
||||
require github.com/fumiama/imgsz v0.0.2
|
||||
|
||||
130
structdoc.go
130
structdoc.go
@@ -81,7 +81,7 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
value.file = b.file
|
||||
b.Items = append(b.Items, &value)
|
||||
case "tbl":
|
||||
var value WTable
|
||||
var value Table
|
||||
err = d.DecodeElement(&value, &tt)
|
||||
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
|
||||
return err
|
||||
@@ -146,3 +146,131 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParagraphSplitRule reports whether the given paragraph is a separator.
|
||||
type ParagraphSplitRule func(*Paragraph) bool
|
||||
|
||||
// SplitByParagraph splits a doc to many docs by using a matched paragraph
|
||||
// as the separator.
|
||||
//
|
||||
// The separator will be placed to the first doc item
|
||||
func (doc *Docx) SplitByParagraph(separator ParagraphSplitRule) (docs []*Docx) {
|
||||
items := doc.Document.Body.Items
|
||||
newdoclop:
|
||||
for len(items) > 0 {
|
||||
ndoc := new(Docx)
|
||||
|
||||
// migrate base data
|
||||
ndoc.mediaNameIdx = make(map[string]int, 64)
|
||||
ndoc.slowIDs = make(map[string]uintptr, 64)
|
||||
ndoc.template = doc.template
|
||||
ndoc.tmplfs = doc.tmplfs
|
||||
ndoc.tmpfslst = doc.tmpfslst
|
||||
|
||||
ndoc.Document.XMLW = XMLNS_W
|
||||
ndoc.Document.XMLR = XMLNS_R
|
||||
ndoc.Document.XMLWP = XMLNS_WP
|
||||
// ndoc.Document.XMLMC = XMLNS_MC
|
||||
// ndoc.Document.XMLO = XMLNS_O
|
||||
// ndoc.Document.XMLV = XMLNS_V
|
||||
ndoc.Document.XMLWPS = XMLNS_WPS
|
||||
ndoc.Document.XMLWPC = XMLNS_WPC
|
||||
ndoc.Document.XMLWPG = XMLNS_WPG
|
||||
// ndoc.Document.XMLWP14 = XMLNS_WP14
|
||||
ndoc.Document.XMLName.Space = XMLNS_W
|
||||
ndoc.Document.XMLName.Local = "document"
|
||||
ndoc.Document.Body.file = ndoc
|
||||
|
||||
ndoc.docRelation = Relationships{
|
||||
Xmlns: XMLNS_REL,
|
||||
Relationship: []Relationship{
|
||||
{
|
||||
ID: "rId1",
|
||||
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles`,
|
||||
Target: "styles.xml",
|
||||
},
|
||||
{
|
||||
ID: "rId2",
|
||||
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme`,
|
||||
Target: "theme/theme1.xml",
|
||||
},
|
||||
{
|
||||
ID: "rId3",
|
||||
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable`,
|
||||
Target: "fontTable.xml",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ndoc.rID = 3
|
||||
|
||||
for i, item := range items {
|
||||
switch o := item.(type) {
|
||||
case *Paragraph:
|
||||
if separator(o) && len(ndoc.Document.Body.Items) > 0 {
|
||||
items = items[i:]
|
||||
docs = append(docs, ndoc)
|
||||
continue newdoclop
|
||||
}
|
||||
np := o.copymedia(ndoc)
|
||||
ndoc.Document.Body.Items = append(ndoc.Document.Body.Items, &np)
|
||||
case *Table:
|
||||
nt := o.copymedia(ndoc)
|
||||
ndoc.Document.Body.Items = append(ndoc.Document.Body.Items, &nt)
|
||||
default:
|
||||
ndoc.Document.Body.Items = append(ndoc.Document.Body.Items, o)
|
||||
}
|
||||
}
|
||||
|
||||
if len(ndoc.Document.Body.Items) > 0 {
|
||||
docs = append(docs, ndoc)
|
||||
}
|
||||
break
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (p *Paragraph) copymedia(to *Docx) (np Paragraph) {
|
||||
np = *p
|
||||
np.Children = make([]interface{}, 0, len(p.Children))
|
||||
np.file = to
|
||||
for _, pc := range p.Children {
|
||||
if r, ok := pc.(*Run); ok {
|
||||
nr := *r
|
||||
nr.Children = make([]interface{}, 0, len(r.Children))
|
||||
nr.file = to
|
||||
for _, rc := range r.Children {
|
||||
if d, ok := rc.(*Drawing); ok {
|
||||
nr.Children = append(nr.Children, d.copymedia(to))
|
||||
continue
|
||||
}
|
||||
nr.Children = append(nr.Children, rc)
|
||||
}
|
||||
continue
|
||||
}
|
||||
np.Children = append(np.Children, pc)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (t *Table) copymedia(to *Docx) (nt Table) {
|
||||
nt = *t
|
||||
nt.TableRows = make([]*WTableRow, 0, len(t.TableRows))
|
||||
nt.file = to
|
||||
for _, tr := range t.TableRows {
|
||||
ntr := *tr
|
||||
ntr.TableCells = make([]*WTableCell, 0, len(tr.TableCells))
|
||||
ntr.file = to
|
||||
for _, tc := range tr.TableCells {
|
||||
ntc := *tc
|
||||
ntc.Paragraphs = make([]Paragraph, 0, len(tc.Paragraphs))
|
||||
ntc.file = to
|
||||
for _, p := range tc.Paragraphs {
|
||||
ntc.Paragraphs = append(ntc.Paragraphs, p.copymedia(to))
|
||||
}
|
||||
ntr.TableCells = append(ntr.TableCells, &ntc)
|
||||
}
|
||||
nt.TableRows = append(nt.TableRows, &ntr)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
133
structdrawing.go
133
structdrawing.go
@@ -27,6 +27,7 @@ import (
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
//nolint:revive,stylecheck
|
||||
@@ -89,6 +90,22 @@ func (r *Drawing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Drawing) copymedia(to *Docx) *Drawing {
|
||||
if r.Inline != nil {
|
||||
return &Drawing{
|
||||
Inline: r.Inline.copymedia(to),
|
||||
file: to,
|
||||
}
|
||||
}
|
||||
if r.Anchor != nil {
|
||||
return &Drawing{
|
||||
Anchor: r.Anchor.copymedia(to),
|
||||
file: to,
|
||||
}
|
||||
}
|
||||
return &Drawing{file: to}
|
||||
}
|
||||
|
||||
// WPInline is an element that represents an inline image within a text paragraph.
|
||||
//
|
||||
// It contains information about the image's size and position,
|
||||
@@ -275,6 +292,64 @@ func (r *WPInline) String() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *WPInline) copymedia(to *Docx) *WPInline {
|
||||
if r.Graphic.GraphicData.Pic != nil {
|
||||
if r.Graphic.GraphicData.Pic.BlipFill != nil {
|
||||
tgt, err := r.file.ReferTarget(r.Graphic.GraphicData.Pic.BlipFill.Blip.Embed)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
format := tgt[strings.LastIndex(tgt, ".")+1:]
|
||||
idn := int(atomic.AddUintptr(&to.docID, 1))
|
||||
id := int(to.IncreaseID("图片"))
|
||||
ids := strconv.Itoa(id)
|
||||
m := r.file.Media(tgt[6:])
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
rid := to.addImage(format, m.Data)
|
||||
inln := *r
|
||||
grph := *r.Graphic
|
||||
inln.Graphic = &grph
|
||||
grphdata := *r.Graphic.GraphicData
|
||||
grph.GraphicData = &grphdata
|
||||
pic := *r.Graphic.GraphicData.Pic
|
||||
grphdata.Pic = &pic
|
||||
grphdata.file = to
|
||||
grph.file = to
|
||||
inln.file = to
|
||||
|
||||
inln.DocPr = &WPDocPr{
|
||||
ID: idn,
|
||||
Name: "图片 " + ids,
|
||||
}
|
||||
pic.NonVisualPicProperties = &PICNonVisualPicProperties{
|
||||
NonVisualDrawingProperties: NonVisualProperties{
|
||||
ID: id,
|
||||
Name: "图片 " + ids,
|
||||
},
|
||||
CNvPicPr: r.Graphic.GraphicData.Pic.NonVisualPicProperties.CNvPicPr,
|
||||
}
|
||||
pic.BlipFill = &PICBlipFill{
|
||||
Blip: ABlip{
|
||||
Embed: rid,
|
||||
Cstate: r.Graphic.GraphicData.Pic.BlipFill.Blip.Cstate,
|
||||
},
|
||||
Stretch: r.Graphic.GraphicData.Pic.BlipFill.Stretch,
|
||||
}
|
||||
return &inln
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if r.Graphic.GraphicData.Shape != nil { // shape has no media
|
||||
return r
|
||||
}
|
||||
if r.Graphic.GraphicData.Canvas != nil { //TODO: copy canvas media
|
||||
return r
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WPExtent represents the extent of a drawing in a Word document.
|
||||
//
|
||||
// CX CY 's unit is English Metric Units, which is 1/914400 inch
|
||||
@@ -1350,6 +1425,64 @@ func (r *WPAnchor) String() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *WPAnchor) copymedia(to *Docx) *WPAnchor {
|
||||
if r.Graphic.GraphicData.Pic != nil {
|
||||
if r.Graphic.GraphicData.Pic.BlipFill != nil {
|
||||
tgt, err := r.file.ReferTarget(r.Graphic.GraphicData.Pic.BlipFill.Blip.Embed)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
format := tgt[strings.LastIndex(tgt, ".")+1:]
|
||||
idn := int(atomic.AddUintptr(&to.docID, 1))
|
||||
id := int(to.IncreaseID("图片"))
|
||||
ids := strconv.Itoa(id)
|
||||
m := r.file.Media(tgt[6:])
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
rid := to.addImage(format, m.Data)
|
||||
anch := *r
|
||||
grph := *r.Graphic
|
||||
anch.Graphic = &grph
|
||||
grphdata := *r.Graphic.GraphicData
|
||||
grph.GraphicData = &grphdata
|
||||
pic := *r.Graphic.GraphicData.Pic
|
||||
grphdata.Pic = &pic
|
||||
grphdata.file = to
|
||||
grph.file = to
|
||||
anch.file = to
|
||||
|
||||
anch.DocPr = &WPDocPr{
|
||||
ID: idn,
|
||||
Name: "图片 " + ids,
|
||||
}
|
||||
pic.NonVisualPicProperties = &PICNonVisualPicProperties{
|
||||
NonVisualDrawingProperties: NonVisualProperties{
|
||||
ID: id,
|
||||
Name: "图片 " + ids,
|
||||
},
|
||||
CNvPicPr: r.Graphic.GraphicData.Pic.NonVisualPicProperties.CNvPicPr,
|
||||
}
|
||||
pic.BlipFill = &PICBlipFill{
|
||||
Blip: ABlip{
|
||||
Embed: rid,
|
||||
Cstate: r.Graphic.GraphicData.Pic.BlipFill.Blip.Cstate,
|
||||
},
|
||||
Stretch: r.Graphic.GraphicData.Pic.BlipFill.Stretch,
|
||||
}
|
||||
return &anch
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if r.Graphic.GraphicData.Shape != nil { // shape has no media
|
||||
return r
|
||||
}
|
||||
if r.Graphic.GraphicData.Canvas != nil { //TODO: copy canvas media
|
||||
return r
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WPSimplePos represents the position of an object in a Word document.
|
||||
type WPSimplePos struct {
|
||||
XMLName xml.Name `xml:"wp:simplePos,omitempty"`
|
||||
|
||||
@@ -27,8 +27,8 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// WTable represents a table within a Word document.
|
||||
type WTable struct {
|
||||
// Table represents a table within a Word document.
|
||||
type Table struct {
|
||||
XMLName xml.Name `xml:"w:tbl,omitempty"`
|
||||
TableProperties *WTableProperties
|
||||
TableGrid *WTableGrid
|
||||
@@ -37,7 +37,7 @@ type WTable struct {
|
||||
file *Docx
|
||||
}
|
||||
|
||||
func (t *WTable) String() string {
|
||||
func (t *Table) String() string {
|
||||
if len(t.TableRows) == 0 || len(t.TableRows[0].TableCells) == 0 {
|
||||
return ""
|
||||
}
|
||||
@@ -62,7 +62,7 @@ func (t *WTable) String() string {
|
||||
}
|
||||
|
||||
// UnmarshalXML implements the xml.Unmarshaler interface.
|
||||
func (t *WTable) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
func (t *Table) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
for {
|
||||
token, err := d.Token()
|
||||
if err == io.EOF {
|
||||
|
||||
@@ -22,7 +22,6 @@ package docx
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"strings"
|
||||
@@ -67,7 +66,8 @@ func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
|
||||
// fill remaining files into tmpfslst
|
||||
docx.tmpfslst = append(docx.tmpfslst, f.Name)
|
||||
}
|
||||
docx.buf = bytes.NewBuffer(make([]byte, 0, 1024*1024))
|
||||
//TODO: find last imageID
|
||||
docx.imageID = 100000
|
||||
return
|
||||
}
|
||||
|
||||
@@ -94,6 +94,7 @@ func (f *Docx) parseDocument(file *zip.File) error {
|
||||
|
||||
f.Document.Body.file = f
|
||||
//TODO: find last docID
|
||||
f.docID = 100000
|
||||
err = xml.NewDecoder(zf).Decode(&f.Document)
|
||||
return err
|
||||
}
|
||||
@@ -108,12 +109,12 @@ func (f *Docx) parseDocRelation(file *zip.File) error {
|
||||
|
||||
f.docRelation.Xmlns = XMLNS_R
|
||||
//TODO: find last rID
|
||||
f.rID = 100000
|
||||
return xml.NewDecoder(zf).Decode(&f.docRelation)
|
||||
}
|
||||
|
||||
// parseMedia add the media into Docx struct
|
||||
func (f *Docx) parseMedia(file *zip.File) error {
|
||||
//TODO: find last imageID
|
||||
name := file.Name[len(MEDIA_FOLDER):]
|
||||
zf, err := file.Open()
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user