1
0
mirror of https://github.com/fumiama/go-docx.git synced 2026-06-04 23:30:25 +08:00

add SplitByParagraph

This commit is contained in:
源文雨
2023-03-09 17:20:17 +08:00
parent 2a630f6342
commit 69a899d4a5
12 changed files with 282 additions and 37 deletions

View File

@@ -8,7 +8,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@master
with:
go-version: 1.19
go-version: '1.20'
- name: Check out code into the Go module directory
uses: actions/checkout@master

View File

@@ -8,7 +8,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@master
with:
go-version: 1.19
go-version: '1.20'
- name: Check out code into the Go module directory
uses: actions/checkout@master

View File

@@ -59,7 +59,7 @@ run:
tests: false
skip-dirs:
- order
go: '1.19'
go: '1.20'
# output configuration options
output:

View File

@@ -23,7 +23,7 @@ package docx
// AddTable add a new table to body by col*row
//
// unit: twips (1/20 point)
func (f *Docx) AddTable(row int, col int) *WTable {
func (f *Docx) AddTable(row int, col int) *Table {
trs := make([]*WTableRow, row)
for i := 0; i < row; i++ {
cells := make([]*WTableCell, col)
@@ -40,7 +40,7 @@ func (f *Docx) AddTable(row int, col int) *WTable {
TableCells: cells,
}
}
tbl := &WTable{
tbl := &Table{
TableProperties: &WTableProperties{
Width: &WTableWidth{Type: "auto"},
TableBorders: &WTableBorders{
@@ -65,7 +65,7 @@ func (f *Docx) AddTable(row int, col int) *WTable {
// AddTableTwips add a new table to body by height and width
//
// unit: twips (1/20 point)
func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *WTable {
func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *Table {
grids := make([]*WGridCol, len(colWidths))
trs := make([]*WTableRow, len(rowHeights))
for i, w := range colWidths {
@@ -95,7 +95,7 @@ func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *WTable {
}
}
}
tbl := &WTable{
tbl := &Table{
TableProperties: &WTableProperties{
Width: &WTableWidth{Type: "auto"},
TableBorders: &WTableBorders{
@@ -127,7 +127,7 @@ func (f *Docx) AddTableTwips(rowHeights []int64, colWidths []int64) *WTable {
// end右对齐。
// both两端对齐。
// distribute分散对齐。
func (t *WTable) Justification(val string) *WTable {
func (t *Table) Justification(val string) *Table {
if t.TableProperties.Justification == nil {
t.TableProperties.Justification = &Justification{Val: val}
return t

View File

@@ -192,7 +192,7 @@ func main() {
fmt.Println("Plain text:")
for _, it := range doc.Document.Body.Items {
switch it.(type) {
case *docx.Paragraph, *docx.WTable: // printable
case *docx.Paragraph, *docx.Table: // printable
fmt.Println(it)
}
}

19
docx.go
View File

@@ -24,7 +24,6 @@ package docx
import (
"archive/zip"
"bytes"
"io"
"io/fs"
"sync"
@@ -50,9 +49,6 @@ type Docx struct {
tmplfs fs.FS
tmpfslst []string
buf *bytes.Buffer
isbufempty bool
io.Reader
io.WriterTo
}
@@ -109,20 +105,9 @@ func (f *Docx) WriteTo(writer io.Writer) (_ int64, err error) {
return 0, f.pack(zipWriter)
}
// Read allows to save a docx to buf
// Read is a fake function and cannot be used
func (f *Docx) Read(p []byte) (n int, err error) {
if !f.isbufempty {
n, err = f.buf.Read(p)
if err == io.EOF {
f.buf.Reset()
f.isbufempty = true
return
}
}
zipWriter := zip.NewWriter(f.buf)
defer zipWriter.Close()
f.isbufempty = false
return f.buf.Read(p)
panic("fake stub!")
}
// UseTemplate will replace template files

View File

@@ -21,7 +21,6 @@
package docx
import (
"bytes"
"encoding/xml"
)
@@ -71,7 +70,6 @@ func newEmptyA4File() *Docx {
slowIDs: make(map[string]uintptr, 64),
template: "a4",
tmpfslst: A4TemplateFilesList,
buf: bytes.NewBuffer(make([]byte, 0, 1024*1024)),
}
docx.Document.Body.file = docx
return docx

2
go.mod
View File

@@ -1,5 +1,5 @@
module github.com/fumiama/go-docx
go 1.18
go 1.20
require github.com/fumiama/imgsz v0.0.2

View File

@@ -81,7 +81,7 @@ func (b *Body) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
value.file = b.file
b.Items = append(b.Items, &value)
case "tbl":
var value WTable
var value Table
err = d.DecodeElement(&value, &tt)
if err != nil && !strings.HasPrefix(err.Error(), "expected") {
return err
@@ -146,3 +146,131 @@ func (doc *Document) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error
}
return nil
}
// ParagraphSplitRule reports whether the given paragraph is a separator,
// i.e. a paragraph at which SplitByParagraph starts a new document.
type ParagraphSplitRule func(*Paragraph) bool
// SplitByParagraph splits doc into several documents, using every paragraph
// matched by separator as a boundary.
//
// A matched paragraph opens a new document: it becomes the first body item
// of the document that follows the boundary. Body items located before the
// first match form the first document. Paragraphs and tables are copied into
// the new document through copymedia; any other body item is appended as is
// and therefore shared with doc.
//
// The result is nil when the body of doc has no items.
func (doc *Docx) SplitByParagraph(separator ParagraphSplitRule) (docs []*Docx) {
	rest := doc.Document.Body.Items
	for len(rest) > 0 {
		// build a fresh document that inherits the template data of doc
		part := &Docx{
			mediaNameIdx: make(map[string]int, 64),
			slowIDs:      make(map[string]uintptr, 64),
			template:     doc.template,
			tmplfs:       doc.tmplfs,
			tmpfslst:     doc.tmpfslst,
			docRelation: Relationships{
				Xmlns: XMLNS_REL,
				Relationship: []Relationship{
					{
						ID:     "rId1",
						Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles`,
						Target: "styles.xml",
					},
					{
						ID:     "rId2",
						Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme`,
						Target: "theme/theme1.xml",
					},
					{
						ID:     "rId3",
						Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable`,
						Target: "fontTable.xml",
					},
				},
			},
			// three relationships are predefined above
			rID: 3,
		}
		root := &part.Document
		root.XMLName.Space, root.XMLName.Local = XMLNS_W, "document"
		root.XMLW, root.XMLR, root.XMLWP = XMLNS_W, XMLNS_R, XMLNS_WP
		root.XMLWPS, root.XMLWPC, root.XMLWPG = XMLNS_WPS, XMLNS_WPC, XMLNS_WPG
		// XMLMC, XMLO, XMLV and XMLWP14 are left unset
		body := &root.Body
		body.file = part

		// cut is the index in rest at which the next document begins
		cut := len(rest)
	scan:
		for idx, entry := range rest {
			switch v := entry.(type) {
			case *Paragraph:
				// a separator only ends the current part when the part
				// already holds something, so a leading separator stays
				// inside the part it opens
				if separator(v) && len(body.Items) > 0 {
					cut = idx
					break scan
				}
				para := v.copymedia(part)
				body.Items = append(body.Items, &para)
			case *Table:
				tbl := v.copymedia(part)
				body.Items = append(body.Items, &tbl)
			default:
				body.Items = append(body.Items, v)
			}
		}
		if len(body.Items) > 0 {
			docs = append(docs, part)
		}
		// the separator at rest[cut] is revisited as the head of the next part
		rest = rest[cut:]
	}
	return
}
// copymedia returns a copy of p that belongs to the document to.
//
// The paragraph value is copied and its Children slice is rebuilt. Every
// *Run child is copied as well, with its own rebuilt Children slice, and
// each *Drawing inside a run is replaced by the result of Drawing.copymedia
// so that its media is registered in to. All other children are appended
// unchanged and are therefore shared with p.
func (p *Paragraph) copymedia(to *Docx) (np Paragraph) {
	np = *p
	np.Children = make([]interface{}, 0, len(p.Children))
	np.file = to
	for _, pc := range p.Children {
		r, ok := pc.(*Run)
		if !ok {
			np.Children = append(np.Children, pc)
			continue
		}
		nr := *r
		nr.Children = make([]interface{}, 0, len(r.Children))
		nr.file = to
		for _, rc := range r.Children {
			if d, ok := rc.(*Drawing); ok {
				nr.Children = append(nr.Children, d.copymedia(to))
				continue
			}
			nr.Children = append(nr.Children, rc)
		}
		// the copied run must be attached to the new paragraph, otherwise
		// every run (and with it all text and drawings) is lost in the copy
		np.Children = append(np.Children, &nr)
	}
	return
}
// copymedia returns a copy of t that belongs to the document to.
//
// The table, each of its rows and each of their cells are copied by value
// and re-bound to to; the paragraphs of every cell are copied through
// Paragraph.copymedia. Fields other than the row, cell and paragraph slices
// keep the values they have in t.
func (t *Table) copymedia(to *Docx) (nt Table) {
	nt = *t
	nt.file = to
	nt.TableRows = make([]*WTableRow, len(t.TableRows))
	for i, row := range t.TableRows {
		nrow := *row
		nrow.file = to
		nrow.TableCells = make([]*WTableCell, len(row.TableCells))
		for j, cell := range row.TableCells {
			ncell := *cell
			ncell.file = to
			ncell.Paragraphs = make([]Paragraph, len(cell.Paragraphs))
			for k := range cell.Paragraphs {
				ncell.Paragraphs[k] = cell.Paragraphs[k].copymedia(to)
			}
			nrow.TableCells[j] = &ncell
		}
		nt.TableRows[i] = &nrow
	}
	return
}

View File

@@ -27,6 +27,7 @@ import (
"io"
"strconv"
"strings"
"sync/atomic"
)
//nolint:revive,stylecheck
@@ -89,6 +90,22 @@ func (r *Drawing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
return nil
}
// copymedia returns a new Drawing that belongs to the document to.
//
// When r holds an inline object, only Inline is set on the result, to the
// value returned by WPInline.copymedia. Otherwise, when r holds an anchored
// object, only Anchor is set, to the value returned by WPAnchor.copymedia.
// A drawing with neither yields an empty Drawing bound to to.
func (r *Drawing) copymedia(to *Docx) *Drawing {
	nd := &Drawing{file: to}
	switch {
	case r.Inline != nil:
		nd.Inline = r.Inline.copymedia(to)
	case r.Anchor != nil:
		nd.Anchor = r.Anchor.copymedia(to)
	}
	return nd
}
// WPInline is an element that represents an inline image within a text paragraph.
//
// It contains information about the image's size and position,
@@ -275,6 +292,64 @@ func (r *WPInline) String() string {
return "![inln?](unknown)"
}
// copymedia returns a version of the inline object r usable in the document to.
//
// For a picture, the image referenced by the blip is looked up in the source
// document, added to to with addImage, and a copy of r is returned whose
// Graphic, GraphicData and Pic are fresh copies carrying a new relationship
// ID, a new document ID and a new picture name.
//
// For a shape or a canvas, r itself is returned: nothing is copied and the
// returned value stays bound to the source document.
//
// It returns nil when r has no graphic data, when the picture has no blip
// fill, when the referenced target or media cannot be found in the source
// document, or when the graphic holds none of picture, shape or canvas.
func (r *WPInline) copymedia(to *Docx) *WPInline {
	if r.Graphic == nil || r.Graphic.GraphicData == nil {
		return nil
	}
	src := r.Graphic.GraphicData
	switch {
	case src.Pic != nil:
		if src.Pic.BlipFill == nil || r.file == nil {
			return nil
		}
		tgt, err := r.file.ReferTarget(src.Pic.BlipFill.Blip.Embed)
		if err != nil {
			return nil
		}
		// the media name is taken after the first 6 bytes of the target
		// (presumably the "media/" prefix — verify against ReferTarget);
		// shorter targets cannot name a media file
		if len(tgt) < 6 {
			return nil
		}
		// resolve the media before touching the counters of to, so that a
		// failed lookup does not consume IDs in the destination document
		m := r.file.Media(tgt[6:])
		if m == nil {
			return nil
		}
		format := tgt[strings.LastIndex(tgt, ".")+1:]
		idn := int(atomic.AddUintptr(&to.docID, 1))
		// "图片" ("picture") is the counter key and the picture name prefix
		id := int(to.IncreaseID("图片"))
		ids := strconv.Itoa(id)
		rid := to.addImage(format, m.Data)

		// copy every level down to the picture so that the source objects
		// are not modified
		inln := *r
		grph := *r.Graphic
		inln.Graphic = &grph
		grphdata := *src
		grph.GraphicData = &grphdata
		pic := *src.Pic
		grphdata.Pic = &pic
		grphdata.file = to
		grph.file = to
		inln.file = to
		inln.DocPr = &WPDocPr{
			ID:   idn,
			Name: "图片 " + ids,
		}
		nvpp := &PICNonVisualPicProperties{
			NonVisualDrawingProperties: NonVisualProperties{
				ID:   id,
				Name: "图片 " + ids,
			},
		}
		// the source picture may lack non-visual properties
		if old := src.Pic.NonVisualPicProperties; old != nil {
			nvpp.CNvPicPr = old.CNvPicPr
		}
		pic.NonVisualPicProperties = nvpp
		pic.BlipFill = &PICBlipFill{
			Blip: ABlip{
				Embed:  rid,
				Cstate: src.Pic.BlipFill.Blip.Cstate,
			},
			Stretch: src.Pic.BlipFill.Stretch,
		}
		return &inln
	case src.Shape != nil: // shape has no media
		return r
	case src.Canvas != nil: //TODO: copy canvas media
		return r
	}
	return nil
}
// WPExtent represents the extent of a drawing in a Word document.
//
// CX CY 's unit is English Metric Units, which is 1/914400 inch
@@ -1350,6 +1425,64 @@ func (r *WPAnchor) String() string {
return "![anch?](unknown)"
}
// copymedia returns a version of the anchored object r usable in the document to.
//
// For a picture, the image referenced by the blip is looked up in the source
// document, added to to with addImage, and a copy of r is returned whose
// Graphic, GraphicData and Pic are fresh copies carrying a new relationship
// ID, a new document ID and a new picture name.
//
// For a shape or a canvas, r itself is returned: nothing is copied and the
// returned value stays bound to the source document.
//
// It returns nil when r has no graphic data, when the picture has no blip
// fill, when the referenced target or media cannot be found in the source
// document, or when the graphic holds none of picture, shape or canvas.
func (r *WPAnchor) copymedia(to *Docx) *WPAnchor {
	if r.Graphic == nil || r.Graphic.GraphicData == nil {
		return nil
	}
	src := r.Graphic.GraphicData
	switch {
	case src.Pic != nil:
		if src.Pic.BlipFill == nil || r.file == nil {
			return nil
		}
		tgt, err := r.file.ReferTarget(src.Pic.BlipFill.Blip.Embed)
		if err != nil {
			return nil
		}
		// the media name is taken after the first 6 bytes of the target
		// (presumably the "media/" prefix — verify against ReferTarget);
		// shorter targets cannot name a media file
		if len(tgt) < 6 {
			return nil
		}
		// resolve the media before touching the counters of to, so that a
		// failed lookup does not consume IDs in the destination document
		m := r.file.Media(tgt[6:])
		if m == nil {
			return nil
		}
		format := tgt[strings.LastIndex(tgt, ".")+1:]
		idn := int(atomic.AddUintptr(&to.docID, 1))
		// "图片" ("picture") is the counter key and the picture name prefix
		id := int(to.IncreaseID("图片"))
		ids := strconv.Itoa(id)
		rid := to.addImage(format, m.Data)

		// copy every level down to the picture so that the source objects
		// are not modified
		anch := *r
		grph := *r.Graphic
		anch.Graphic = &grph
		grphdata := *src
		grph.GraphicData = &grphdata
		pic := *src.Pic
		grphdata.Pic = &pic
		grphdata.file = to
		grph.file = to
		anch.file = to
		anch.DocPr = &WPDocPr{
			ID:   idn,
			Name: "图片 " + ids,
		}
		nvpp := &PICNonVisualPicProperties{
			NonVisualDrawingProperties: NonVisualProperties{
				ID:   id,
				Name: "图片 " + ids,
			},
		}
		// the source picture may lack non-visual properties
		if old := src.Pic.NonVisualPicProperties; old != nil {
			nvpp.CNvPicPr = old.CNvPicPr
		}
		pic.NonVisualPicProperties = nvpp
		pic.BlipFill = &PICBlipFill{
			Blip: ABlip{
				Embed:  rid,
				Cstate: src.Pic.BlipFill.Blip.Cstate,
			},
			Stretch: src.Pic.BlipFill.Stretch,
		}
		return &anch
	case src.Shape != nil: // shape has no media
		return r
	case src.Canvas != nil: //TODO: copy canvas media
		return r
	}
	return nil
}
// WPSimplePos represents the position of an object in a Word document.
type WPSimplePos struct {
XMLName xml.Name `xml:"wp:simplePos,omitempty"`

View File

@@ -27,8 +27,8 @@ import (
"strings"
)
// WTable represents a table within a Word document.
type WTable struct {
// Table represents a table within a Word document.
type Table struct {
XMLName xml.Name `xml:"w:tbl,omitempty"`
TableProperties *WTableProperties
TableGrid *WTableGrid
@@ -37,7 +37,7 @@ type WTable struct {
file *Docx
}
func (t *WTable) String() string {
func (t *Table) String() string {
if len(t.TableRows) == 0 || len(t.TableRows[0].TableCells) == 0 {
return ""
}
@@ -62,7 +62,7 @@ func (t *WTable) String() string {
}
// UnmarshalXML implements the xml.Unmarshaler interface.
func (t *WTable) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
func (t *Table) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
for {
token, err := d.Token()
if err == io.EOF {

View File

@@ -22,7 +22,6 @@ package docx
import (
"archive/zip"
"bytes"
"encoding/xml"
"io"
"strings"
@@ -67,7 +66,8 @@ func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
// fill remaining files into tmpfslst
docx.tmpfslst = append(docx.tmpfslst, f.Name)
}
docx.buf = bytes.NewBuffer(make([]byte, 0, 1024*1024))
//TODO: find last imageID
docx.imageID = 100000
return
}
@@ -94,6 +94,7 @@ func (f *Docx) parseDocument(file *zip.File) error {
f.Document.Body.file = f
//TODO: find last docID
f.docID = 100000
err = xml.NewDecoder(zf).Decode(&f.Document)
return err
}
@@ -108,12 +109,12 @@ func (f *Docx) parseDocRelation(file *zip.File) error {
f.docRelation.Xmlns = XMLNS_R
//TODO: find last rID
f.rID = 100000
return xml.NewDecoder(zf).Decode(&f.docRelation)
}
// parseMedia add the media into Docx struct
func (f *Docx) parseMedia(file *zip.File) error {
//TODO: find last imageID
name := file.Name[len(MEDIA_FOLDER):]
zf, err := file.Open()
if err != nil {