1
0
mirror of https://github.com/fumiama/go-docx.git synced 2026-06-21 19:13:37 +08:00

House cleaning

This commit is contained in:
Gonzalo Fernandez-Victorio
2021-04-29 17:38:39 +01:00
parent eae5f90385
commit c821a33692
16 changed files with 159 additions and 144 deletions

View File

@@ -1,10 +1,10 @@
# Docx library # Docx library
Yet another library to manipulate .docx (Microsoft Word) files in Go. Yet another library to read and write .docx (Microsoft Word) files in Go.
## Introduction ## Introduction
As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) Microsoft Word documents.
The difference with other projects is the following: The difference with other projects is the following:

View File

@@ -2,7 +2,8 @@ package docxlib
import "strconv" import "strconv"
func (f *Docx) addLinkRelation(link string) string { // when adding a hyperlink we need to store a reference in the relationship field
func (f *DocxLib) addLinkRelation(link string) string {
rel := &Relationship{ rel := &Relationship{
ID: "rId" + strconv.Itoa(f.rId), ID: "rId" + strconv.Itoa(f.rId),
Type: REL_HYPERLINK, Type: REL_HYPERLINK,
@@ -17,7 +18,7 @@ func (f *Docx) addLinkRelation(link string) string {
return rel.ID return rel.ID
} }
// AddLink add hyperlink to paragraph // AddLink adds a hyperlink to paragraph
func (p *Paragraph) AddLink(text string, link string) *Hyperlink { func (p *Paragraph) AddLink(text string, link string) *Hyperlink {
rId := p.file.addLinkRelation(link) rId := p.file.addLinkRelation(link)
hyperlink := &Hyperlink{ hyperlink := &Hyperlink{

26
apipara.go Normal file
View File

@@ -0,0 +1,26 @@
package docxlib
// AddParagraph creates an empty paragraph that points back to this document,
// appends it to the end of the document body and returns it so that content
// can be added to it.
func (f *DocxLib) AddParagraph() *Paragraph {
	para := new(Paragraph)
	para.Data = make([]ParagraphChild, 0)
	para.file = f

	f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, para)
	return para
}
// Paragraphs returns the paragraphs held in the document body, in the order
// they are stored. The returned slice is the body's own slice, not a copy.
func (f *DocxLib) Paragraphs() []*Paragraph {
	paragraphs := f.Document.Body.Paragraphs
	return paragraphs
}
// Runs collects, in order, the runs that are direct children of the
// paragraph. Children without a run (for example, ones that only carry a
// hyperlink) are skipped. The result is nil when no child has a run.
func (p *Paragraph) Runs() (ret []*Run) {
	for i := range p.Data {
		run := p.Data[i].Run
		if run == nil {
			continue
		}
		ret = append(ret, run)
	}
	return ret
}

34
apirun.go Normal file
View File

@@ -0,0 +1,34 @@
package docxlib
// Color sets the colour of the run and returns the run so that calls can be
// chained. The value is stored unchanged in the run properties.
//
// RunProperties is an optional pointer, so it is created here when missing;
// without that, calling Color on a run that has no properties would panic
// with a nil pointer dereference.
func (r *Run) Color(color string) *Run {
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Color = &Color{Val: color}
	return r
}
// Size sets the font size of the run and returns the run so that calls can be
// chained. The stored value is twice the size passed in.
//
// RunProperties is an optional pointer, so it is created here when missing;
// without that, calling Size on a run that has no properties would panic
// with a nil pointer dereference.
func (r *Run) Size(size int) *Run {
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Size = &Size{Val: size * 2}
	return r
}
// AddText appends a new run holding the given text to the paragraph and
// returns that run. The run is created with empty run properties.
func (p *Paragraph) AddText(text string) *Run {
	run := new(Run)
	run.Text = &Text{Text: text}
	run.RunProperties = new(RunProperties)

	p.Data = append(p.Data, ParagraphChild{Run: run})
	return run
}

View File

@@ -5,7 +5,9 @@ import (
"io" "io"
) )
type Docx struct { // DocxLib is the structure that allows access to the internal representation
// in memory of the doc (either read or about to be written)
type DocxLib struct {
Document Document Document Document
DocRelation Relationships DocRelation Relationships
@@ -14,12 +16,12 @@ type Docx struct {
// New generates a new empty docx file that we can manipulate and // New generates a new empty docx file that we can manipulate and
// later on, save // later on, save
func New() *Docx { func New() *DocxLib {
return emptyFile() return emptyFile()
} }
// Parse generates a new docx file in memory from a reader // Parse generates a new docx file in memory from a reader
func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) { func Parse(reader io.ReaderAt, size int64) (doc *DocxLib, err error) {
zipReader, err := zip.NewReader(reader, size) zipReader, err := zip.NewReader(reader, size)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -29,7 +31,7 @@ func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) {
} }
// Write allows to save a docx to a writer // Write allows to save a docx to a writer
func (f *Docx) Write(writer io.Writer) (err error) { func (f *DocxLib) Write(writer io.Writer) (err error) {
zipWriter := zip.NewWriter(writer) zipWriter := zip.NewWriter(writer)
defer zipWriter.Close() defer zipWriter.Close()

View File

@@ -23,8 +23,8 @@ func emptyRelationships() []*Relationship {
return defaultRel return defaultRel
} }
func emptyFile() *Docx { func emptyFile() *DocxLib {
docx := &Docx{ docx := &DocxLib{
Document: Document{ Document: Document{
XMLName: xml.Name{ XMLName: xml.Name{
Space: "w", Space: "w",

View File

@@ -6,7 +6,10 @@ import (
"fmt" "fmt"
) )
func (f *Docx) pack(zipWriter *zip.Writer) (err error) { // This receives a zip file writer (word documents are a zip with multiple xml inside)
// and writes the relevant files. Some of them come from the empty_constants file,
// others from the actual in-memory structure
func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
files := map[string]string{} files := map[string]string{}
files["_rels/.rels"] = TEMP_REL files["_rels/.rels"] = TEMP_REL

View File

@@ -1,42 +0,0 @@
package docxlib
import (
"encoding/xml"
)
// ParagraphChild is one child of a paragraph. It has a slot for a hyperlink
// and a slot for a run; Paragraph.Runs skips children whose Run is nil.
type ParagraphChild struct {
// Link maps to the hyperlink element.
Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// Run maps to the r (run) element.
Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
}
// Paragraph maps to the p (paragraph) element of the document.
type Paragraph struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
// Data holds the children of the paragraph in order.
Data []ParagraphChild
// file points back to the owning document. AddParagraph sets it.
file *Docx
}
// AddParagraph creates an empty paragraph that points back to this document,
// appends it to the end of the document body and returns it so that content
// can be added to it.
func (f *Docx) AddParagraph() *Paragraph {
	para := new(Paragraph)
	para.Data = make([]ParagraphChild, 0)
	para.file = f

	f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, para)
	return para
}
// Paragraphs returns the paragraphs held in the document body, in the order
// they are stored. The returned slice is the body's own slice, not a copy.
func (f *Docx) Paragraphs() []*Paragraph {
	paragraphs := f.Document.Body.Paragraphs
	return paragraphs
}
// Runs collects, in order, the runs that are direct children of the
// paragraph. Children without a run (for example, ones that only carry a
// hyperlink) are skipped. The result is nil when no child has a run.
func (p *Paragraph) Runs() (ret []*Run) {
	for i := range p.Data {
		run := p.Data[i].Run
		if run == nil {
			continue
		}
		ret = append(ret, run)
	}
	return ret
}

58
run.go
View File

@@ -1,58 +0,0 @@
package docxlib
import "encoding/xml"
// A Run is part of a paragraph that has its own style. It could be
// a piece of text in bold, or a link. It maps to the r element.
type Run struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
// RunProperties is optional (omitempty), so it can be nil.
RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
// InstrText is tagged as w:instrText and omitted when empty.
InstrText string `xml:"w:instrText,omitempty"`
// Text is the text node of the run; AddText always sets it.
Text *Text
}
// The Text object contains the actual text. It maps to the t element.
type Text struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main t"`
// XMLSpace is the xml:space attribute; it is omitted when empty.
XMLSpace string `xml:"xml:space,attr,omitempty"`
// Text is the character data of the element.
Text string `xml:",chardata"`
}
// Hyperlink maps to the hyperlink element and wraps the run that is shown
// for the link.
type Hyperlink struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// ID is the id attribute in the relationships namespace.
// NOTE(review): presumably the ID returned by addLinkRelation; confirm in AddLink.
ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"`
// Run is the run contained in the hyperlink.
Run Run
}
// Color sets the colour of the run and returns the run so that calls can be
// chained. The value is stored unchanged in the run properties.
//
// RunProperties is an optional pointer, so it is created here when missing;
// without that, calling Color on a run that has no properties would panic
// with a nil pointer dereference.
func (r *Run) Color(color string) *Run {
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Color = &Color{Val: color}
	return r
}
// Size sets the font size of the run and returns the run so that calls can be
// chained. The stored value is twice the size passed in.
//
// RunProperties is an optional pointer, so it is created here when missing;
// without that, calling Size on a run that has no properties would panic
// with a nil pointer dereference.
func (r *Run) Size(size int) *Run {
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Size = &Size{Val: size * 2}
	return r
}
// AddText appends a new run holding the given text to the paragraph and
// returns that run. The run is created with empty run properties.
func (p *Paragraph) AddText(text string) *Run {
	run := new(Run)
	run.Text = &Text{Text: text}
	run.RunProperties = new(RunProperties)

	p.Data = append(p.Data, ParagraphChild{Run: run})
	return run
}

View File

@@ -1,29 +0,0 @@
package docxlib
import "encoding/xml"
const (
// HYPERLINK_STYLE is the style identifier "a1".
// NOTE(review): presumably the run style (rStyle) applied to hyperlink
// runs; its use is not visible here, so confirm against AddLink.
HYPERLINK_STYLE = "a1"
)
// RunProperties groups the optional formatting of a run. It maps to the rPr
// element; every child is a pointer marked omitempty, so unset ones are nil.
type RunProperties struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr"`
// Color is set by Run.Color.
Color *Color `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color,omitempty"`
// Size is set by Run.Size.
Size *Size `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz,omitempty"`
// RunStyle maps to the rStyle element.
RunStyle *RunStyle `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"`
}
// RunStyle maps to the rStyle element of the run properties.
type RunStyle struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle"`
// Val is tagged as the w:val attribute.
// NOTE(review): presumably a style identifier such as HYPERLINK_STYLE; confirm.
Val string `xml:"w:val,attr"`
}
// Color maps to the color element of the run properties.
type Color struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color"`
// Val is tagged as the w:val attribute; Run.Color stores its argument here unchanged.
Val string `xml:"w:val,attr"`
}
// Size maps to the sz element of the run properties.
type Size struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz"`
// Val is tagged as the w:val attribute; Run.Size stores twice its argument here.
Val int `xml:"w:val,attr"`
}

15
structnodes.go Normal file
View File

@@ -0,0 +1,15 @@
package docxlib
import "encoding/xml"
// ParagraphChild is one child of a paragraph. It has a slot for a hyperlink
// and a slot for a run; Paragraph.Runs skips children whose Run is nil.
type ParagraphChild struct {
// Link maps to the hyperlink element.
Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// Run maps to the r (run) element.
Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
}
// Paragraph maps to the p (paragraph) element of the document.
type Paragraph struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
// Data holds the children of the paragraph in order.
Data []ParagraphChild
// file points back to the owning document. AddParagraph sets it and
// AddLink uses it to register the link relationship.
file *DocxLib
}

57
structrun.go Normal file
View File

@@ -0,0 +1,57 @@
package docxlib
import "encoding/xml"
const (
// HYPERLINK_STYLE is the style identifier "a1".
// NOTE(review): presumably the run style (rStyle) applied to hyperlink
// runs; its use is not visible here, so confirm against AddLink.
HYPERLINK_STYLE = "a1"
)
// A Run is part of a paragraph that has its own style. It could be
// a piece of text in bold, or a link. It maps to the r element.
type Run struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
// RunProperties is optional (omitempty), so it can be nil.
RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
// InstrText is tagged as w:instrText and omitted when empty.
InstrText string `xml:"w:instrText,omitempty"`
// Text is the text node of the run; AddText always sets it.
Text *Text
}
// The Text object contains the actual text. It maps to the t element.
type Text struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main t"`
// XMLSpace is the xml:space attribute; it is omitted when empty.
XMLSpace string `xml:"xml:space,attr,omitempty"`
// Text is the character data of the element.
Text string `xml:",chardata"`
}
// Hyperlink maps to the hyperlink element and wraps the run that is shown
// for the link.
type Hyperlink struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// ID is the id attribute in the relationships namespace.
// NOTE(review): presumably the ID returned by addLinkRelation; confirm in AddLink.
ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"`
// Run is the run contained in the hyperlink.
Run Run
}
// RunProperties encapsulates visual properties of a run. It maps to the rPr
// element; every child is a pointer marked omitempty, so unset ones are nil.
type RunProperties struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr"`
// Color is set by Run.Color.
Color *Color `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color,omitempty"`
// Size is set by Run.Size.
Size *Size `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz,omitempty"`
// RunStyle maps to the rStyle element.
RunStyle *RunStyle `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"`
}
// RunStyle contains styling for a run. It maps to the rStyle element.
type RunStyle struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle"`
// Val is tagged as the w:val attribute.
// NOTE(review): presumably a style identifier such as HYPERLINK_STYLE; confirm.
Val string `xml:"w:val,attr"`
}
// Color contains the colour of a run. It maps to the color element of the
// run properties.
type Color struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color"`
// Val is tagged as the w:val attribute; Run.Color stores its argument here unchanged.
Val string `xml:"w:val,attr"`
}
// Size contains the font size. It maps to the sz element of the run
// properties.
type Size struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz"`
// Val is tagged as the w:val attribute; Run.Size stores twice its argument here.
Val int `xml:"w:val,attr"`
}

View File

@@ -1,5 +1,6 @@
package docxlib package docxlib
// This contains internal functions needed to unpack (read) a zip file
import ( import (
"archive/zip" "archive/zip"
"encoding/xml" "encoding/xml"
@@ -7,7 +8,11 @@ import (
"io/ioutil" "io/ioutil"
) )
func unpack(zipReader *zip.Reader) (docx *Docx, err error) { // This receives a zip file (word documents are a zip with multiple xml inside)
// and parses the files that are relevant for us:
// 1.-Document
// 2.-Relationships
func unpack(zipReader *zip.Reader) (docx *DocxLib, err error) {
var doc *Document var doc *Document
var relations *Relationships var relations *Relationships
for _, f := range zipReader.File { for _, f := range zipReader.File {
@@ -24,13 +29,14 @@ func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
} }
} }
} }
docx = &Docx{ docx = &DocxLib{
Document: *doc, Document: *doc,
DocRelation: *relations, DocRelation: *relations,
} }
return docx, nil return docx, nil
} }
// Processes one of the relevant files, the one with the actual document
func processDoc(file *zip.File) (*Document, error) { func processDoc(file *zip.File) (*Document, error) {
filebytes, err := readZipFile(file) filebytes, err := readZipFile(file)
if err != nil { if err != nil {
@@ -42,8 +48,6 @@ func processDoc(file *zip.File) (*Document, error) {
XMLR: XMLNS_R, XMLR: XMLNS_R,
XMLName: xml.Name{Space: XMLNS_W, Local: "document"}} XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
err = xml.Unmarshal(filebytes, &doc) err = xml.Unmarshal(filebytes, &doc)
//r := bytes.NewReader(filebytes)
//err = decode(r)
if err != nil { if err != nil {
fmt.Println("Error unmarshalling doc") fmt.Println("Error unmarshalling doc")
fmt.Println(string(filebytes)) fmt.Println(string(filebytes))
@@ -52,6 +56,7 @@ func processDoc(file *zip.File) (*Document, error) {
return &doc, nil return &doc, nil
} }
// Processes one of the relevant files, the one with the relationships
func processRelations(file *zip.File) (*Relationships, error) { func processRelations(file *zip.File) (*Relationships, error) {
filebytes, err := readZipFile(file) filebytes, err := readZipFile(file)
if err != nil { if err != nil {
@@ -67,6 +72,7 @@ func processRelations(file *zip.File) (*Relationships, error) {
return &rels, nil return &rels, nil
} }
// From a zip file structure, we return a byte array
func readZipFile(zf *zip.File) ([]byte, error) { func readZipFile(zf *zip.File) ([]byte, error) {
f, err := zf.Open() f, err := zf.Open()
if err != nil { if err != nil {