1
0
mirror of https://github.com/fumiama/go-docx.git synced 2026-06-27 14:40:24 +08:00

House cleaning

This commit is contained in:
Gonzalo Fernandez-Victorio
2021-04-29 17:38:39 +01:00
parent eae5f90385
commit c821a33692
16 changed files with 159 additions and 144 deletions

View File

@@ -1,10 +1,10 @@
# Docx library
Yet another library to manipulate .docx (Microsoft Word) files in Go.
Yet another library to read and write .docx (Microsoft Word) files in Go.
## Introduction
As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write)
As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) Microsoft Word documents.
The difference with other projects is the following:

View File

@@ -2,7 +2,8 @@ package docxlib
import "strconv"
func (f *Docx) addLinkRelation(link string) string {
// When adding a hyperlink we need to store a reference in the relationship field
func (f *DocxLib) addLinkRelation(link string) string {
rel := &Relationship{
ID: "rId" + strconv.Itoa(f.rId),
Type: REL_HYPERLINK,
@@ -17,7 +18,7 @@ func (f *Docx) addLinkRelation(link string) string {
return rel.ID
}
// AddLink add hyperlink to paragraph
// AddLink adds a hyperlink to the paragraph
func (p *Paragraph) AddLink(text string, link string) *Hyperlink {
rId := p.file.addLinkRelation(link)
hyperlink := &Hyperlink{

26
apipara.go Normal file
View File

@@ -0,0 +1,26 @@
package docxlib
// AddParagraph creates an empty paragraph, appends it to the end of the
// document body and returns it so that content can be added to it.
func (f *DocxLib) AddParagraph() *Paragraph {
	para := new(Paragraph)
	para.Data = make([]ParagraphChild, 0)
	// Back-reference to the owning document, used by the paragraph's methods.
	para.file = f

	f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, para)
	return para
}
// Paragraphs returns the paragraphs stored in the document body. The slice
// returned is the document's own slice, not a copy.
func (f *DocxLib) Paragraphs() []*Paragraph {
	paragraphs := f.Document.Body.Paragraphs
	return paragraphs
}
// Runs collects the runs that are direct children of the paragraph, in the
// order they are stored. Children whose Run field is nil are skipped. The
// result is nil when the paragraph holds no runs.
func (p *Paragraph) Runs() (ret []*Run) {
	for i := range p.Data {
		if run := p.Data[i].Run; run != nil {
			ret = append(ret, run)
		}
	}
	return ret
}

34
apirun.go Normal file
View File

@@ -0,0 +1,34 @@
package docxlib
// Color sets the color of the run and returns the run so that calls can be
// chained. The given string is stored unchanged as the value of the color
// property.
func (r *Run) Color(color string) *Run {
	// RunProperties is an optional pointer field; a run that was not built
	// by AddText may not have one yet, so create it instead of panicking.
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Color = &Color{Val: color}
	return r
}
// Size sets the font size of the run and returns the run so that calls can
// be chained. The stored value is twice the given size.
// NOTE(review): the doubling presumably converts points to the half-point
// units used by the w:sz element; confirm against the OOXML specification.
func (r *Run) Size(size int) *Run {
	// RunProperties is an optional pointer field; a run that was not built
	// by AddText may not have one yet, so create it instead of panicking.
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Size = &Size{Val: size * 2}
	return r
}
// AddText appends a new run containing the given text to the paragraph and
// returns that run. The run is created with empty run properties.
func (p *Paragraph) AddText(text string) *Run {
	run := &Run{
		RunProperties: &RunProperties{},
		Text:          &Text{Text: text},
	}
	child := ParagraphChild{Run: run}
	p.Data = append(p.Data, child)
	return run
}

View File

@@ -5,7 +5,9 @@ import (
"io"
)
type Docx struct {
// DocxLib is the structure that allows access to the internal representation
// in memory of the doc (either read or about to be written)
type DocxLib struct {
Document Document
DocRelation Relationships
@@ -14,12 +16,12 @@ type Docx struct {
// New generates a new empty docx file that we can manipulate and
// later on, save
func New() *Docx {
func New() *DocxLib {
return emptyFile()
}
// Parse generates a new docx file in memory from a reader
func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) {
func Parse(reader io.ReaderAt, size int64) (doc *DocxLib, err error) {
zipReader, err := zip.NewReader(reader, size)
if err != nil {
return nil, err
@@ -29,7 +31,7 @@ func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) {
}
// Write saves a docx to a writer
func (f *Docx) Write(writer io.Writer) (err error) {
func (f *DocxLib) Write(writer io.Writer) (err error) {
zipWriter := zip.NewWriter(writer)
defer zipWriter.Close()

View File

@@ -23,8 +23,8 @@ func emptyRelationships() []*Relationship {
return defaultRel
}
func emptyFile() *Docx {
docx := &Docx{
func emptyFile() *DocxLib {
docx := &DocxLib{
Document: Document{
XMLName: xml.Name{
Space: "w",

View File

@@ -6,7 +6,10 @@ import (
"fmt"
)
func (f *Docx) pack(zipWriter *zip.Writer) (err error) {
// This receives a zip file writer (word documents are a zip with multiple xml inside)
// and writes the relevant files. Some of them come from the empty_constants file,
// others from the actual in-memory structure
func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
files := map[string]string{}
files["_rels/.rels"] = TEMP_REL

View File

@@ -1,42 +0,0 @@
package docxlib
import (
"encoding/xml"
)
// ParagraphChild is one entry in a paragraph's Data slice. It has a slot for
// a hyperlink and a slot for a run; Paragraph.Runs only looks at Run and
// skips entries where it is nil.
type ParagraphChild struct {
// Link maps to a "hyperlink" element in the wordprocessingml main namespace.
Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// Run maps to an "r" element in the wordprocessingml main namespace.
Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
}
// Paragraph maps to a "p" element in the wordprocessingml main namespace.
type Paragraph struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
// Data holds the children of the paragraph in order.
Data []ParagraphChild

// file is the document the paragraph belongs to; it is set by AddParagraph.
// Being unexported, it is not part of the XML mapping.
file *Docx
}
// AddParagraph creates an empty paragraph, appends it to the end of the
// document body and returns it so that content can be added to it.
func (f *Docx) AddParagraph() *Paragraph {
	para := new(Paragraph)
	para.Data = make([]ParagraphChild, 0)
	// Back-reference to the owning document, used by the paragraph's methods.
	para.file = f

	f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, para)
	return para
}
// Paragraphs returns the paragraphs stored in the document body. The slice
// returned is the document's own slice, not a copy.
func (f *Docx) Paragraphs() []*Paragraph {
	paragraphs := f.Document.Body.Paragraphs
	return paragraphs
}
// Runs collects the runs that are direct children of the paragraph, in the
// order they are stored. Children whose Run field is nil are skipped. The
// result is nil when the paragraph holds no runs.
func (p *Paragraph) Runs() (ret []*Run) {
	for i := range p.Data {
		if run := p.Data[i].Run; run != nil {
			ret = append(ret, run)
		}
	}
	return ret
}

58
run.go
View File

@@ -1,58 +0,0 @@
package docxlib
import "encoding/xml"
// A Run is part of a paragraph that has its own style. It could be
// a piece of text in bold, or a link.
// It maps to an "r" element in the wordprocessingml main namespace.
type Run struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
// RunProperties maps to the optional "rPr" child; it is a pointer and is
// omitted from the output when nil.
RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
// InstrText is tagged "w:instrText" and is omitted from the output when empty.
InstrText string `xml:"w:instrText,omitempty"`
// Text carries no tag of its own.
// NOTE(review): encoding/xml presumably takes the element name from the
// XMLName field of Text - confirm.
Text *Text
}
// The Text object contains the actual text.
// It maps to a "t" element in the wordprocessingml main namespace.
type Text struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main t"`
// XMLSpace is the "xml:space" attribute; omitted from the output when empty.
XMLSpace string `xml:"xml:space,attr,omitempty"`
// Text is the character data of the element.
Text string `xml:",chardata"`
}
// Hyperlink maps to a "hyperlink" element in the wordprocessingml main
// namespace.
type Hyperlink struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// ID is the "id" attribute in the officeDocument relationships namespace.
// NOTE(review): presumably the relationship ID returned by
// addLinkRelation - confirm against AddLink.
ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"`
// Run is the single run nested inside the hyperlink element.
Run Run
}
// Color sets the color of the run and returns the run so that calls can be
// chained. The given string is stored unchanged as the value of the color
// property.
func (r *Run) Color(color string) *Run {
	// RunProperties is an optional pointer field; a run that was not built
	// by AddText may not have one yet, so create it instead of panicking.
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Color = &Color{Val: color}
	return r
}
// Size sets the font size of the run and returns the run so that calls can
// be chained. The stored value is twice the given size.
// NOTE(review): the doubling presumably converts points to the half-point
// units used by the w:sz element; confirm against the OOXML specification.
func (r *Run) Size(size int) *Run {
	// RunProperties is an optional pointer field; a run that was not built
	// by AddText may not have one yet, so create it instead of panicking.
	if r.RunProperties == nil {
		r.RunProperties = &RunProperties{}
	}
	r.RunProperties.Size = &Size{Val: size * 2}
	return r
}
// AddText appends a new run containing the given text to the paragraph and
// returns that run. The run is created with empty run properties.
func (p *Paragraph) AddText(text string) *Run {
	run := &Run{
		RunProperties: &RunProperties{},
		Text:          &Text{Text: text},
	}
	child := ParagraphChild{Run: run}
	p.Data = append(p.Data, child)
	return run
}

View File

@@ -1,29 +0,0 @@
package docxlib
import "encoding/xml"
const (
// HYPERLINK_STYLE is the style identifier "a1".
// NOTE(review): presumably the run style applied to hyperlink runs; its
// use is not visible in this file - confirm.
HYPERLINK_STYLE = "a1"
)
// RunProperties maps to an "rPr" element in the wordprocessingml main
// namespace. Every property is an optional pointer that is omitted from the
// output when nil.
type RunProperties struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr"`
// Color maps to the "color" child element.
Color *Color `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color,omitempty"`
// Size maps to the "sz" child element.
Size *Size `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz,omitempty"`
// RunStyle maps to the "rStyle" child element.
RunStyle *RunStyle `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"`
}
// RunStyle maps to an "rStyle" element in the wordprocessingml main
// namespace.
type RunStyle struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle"`
// Val is the "w:val" attribute of the element.
Val string `xml:"w:val,attr"`
}
// Color maps to a "color" element in the wordprocessingml main namespace.
type Color struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color"`
// Val is the "w:val" attribute; Run.Color stores the caller's string here
// unchanged.
Val string `xml:"w:val,attr"`
}
// Size maps to an "sz" element in the wordprocessingml main namespace.
type Size struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz"`
// Val is the "w:val" attribute.
// NOTE(review): Run.Size stores twice the requested size here, which
// suggests half-point units - confirm against the OOXML specification.
Val int `xml:"w:val,attr"`
}

15
structnodes.go Normal file
View File

@@ -0,0 +1,15 @@
package docxlib
import "encoding/xml"
// ParagraphChild is one entry in a paragraph's Data slice. It has a slot for
// a hyperlink and a slot for a run; Paragraph.Runs only looks at Run and
// skips entries where it is nil.
type ParagraphChild struct {
// Link maps to a "hyperlink" element in the wordprocessingml main namespace.
Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// Run maps to an "r" element in the wordprocessingml main namespace.
Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
}
// Paragraph maps to a "p" element in the wordprocessingml main namespace.
type Paragraph struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
// Data holds the children of the paragraph in order.
Data []ParagraphChild

// file is the document the paragraph belongs to; it is set by AddParagraph.
// Being unexported, it is not part of the XML mapping.
file *DocxLib
}

57
structrun.go Normal file
View File

@@ -0,0 +1,57 @@
package docxlib
import "encoding/xml"
const (
// HYPERLINK_STYLE is the style identifier "a1".
// NOTE(review): presumably the run style applied to hyperlink runs; its
// use is not visible in this file - confirm.
HYPERLINK_STYLE = "a1"
)
// A Run is part of a paragraph that has its own style. It could be
// a piece of text in bold, or a link.
// It maps to an "r" element in the wordprocessingml main namespace.
type Run struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
// RunProperties maps to the optional "rPr" child; it is a pointer and is
// omitted from the output when nil.
RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
// InstrText is tagged "w:instrText" and is omitted from the output when empty.
InstrText string `xml:"w:instrText,omitempty"`
// Text carries no tag of its own.
// NOTE(review): encoding/xml presumably takes the element name from the
// XMLName field of Text - confirm.
Text *Text
}
// The Text object contains the actual text.
// It maps to a "t" element in the wordprocessingml main namespace.
type Text struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main t"`
// XMLSpace is the "xml:space" attribute; omitted from the output when empty.
XMLSpace string `xml:"xml:space,attr,omitempty"`
// Text is the character data of the element.
Text string `xml:",chardata"`
}
// Hyperlink contains a link. It maps to a "hyperlink" element in the
// wordprocessingml main namespace.
type Hyperlink struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
// ID is the "id" attribute in the officeDocument relationships namespace.
// NOTE(review): presumably the relationship ID returned by
// addLinkRelation - confirm against AddLink.
ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"`
// Run is the single run nested inside the hyperlink element.
Run Run
}
// RunProperties encapsulates visual properties of a run. It maps to an "rPr"
// element in the wordprocessingml main namespace. Every property is an
// optional pointer that is omitted from the output when nil.
type RunProperties struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr"`
// Color maps to the "color" child element.
Color *Color `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color,omitempty"`
// Size maps to the "sz" child element.
Size *Size `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz,omitempty"`
// RunStyle maps to the "rStyle" child element.
RunStyle *RunStyle `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"`
}
// RunStyle contains styling for a run. It maps to an "rStyle" element in the
// wordprocessingml main namespace.
type RunStyle struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle"`
// Val is the "w:val" attribute of the element.
Val string `xml:"w:val,attr"`
}
// Color contains the color of a run. It maps to a "color" element in the
// wordprocessingml main namespace.
type Color struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color"`
// Val is the "w:val" attribute; Run.Color stores the caller's string here
// unchanged.
Val string `xml:"w:val,attr"`
}
// Size contains the font size. It maps to an "sz" element in the
// wordprocessingml main namespace.
type Size struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz"`
// Val is the "w:val" attribute.
// NOTE(review): Run.Size stores twice the requested size here, which
// suggests half-point units - confirm against the OOXML specification.
Val int `xml:"w:val,attr"`
}

View File

@@ -1,5 +1,6 @@
package docxlib
// This contains internal functions needed to unpack (read) a zip file
import (
"archive/zip"
"encoding/xml"
@@ -7,7 +8,11 @@ import (
"io/ioutil"
)
func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
// This receives a zip file (word documents are a zip with multiple xml inside)
// and parses the files that are relevant for us:
// 1.-Document
// 2.-Relationships
func unpack(zipReader *zip.Reader) (docx *DocxLib, err error) {
var doc *Document
var relations *Relationships
for _, f := range zipReader.File {
@@ -24,13 +29,14 @@ func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
}
}
}
docx = &Docx{
docx = &DocxLib{
Document: *doc,
DocRelation: *relations,
}
return docx, nil
}
// Processes one of the relevant files, the one with the actual document
func processDoc(file *zip.File) (*Document, error) {
filebytes, err := readZipFile(file)
if err != nil {
@@ -42,8 +48,6 @@ func processDoc(file *zip.File) (*Document, error) {
XMLR: XMLNS_R,
XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
err = xml.Unmarshal(filebytes, &doc)
//r := bytes.NewReader(filebytes)
//err = decode(r)
if err != nil {
fmt.Println("Error unmarshalling doc")
fmt.Println(string(filebytes))
@@ -52,6 +56,7 @@ func processDoc(file *zip.File) (*Document, error) {
return &doc, nil
}
// Processes one of the relevant files, the one with the relationships
func processRelations(file *zip.File) (*Relationships, error) {
filebytes, err := readZipFile(file)
if err != nil {
@@ -67,6 +72,7 @@ func processRelations(file *zip.File) (*Relationships, error) {
return &rels, nil
}
// From a zip file structure, we return a byte array
func readZipFile(zf *zip.File) ([]byte, error) {
f, err := zf.Open()
if err != nil {