diff --git a/README.md b/README.md index 54e4737..1d4be48 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Docx library -Yet another library to manipulate .docx (Microsoft Word) files in Go. +Yet another library to read and write .docx (Microsoft Word) files in Go. ## Introduction -As part of my work for [Basement Crowd](https://www.basementcrowd.com) y [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) +As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) Microsoft Word documents. The difference with other projects is the following: diff --git a/link.go b/apilink.go similarity index 78% rename from link.go rename to apilink.go index bc661f4..80260ae 100644 --- a/link.go +++ b/apilink.go @@ -2,7 +2,8 @@ package docxlib import "strconv" -func (f *Docx) addLinkRelation(link string) string { +// when adding a hyperlink we need to store a reference in the relationship field +func (f *DocxLib) addLinkRelation(link string) string { rel := &Relationship{ ID: "rId" + strconv.Itoa(f.rId), Type: REL_HYPERLINK, @@ -17,7 +18,7 @@ func (f *Docx) addLinkRelation(link string) string { return rel.ID } -// AddLink add hyperlink to paragraph +// AddLink adds a hyperlink to the paragraph func (p *Paragraph) AddLink(text string, link string) *Hyperlink { rId := p.file.addLinkRelation(link) hyperlink := &Hyperlink{ diff --git a/apipara.go b/apipara.go new file mode 100644 index 0000000..93cf807 --- /dev/null +++ b/apipara.go @@ -0,0 +1,26 @@ +package docxlib + +// AddParagraph adds a new paragraph +func (f *DocxLib) AddParagraph() *Paragraph { + p := &Paragraph{ + Data: make([]ParagraphChild, 0), + file: f, + } + + f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, p) + return p +} + +func (f *DocxLib) Paragraphs() []*Paragraph { + return f.Document.Body.Paragraphs +} + 
+func (p *Paragraph) Runs() (ret []*Run) { + data := p.Data + for _, d := range data { + if d.Run != nil { + ret = append(ret, d.Run) + } + } + return +} diff --git a/apirun.go b/apirun.go new file mode 100644 index 0000000..0fc9c02 --- /dev/null +++ b/apirun.go @@ -0,0 +1,34 @@ +package docxlib + +// Color allows to set run color +func (r *Run) Color(color string) *Run { + r.RunProperties.Color = &Color{ + Val: color, + } + + return r +} + +// Size allows to set run size +func (r *Run) Size(size int) *Run { + r.RunProperties.Size = &Size{ + Val: size * 2, + } + return r +} + +// AddText adds text to paragraph +func (p *Paragraph) AddText(text string) *Run { + t := &Text{ + Text: text, + } + + run := &Run{ + Text: t, + RunProperties: &RunProperties{}, + } + + p.Data = append(p.Data, ParagraphChild{Run: run}) + + return run +} diff --git a/docx.go b/docxlib.go similarity index 63% rename from docx.go rename to docxlib.go index 2aea587..6b749c6 100644 --- a/docx.go +++ b/docxlib.go @@ -5,7 +5,9 @@ import ( "io" ) -type Docx struct { +// DocxLib is the structure that allows access to the internal representation +// in memory of the doc (either read or about to be written) +type DocxLib struct { Document Document DocRelation Relationships @@ -14,12 +16,12 @@ type Docx struct { // New generates a new empty docx file that we can manipulate and // later on, save -func New() *Docx { +func New() *DocxLib { return emptyFile() } // Parse generates a new docx file in memory from a reader -func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) { +func Parse(reader io.ReaderAt, size int64) (doc *DocxLib, err error) { zipReader, err := zip.NewReader(reader, size) if err != nil { return nil, err @@ -29,7 +31,7 @@ func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) { } // Write allows to save a docx to a writer -func (f *Docx) Write(writer io.Writer) (err error) { +func (f *DocxLib) Write(writer io.Writer) (err error) { zipWriter := zip.NewWriter(writer) defer 
zipWriter.Close() diff --git a/empty.go b/empty.go index 65633c3..929e3e9 100644 --- a/empty.go +++ b/empty.go @@ -23,8 +23,8 @@ func emptyRelationships() []*Relationship { return defaultRel } -func emptyFile() *Docx { - docx := &Docx{ +func emptyFile() *DocxLib { + docx := &DocxLib{ Document: Document{ XMLName: xml.Name{ Space: "w", diff --git a/constants.go b/empty_constants.go similarity index 100% rename from constants.go rename to empty_constants.go diff --git a/pack.go b/pack.go index 206bba6..d513fc6 100644 --- a/pack.go +++ b/pack.go @@ -6,7 +6,10 @@ import ( "fmt" ) -func (f *Docx) pack(zipWriter *zip.Writer) (err error) { +// This receives a zip file writer (word documents are a zip with multiple xml inside) +// and writes the relevant files. Some of them come from the empty_constants file, +// others from the actual in-memory structure +func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) { files := map[string]string{} files["_rels/.rels"] = TEMP_REL diff --git a/paragraph.go b/paragraph.go deleted file mode 100644 index dc2bafc..0000000 --- a/paragraph.go +++ /dev/null @@ -1,42 +0,0 @@ -package docxlib - -import ( - "encoding/xml" -) - -type ParagraphChild struct { - Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"` - Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"` -} - -type Paragraph struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"` - Data []ParagraphChild - - file *Docx -} - -// AddParagraph adds a new paragraph -func (f *Docx) AddParagraph() *Paragraph { - p := &Paragraph{ - Data: make([]ParagraphChild, 0), - file: f, - } - - f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, p) - return p -} - -func (f *Docx) Paragraphs() []*Paragraph { - return f.Document.Body.Paragraphs -} - -func (p *Paragraph) Runs() (ret []*Run) { - data := p.Data - for _, d := range data { - if d.Run != nil { - ret = append(ret, 
d.Run) - } - } - return -} diff --git a/run.go b/run.go deleted file mode 100644 index f687061..0000000 --- a/run.go +++ /dev/null @@ -1,58 +0,0 @@ -package docxlib - -import "encoding/xml" - -// A Run is part of a paragraph that has its own style. It could be -// a piece of text in bold, or a link -type Run struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"` - RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"` - InstrText string `xml:"w:instrText,omitempty"` - Text *Text -} - -// The Text object contains the actual text -type Text struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main t"` - XMLSpace string `xml:"xml:space,attr,omitempty"` - Text string `xml:",chardata"` -} - -type Hyperlink struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"` - ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"` - Run Run -} - -// Color allows to set run color -func (r *Run) Color(color string) *Run { - r.RunProperties.Color = &Color{ - Val: color, - } - - return r -} - -// Size allows to set run size -func (r *Run) Size(size int) *Run { - r.RunProperties.Size = &Size{ - Val: size * 2, - } - return r -} - -// AddText add text to paragraph -func (p *Paragraph) AddText(text string) *Run { - t := &Text{ - Text: text, - } - - run := &Run{ - Text: t, - RunProperties: &RunProperties{}, - } - - p.Data = append(p.Data, ParagraphChild{Run: run}) - - return run -} diff --git a/run_properties.go b/run_properties.go deleted file mode 100644 index 5e3f567..0000000 --- a/run_properties.go +++ /dev/null @@ -1,29 +0,0 @@ -package docxlib - -import "encoding/xml" - -const ( - HYPERLINK_STYLE = "a1" -) - -type RunProperties struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr"` - Color *Color 
`xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color,omitempty"` - Size *Size `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz,omitempty"` - RunStyle *RunStyle `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"` -} - -type RunStyle struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle"` - Val string `xml:"w:val,attr"` -} - -type Color struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color"` - Val string `xml:"w:val,attr"` -} - -type Size struct { - XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz"` - Val int `xml:"w:val,attr"` -} diff --git a/document.go b/structdoc.go similarity index 100% rename from document.go rename to structdoc.go diff --git a/structnodes.go b/structnodes.go new file mode 100644 index 0000000..f5d16ee --- /dev/null +++ b/structnodes.go @@ -0,0 +1,15 @@ +package docxlib + +import "encoding/xml" + +type ParagraphChild struct { + Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"` + Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"` +} + +type Paragraph struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"` + Data []ParagraphChild + + file *DocxLib +} diff --git a/relationship.go b/structrel.go similarity index 100% rename from relationship.go rename to structrel.go diff --git a/structrun.go b/structrun.go new file mode 100644 index 0000000..55fbb8b --- /dev/null +++ b/structrun.go @@ -0,0 +1,57 @@ +package docxlib + +import "encoding/xml" + +const ( + HYPERLINK_STYLE = "a1" +) + +// A Run is part of a paragraph that has its own style. 
It could be +// a piece of text in bold, or a link +type Run struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"` + RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"` + InstrText string `xml:"w:instrText,omitempty"` + Text *Text +} + +// The Text object contains the actual text +type Text struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main t"` + XMLSpace string `xml:"xml:space,attr,omitempty"` + Text string `xml:",chardata"` +} + +// The hyperlink element contains links +type Hyperlink struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"` + ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"` + Run Run +} + +// RunProperties encapsulates visual properties of a run +type RunProperties struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr"` + Color *Color `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color,omitempty"` + Size *Size `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz,omitempty"` + RunStyle *RunStyle `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"` +} + +// RunStyle contains styling for a run +type RunStyle struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle"` + Val string `xml:"w:val,attr"` +} + +// Color contains the sound of music. :D +// I'm kidding. 
It contains the color +type Color struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color"` + Val string `xml:"w:val,attr"` +} + +// Size contains the font size +type Size struct { + XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz"` + Val int `xml:"w:val,attr"` +} diff --git a/unpack.go b/unpack.go index fc4dae5..945360f 100644 --- a/unpack.go +++ b/unpack.go @@ -1,5 +1,6 @@ package docxlib +// This contains internal functions needed to unpack (read) a zip file import ( "archive/zip" "encoding/xml" @@ -7,7 +8,11 @@ import ( "io/ioutil" ) -func unpack(zipReader *zip.Reader) (docx *Docx, err error) { +// This receives a zip file (word documents are a zip with multiple xml inside) +// and parses the files that are relevant for us: +// 1.-Document +// 2.-Relationships +func unpack(zipReader *zip.Reader) (docx *DocxLib, err error) { var doc *Document var relations *Relationships for _, f := range zipReader.File { @@ -24,13 +29,14 @@ func unpack(zipReader *zip.Reader) (docx *Docx, err error) { } } } - docx = &Docx{ + docx = &DocxLib{ Document: *doc, DocRelation: *relations, } return docx, nil } +// Processes one of the relevant files, the one with the actual document func processDoc(file *zip.File) (*Document, error) { filebytes, err := readZipFile(file) if err != nil { @@ -42,8 +48,6 @@ func processDoc(file *zip.File) (*Document, error) { XMLR: XMLNS_R, XMLName: xml.Name{Space: XMLNS_W, Local: "document"}} err = xml.Unmarshal(filebytes, &doc) - //r := bytes.NewReader(filebytes) - //err = decode(r) if err != nil { fmt.Println("Error unmarshalling doc") fmt.Println(string(filebytes)) @@ -52,6 +56,7 @@ func processDoc(file *zip.File) (*Document, error) { return &doc, nil } +// Processes one of the relevant files, the one with the relationships func processRelations(file *zip.File) (*Relationships, error) { filebytes, err := readZipFile(file) if err != nil { @@ -67,6 +72,7 @@ func 
processRelations(file *zip.File) (*Relationships, error) { return &rels, nil } +// From a zip file structure, we return a byte array func readZipFile(zf *zip.File) ([]byte, error) { f, err := zf.Open() if err != nil {