mirror of
https://github.com/fumiama/go-docx.git
synced 2026-06-23 20:16:38 +08:00
优化代码结构
This commit is contained in:
1
LICENSE
1
LICENSE
@@ -3,6 +3,7 @@ MIT License
|
|||||||
Copyright (c) 2020 gingfrederik
|
Copyright (c) 2020 gingfrederik
|
||||||
Copyright (c) 2021 Gonzalo Fernandez-Victorio
|
Copyright (c) 2021 Gonzalo Fernandez-Victorio
|
||||||
Copyright (c) 2021 Basement Crowd Ltd (https://www.basementcrowd.com)
|
Copyright (c) 2021 Basement Crowd Ltd (https://www.basementcrowd.com)
|
||||||
|
Copyright (c) 2023 Fumiama Minamoto (源文雨)
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
Yet another library to read and write .docx (a.k.a. Microsoft Word documents or ECMA-376 Office Open XML) files in Go.
|
Yet another library to read and write .docx (a.k.a. Microsoft Word documents or ECMA-376 Office Open XML) files in Go.
|
||||||
|
|
||||||
|
This is a variant optimized and expanded by fumiama. The original repo is [gonfva/docxlib](https://github.com/gonfva/docxlib).
|
||||||
|
|
||||||
## Introduction
|
## Introduction
|
||||||
|
|
||||||
As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) Microsoft Word documents.
|
As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) Microsoft Word documents.
|
||||||
@@ -31,7 +33,7 @@ In the mean time, shared as an example in case somebody finds it useful.
|
|||||||
Go modules supported
|
Go modules supported
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
go get github.com/gonfva/docxlib
|
go get github.com/fumiama/docxlib
|
||||||
```
|
```
|
||||||
|
|
||||||
### Usage
|
### Usage
|
||||||
@@ -39,8 +41,7 @@ go get github.com/gonfva/docxlib
|
|||||||
See [main](main/main.go) for an example
|
See [main](main/main.go) for an example
|
||||||
|
|
||||||
```
|
```
|
||||||
$ go build -o docxlib ./main
|
$ go run ./cmd/main
|
||||||
$ ./docxlib
|
|
||||||
Preparing new document to write at /tmp/new-file.docx
|
Preparing new document to write at /tmp/new-file.docx
|
||||||
Document writen.
|
Document writen.
|
||||||
Now trying to read it
|
Now trying to read it
|
||||||
@@ -53,7 +54,7 @@ End of main
|
|||||||
```
|
```
|
||||||
You can also increase the log level (-logtostderr=true -v=0) and just dump a specific file (-file /tmp/new-file.docx). See [getstructure/main](getstructure/main.go)
|
You can also increase the log level (-logtostderr=true -v=0) and just dump a specific file (-file /tmp/new-file.docx). See [getstructure/main](getstructure/main.go)
|
||||||
```
|
```
|
||||||
$ go build -o docxlib ./getstructure/ && ./docxlib -logtostderr=true -v=0 -file /tmp/new-file.docx
|
$ go build -o docxlib ./cmd/getstructure/ && ./docxlib -logtostderr=true -v=0 -file /tmp/new-file.docx
|
||||||
I0511 12:37:40.898493 18466 unpack.go:69] Relations: [...]
|
I0511 12:37:40.898493 18466 unpack.go:69] Relations: [...]
|
||||||
I0511 12:37:40.898787 18466 unpack.go:47] Doc: [...]
|
I0511 12:37:40.898787 18466 unpack.go:47] Doc: [...]
|
||||||
I0511 12:37:40.899330 18466 unpack.go:58] Paragraph [0xc000026d40 0xc000027d00 0xc000172340]
|
I0511 12:37:40.899330 18466 unpack.go:58] Paragraph [0xc000026d40 0xc000027d00 0xc000172340]
|
||||||
|
|||||||
11
apilink.go
11
apilink.go
@@ -1,18 +1,19 @@
|
|||||||
package docxlib
|
package docxlib
|
||||||
|
|
||||||
import "strconv"
|
import (
|
||||||
|
"strconv"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
// when adding an hyperlink we need to store a reference in the relationship field
|
// when adding an hyperlink we need to store a reference in the relationship field
|
||||||
func (f *DocxLib) addLinkRelation(link string) string {
|
func (f *Docx) addLinkRelation(link string) string {
|
||||||
rel := &Relationship{
|
rel := &Relationship{
|
||||||
ID: "rId" + strconv.Itoa(f.rId),
|
ID: "rId" + strconv.Itoa(int(atomic.AddUintptr(&f.rId, 1))),
|
||||||
Type: REL_HYPERLINK,
|
Type: REL_HYPERLINK,
|
||||||
Target: link,
|
Target: link,
|
||||||
TargetMode: REL_TARGETMODE,
|
TargetMode: REL_TARGETMODE,
|
||||||
}
|
}
|
||||||
|
|
||||||
f.rId += 1
|
|
||||||
|
|
||||||
f.DocRelation.Relationships = append(f.DocRelation.Relationships, rel)
|
f.DocRelation.Relationships = append(f.DocRelation.Relationships, rel)
|
||||||
|
|
||||||
return rel.ID
|
return rel.ID
|
||||||
|
|||||||
@@ -1,17 +1,18 @@
|
|||||||
package docxlib
|
package docxlib
|
||||||
|
|
||||||
// AddParagraph adds a new paragraph
|
// AddParagraph adds a new paragraph
|
||||||
func (f *DocxLib) AddParagraph() *Paragraph {
|
func (f *Docx) AddParagraph() *Paragraph {
|
||||||
p := &Paragraph{
|
p := &Paragraph{
|
||||||
Data: make([]ParagraphChild, 0),
|
Data: make([]ParagraphChild, 0, 64),
|
||||||
file: f,
|
file: f,
|
||||||
}
|
}
|
||||||
|
|
||||||
f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, p)
|
f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, p)
|
||||||
|
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *DocxLib) Paragraphs() []*Paragraph {
|
func (f *Docx) Paragraphs() []*Paragraph {
|
||||||
return f.Document.Body.Paragraphs
|
return f.Document.Body.Paragraphs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ func (r *Run) Size(size int) *Run {
|
|||||||
r.RunProperties.Size = &Size{
|
r.RunProperties.Size = &Size{
|
||||||
Val: size * 2,
|
Val: size * 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"github.com/fumiama/docxlib"
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
"github.com/gonfva/docxlib"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var fileLocation *string
|
var fileLocation *string
|
||||||
@@ -40,7 +40,7 @@ func main() {
|
|||||||
if child.Link != nil {
|
if child.Link != nil {
|
||||||
id := child.Link.ID
|
id := child.Link.ID
|
||||||
text := child.Link.Run.InstrText
|
text := child.Link.Run.InstrText
|
||||||
link, err := doc.References(id)
|
link, err := doc.Refer(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text)
|
fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text)
|
||||||
} else {
|
} else {
|
||||||
@@ -5,7 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/gonfva/docxlib"
|
"github.com/fumiama/docxlib"
|
||||||
)
|
)
|
||||||
|
|
||||||
var fileLocation *string
|
var fileLocation *string
|
||||||
@@ -60,7 +60,7 @@ func main() {
|
|||||||
if child.Link != nil {
|
if child.Link != nil {
|
||||||
id := child.Link.ID
|
id := child.Link.ID
|
||||||
text := child.Link.Run.InstrText
|
text := child.Link.Run.InstrText
|
||||||
link, err := doc.References(id)
|
link, err := doc.Refer(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text)
|
fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text)
|
||||||
} else {
|
} else {
|
||||||
47
docxlib.go
47
docxlib.go
@@ -6,33 +6,40 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
// DocxLib is the structure that allows access to the internal representation
|
var (
|
||||||
|
// ErrRefIDNotFound cannot find such reference
|
||||||
|
ErrRefIDNotFound = errors.New("ref id not found")
|
||||||
|
)
|
||||||
|
|
||||||
|
// Docx is the structure that allows access to the internal representation
|
||||||
// in memory of the doc (either read or about to be written)
|
// in memory of the doc (either read or about to be written)
|
||||||
type DocxLib struct {
|
type Docx struct {
|
||||||
Document Document
|
Document Document
|
||||||
DocRelation Relationships
|
DocRelation Relationships
|
||||||
|
|
||||||
rId int
|
rId uintptr
|
||||||
}
|
}
|
||||||
|
|
||||||
// New generates a new empty docx file that we can manipulate and
|
// New generates a new empty docx file that we can manipulate and
|
||||||
// later on, save
|
// later on, save
|
||||||
func New() *DocxLib {
|
func New() *Docx {
|
||||||
return emptyFile()
|
return newEmptyFile()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse generates a new docx file in memory from a reader
|
// Parse generates a new docx file in memory from a reader
|
||||||
// You can invoke it from a file
|
// You can invoke it from a file
|
||||||
// readFile, err := os.Open(FILE_PATH)
|
//
|
||||||
// if err != nil {
|
// readFile, err := os.Open(FILE_PATH)
|
||||||
// panic(err)
|
// if err != nil {
|
||||||
// }
|
// panic(err)
|
||||||
// fileinfo, err := readFile.Stat()
|
// }
|
||||||
// if err != nil {
|
// fileinfo, err := readFile.Stat()
|
||||||
// panic(err)
|
// if err != nil {
|
||||||
// }
|
// panic(err)
|
||||||
// size := fileinfo.Size()
|
// }
|
||||||
// doc, err := docxlib.Parse(readFile, int64(size))
|
// size := fileinfo.Size()
|
||||||
|
// doc, err := docxlib.Parse(readFile, int64(size))
|
||||||
|
//
|
||||||
// but also you can invoke from a webform (BEWARE of trusting users data!!!)
|
// but also you can invoke from a webform (BEWARE of trusting users data!!!)
|
||||||
//
|
//
|
||||||
// func uploadFile(w http.ResponseWriter, r *http.Request) {
|
// func uploadFile(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -48,7 +55,7 @@ func New() *DocxLib {
|
|||||||
// defer file.Close()
|
// defer file.Close()
|
||||||
// docxlib.Parse(file, handler.Size)
|
// docxlib.Parse(file, handler.Size)
|
||||||
// }
|
// }
|
||||||
func Parse(reader io.ReaderAt, size int64) (doc *DocxLib, err error) {
|
func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) {
|
||||||
zipReader, err := zip.NewReader(reader, size)
|
zipReader, err := zip.NewReader(reader, size)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -58,21 +65,21 @@ func Parse(reader io.ReaderAt, size int64) (doc *DocxLib, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Write allows to save a docx to a writer
|
// Write allows to save a docx to a writer
|
||||||
func (f *DocxLib) Write(writer io.Writer) (err error) {
|
func (f *Docx) Write(writer io.Writer) (err error) {
|
||||||
zipWriter := zip.NewWriter(writer)
|
zipWriter := zip.NewWriter(writer)
|
||||||
defer zipWriter.Close()
|
defer zipWriter.Close()
|
||||||
|
|
||||||
return f.pack(zipWriter)
|
return f.pack(zipWriter)
|
||||||
}
|
}
|
||||||
|
|
||||||
// References gets the url for a reference
|
// Refer gets the url for a reference
|
||||||
func (f *DocxLib) References(id string) (href string, err error) {
|
func (f *Docx) Refer(id string) (href string, err error) {
|
||||||
for _, a := range f.DocRelation.Relationships {
|
for _, a := range f.DocRelation.Relationships {
|
||||||
if a.ID == id {
|
if a.ID == id {
|
||||||
href = a.Target
|
href = a.Target
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
err = errors.New("id not found")
|
err = ErrRefIDNotFound
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
50
empty.go
50
empty.go
@@ -2,29 +2,8 @@ package docxlib
|
|||||||
|
|
||||||
import "encoding/xml"
|
import "encoding/xml"
|
||||||
|
|
||||||
func emptyRelationships() []*Relationship {
|
func newEmptyFile() *Docx {
|
||||||
defaultRel := []*Relationship{
|
return &Docx{
|
||||||
{
|
|
||||||
ID: "rId1",
|
|
||||||
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles`,
|
|
||||||
Target: "styles.xml",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "rId2",
|
|
||||||
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme`,
|
|
||||||
Target: "theme/theme1.xml",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ID: "rId3",
|
|
||||||
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable`,
|
|
||||||
Target: "fontTable.xml",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
return defaultRel
|
|
||||||
}
|
|
||||||
|
|
||||||
func emptyFile() *DocxLib {
|
|
||||||
docx := &DocxLib{
|
|
||||||
Document: Document{
|
Document: Document{
|
||||||
XMLName: xml.Name{
|
XMLName: xml.Name{
|
||||||
Space: "w",
|
Space: "w",
|
||||||
@@ -35,14 +14,29 @@ func emptyFile() *DocxLib {
|
|||||||
XMLName: xml.Name{
|
XMLName: xml.Name{
|
||||||
Space: "w",
|
Space: "w",
|
||||||
},
|
},
|
||||||
Paragraphs: make([]*Paragraph, 0),
|
Paragraphs: make([]*Paragraph, 0, 64),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
DocRelation: Relationships{
|
DocRelation: Relationships{
|
||||||
Xmlns: XMLNS,
|
Xmlns: XMLNS,
|
||||||
Relationships: emptyRelationships(),
|
Relationships: []*Relationship{
|
||||||
|
{
|
||||||
|
ID: "rId1",
|
||||||
|
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles`,
|
||||||
|
Target: "styles.xml",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "rId2",
|
||||||
|
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme`,
|
||||||
|
Target: "theme/theme1.xml",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "rId3",
|
||||||
|
Type: `http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable`,
|
||||||
|
Target: "fontTable.xml",
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
rId: 4,
|
rId: 3,
|
||||||
}
|
}
|
||||||
return docx
|
|
||||||
}
|
}
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -1,4 +1,4 @@
|
|||||||
module github.com/gonfva/docxlib
|
module github.com/fumiama/docxlib
|
||||||
|
|
||||||
go 1.16
|
go 1.16
|
||||||
|
|
||||||
|
|||||||
20
helper.go
Normal file
20
helper.go
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
package docxlib
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BytesToString 没有内存开销的转换
|
||||||
|
func BytesToString(b []byte) string {
|
||||||
|
return *(*string)(unsafe.Pointer(&b))
|
||||||
|
}
|
||||||
|
|
||||||
|
// StringToBytes 没有内存开销的转换
|
||||||
|
func StringToBytes(s string) (b []byte) {
|
||||||
|
bh := (*slice)(unsafe.Pointer(&b))
|
||||||
|
sh := (*slice)(unsafe.Pointer(&s))
|
||||||
|
bh.data = sh.data
|
||||||
|
bh.len = sh.len
|
||||||
|
bh.cap = sh.len
|
||||||
|
return b
|
||||||
|
}
|
||||||
12
pack.go
12
pack.go
@@ -3,6 +3,7 @@ package docxlib
|
|||||||
import (
|
import (
|
||||||
"archive/zip"
|
"archive/zip"
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
@@ -10,7 +11,7 @@ import (
|
|||||||
// This receives a zip file writer (word documents are a zip with multiple xml inside)
|
// This receives a zip file writer (word documents are a zip with multiple xml inside)
|
||||||
// and writes the relevant files. Some of them come from the empty_constants file,
|
// and writes the relevant files. Some of them come from the empty_constants file,
|
||||||
// others from the actual in-memory structure
|
// others from the actual in-memory structure
|
||||||
func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
|
func (f *Docx) pack(zipWriter *zip.Writer) (err error) {
|
||||||
files := map[string]string{}
|
files := map[string]string{}
|
||||||
|
|
||||||
files["_rels/.rels"] = TEMP_REL
|
files["_rels/.rels"] = TEMP_REL
|
||||||
@@ -34,7 +35,7 @@ func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = w.Write([]byte(data))
|
_, err = w.Write(StringToBytes(data))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -44,12 +45,13 @@ func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func marshal(data interface{}) (out string, err error) {
|
func marshal(data interface{}) (out string, err error) {
|
||||||
body, err := xml.Marshal(data)
|
sb := strings.Builder{}
|
||||||
|
sb.WriteString(xml.Header)
|
||||||
|
err = xml.NewEncoder(&sb).Encode(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorln("Error marshalling", err)
|
glog.Errorln("Error marshalling", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
out = sb.String()
|
||||||
out = xml.Header + string(body)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
15
slice.go
Normal file
15
slice.go
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
package docxlib
|
||||||
|
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
|
// slice is the runtime representation of a slice.
|
||||||
|
// It cannot be used safely or portably and its representation may
|
||||||
|
// change in a later release.
|
||||||
|
//
|
||||||
|
// Unlike reflect.SliceHeader, its Data field is sufficient to guarantee the
|
||||||
|
// data it references will not be garbage collected.
|
||||||
|
type slice struct {
|
||||||
|
data unsafe.Pointer
|
||||||
|
len int
|
||||||
|
cap int
|
||||||
|
}
|
||||||
@@ -17,11 +17,11 @@ type Paragraph struct {
|
|||||||
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
|
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
|
||||||
Data []ParagraphChild
|
Data []ParagraphChild
|
||||||
|
|
||||||
file *DocxLib
|
file *Docx
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||||
children := make([]ParagraphChild, 0)
|
children := make([]ParagraphChild, 0, 64)
|
||||||
for {
|
for {
|
||||||
t, err := d.Token()
|
t, err := d.Token()
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
@@ -30,7 +30,8 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
|||||||
switch tt := t.(type) {
|
switch tt := t.(type) {
|
||||||
case xml.StartElement:
|
case xml.StartElement:
|
||||||
var elem ParagraphChild
|
var elem ParagraphChild
|
||||||
if tt.Name.Local == "hyperlink" {
|
switch tt.Name.Local {
|
||||||
|
case "hyperlink":
|
||||||
var value Hyperlink
|
var value Hyperlink
|
||||||
d.DecodeElement(&value, &start)
|
d.DecodeElement(&value, &start)
|
||||||
id := getAtt(tt.Attr, "id")
|
id := getAtt(tt.Attr, "id")
|
||||||
@@ -41,20 +42,20 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
|||||||
if anchor != "" {
|
if anchor != "" {
|
||||||
value.ID = anchor
|
value.ID = anchor
|
||||||
}
|
}
|
||||||
elem = ParagraphChild{Link: &value}
|
elem.Link = &value
|
||||||
} else if tt.Name.Local == "r" {
|
case "r":
|
||||||
var value Run
|
var value Run
|
||||||
d.DecodeElement(&value, &start)
|
d.DecodeElement(&value, &start)
|
||||||
elem = ParagraphChild{Run: &value}
|
elem.Run = &value
|
||||||
if value.InstrText == "" && value.Text == nil {
|
if value.InstrText == "" && value.Text == nil {
|
||||||
glog.V(0).Infof("Empty run, we ignore")
|
glog.V(0).Infof("Empty run, we ignore")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
} else if tt.Name.Local == "rPr" {
|
case "rPr":
|
||||||
var value RunProperties
|
var value RunProperties
|
||||||
d.DecodeElement(&value, &start)
|
d.DecodeElement(&value, &start)
|
||||||
elem = ParagraphChild{Properties: &value}
|
elem.Properties = &value
|
||||||
} else {
|
default:
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
children = append(children, elem)
|
children = append(children, elem)
|
||||||
|
|||||||
12
structrun.go
12
structrun.go
@@ -76,19 +76,20 @@ func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
|||||||
|
|
||||||
switch tt := t.(type) {
|
switch tt := t.(type) {
|
||||||
case xml.StartElement:
|
case xml.StartElement:
|
||||||
if tt.Name.Local == "rPr" {
|
switch tt.Name.Local {
|
||||||
|
case "rPr":
|
||||||
var value RunProperties
|
var value RunProperties
|
||||||
d.DecodeElement(&value, &start)
|
d.DecodeElement(&value, &start)
|
||||||
elem.RunProperties = &value
|
elem.RunProperties = &value
|
||||||
} else if tt.Name.Local == "instrText" {
|
case "instrText":
|
||||||
var value string
|
var value string
|
||||||
d.DecodeElement(&value, &start)
|
d.DecodeElement(&value, &start)
|
||||||
elem.InstrText = value
|
elem.InstrText = value
|
||||||
} else if tt.Name.Local == "t" {
|
case "t":
|
||||||
var value Text
|
var value Text
|
||||||
d.DecodeElement(&value, &start)
|
d.DecodeElement(&value, &start)
|
||||||
elem.Text = &value
|
elem.Text = &value
|
||||||
} else {
|
default:
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -109,8 +110,7 @@ func (r *Text) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
|||||||
|
|
||||||
switch tt := t.(type) {
|
switch tt := t.(type) {
|
||||||
case xml.CharData:
|
case xml.CharData:
|
||||||
cd := tt.Copy()
|
elem.Text = string(tt) // implicitly copy
|
||||||
elem.Text = string(cd)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
53
unpack.go
53
unpack.go
@@ -4,7 +4,7 @@ package docxlib
|
|||||||
import (
|
import (
|
||||||
"archive/zip"
|
"archive/zip"
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"io/ioutil"
|
"io"
|
||||||
|
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
@@ -13,68 +13,63 @@ import (
|
|||||||
// and parses the files that are relevant for us:
|
// and parses the files that are relevant for us:
|
||||||
// 1.-Document
|
// 1.-Document
|
||||||
// 2.-Relationships
|
// 2.-Relationships
|
||||||
func unpack(zipReader *zip.Reader) (docx *DocxLib, err error) {
|
func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
|
||||||
var doc *Document
|
docx = new(Docx)
|
||||||
var relations *Relationships
|
|
||||||
for _, f := range zipReader.File {
|
for _, f := range zipReader.File {
|
||||||
if f.Name == "word/_rels/document.xml.rels" {
|
if f.Name == "word/_rels/document.xml.rels" {
|
||||||
relations, err = processRelations(f)
|
err = processRelations(f, &docx.DocRelation)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if f.Name == "word/document.xml" {
|
if f.Name == "word/document.xml" {
|
||||||
doc, err = processDoc(f)
|
err = processDoc(f, &docx.Document)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
docx = &DocxLib{
|
return
|
||||||
Document: *doc,
|
|
||||||
DocRelation: *relations,
|
|
||||||
}
|
|
||||||
return docx, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Processes one of the relevant files, the one with the actual document
|
// Processes one of the relevant files, the one with the actual document
|
||||||
func processDoc(file *zip.File) (*Document, error) {
|
func processDoc(file *zip.File, doc *Document) error {
|
||||||
filebytes, err := readZipFile(file)
|
filebytes, err := readZipFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorln("Error reading from internal zip file")
|
glog.Errorln("Error reading from internal zip file")
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
glog.V(0).Infoln("Doc:", string(filebytes))
|
glog.V(0).Infoln("Doc:", string(filebytes))
|
||||||
|
|
||||||
doc := Document{
|
doc.XMLW = XMLNS_W
|
||||||
XMLW: XMLNS_W,
|
doc.XMLR = XMLNS_R
|
||||||
XMLR: XMLNS_R,
|
doc.XMLName.Space = XMLNS_W
|
||||||
XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
|
doc.XMLName.Local = "document"
|
||||||
err = xml.Unmarshal(filebytes, &doc)
|
err = xml.Unmarshal(filebytes, doc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorln("Error unmarshalling doc", string(filebytes))
|
glog.Errorln("Error unmarshalling doc", string(filebytes))
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
glog.V(0).Infoln("Paragraph", doc.Body.Paragraphs)
|
glog.V(0).Infoln("Paragraph", doc.Body.Paragraphs)
|
||||||
return &doc, nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Processes one of the relevant files, the one with the relationships
|
// Processes one of the relevant files, the one with the relationships
|
||||||
func processRelations(file *zip.File) (*Relationships, error) {
|
func processRelations(file *zip.File, rels *Relationships) error {
|
||||||
filebytes, err := readZipFile(file)
|
filebytes, err := readZipFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorln("Error reading from internal zip file")
|
glog.Errorln("Error reading from internal zip file")
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
glog.V(0).Infoln("Relations:", string(filebytes))
|
glog.V(0).Infoln("Relations:", string(filebytes))
|
||||||
|
|
||||||
rels := Relationships{Xmlns: XMLNS_R}
|
rels.Xmlns = XMLNS_R
|
||||||
err = xml.Unmarshal(filebytes, &rels)
|
err = xml.Unmarshal(filebytes, rels)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorln("Error unmarshalling relationships")
|
glog.Errorln("Error unmarshalling relationships")
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
return &rels, nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// From a zip file structure, we return a byte array
|
// From a zip file structure, we return a byte array
|
||||||
@@ -84,5 +79,5 @@ func readZipFile(zf *zip.File) ([]byte, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
return ioutil.ReadAll(f)
|
return io.ReadAll(f)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user