mirror of
https://github.com/fumiama/go-docx.git
synced 2026-06-05 07:40:24 +08:00
Logging
This commit is contained in:
17
README.md
17
README.md
@@ -51,7 +51,22 @@ Now trying to read it
|
||||
We've found a new hyperlink with ref http://google.com and the text google
|
||||
End of main
|
||||
```
|
||||
|
||||
You can also raise the log verbosity to dump the internal structure of a specific file. See [getstructure/main.go](getstructure/main.go).
|
||||
```
|
||||
$ go build -o docxlib ./getstructure/ && ./docxlib -logtostderr=true -v=0
|
||||
I0511 12:37:40.898493 18466 unpack.go:69] Relations: [...]
|
||||
I0511 12:37:40.898787 18466 unpack.go:47] Doc: [...]
|
||||
I0511 12:37:40.899330 18466 unpack.go:58] Paragraph [0xc000026d40 0xc000027d00 0xc000172340]
|
||||
I0511 12:37:40.899369 18466 main.go:31] There is a new paragraph [...]
|
||||
We've found a new run with the text ->test
|
||||
We've found a new run with the text ->test font size
|
||||
We've found a new run with the text ->test color
|
||||
I0511 12:37:40.899389 18466 main.go:31] There is a new paragraph [...]
|
||||
We've found a new run with the text ->test font size and color
|
||||
I0511 12:37:40.899396 18466 main.go:31] There is a new paragraph [...]
|
||||
We've found a new hyperlink with ref http://google.com and the text google
|
||||
End of main
|
||||
```
|
||||
### Build
|
||||
|
||||
```
|
||||
|
||||
50
getstructure/main.go
Normal file
50
getstructure/main.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/gonfva/docxlib"
|
||||
)
|
||||
|
||||
const FILE_PATH = "/tmp/new-file.docx"
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
//Now let's try to read the file
|
||||
readFile, err := os.Open(FILE_PATH)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fileinfo, err := readFile.Stat()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
size := fileinfo.Size()
|
||||
doc, err := docxlib.Parse(readFile, int64(size))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for _, para := range doc.Paragraphs() {
|
||||
glog.Infoln("There is a new paragraph", para)
|
||||
for _, child := range para.Children() {
|
||||
if child.Run != nil {
|
||||
fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text)
|
||||
}
|
||||
if child.Link != nil {
|
||||
id := child.Link.ID
|
||||
text := child.Link.Run.InstrText
|
||||
link, err := doc.References(id)
|
||||
if err != nil {
|
||||
fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text)
|
||||
} else {
|
||||
fmt.Printf("\tWe've found a new hyperlink with ref %s and the text %s\n", link, text)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("End of main")
|
||||
}
|
||||
2
go.mod
2
go.mod
@@ -1,3 +1,5 @@
|
||||
module github.com/gonfva/docxlib
|
||||
|
||||
go 1.16
|
||||
|
||||
require github.com/golang/glog v0.0.0-20210429001901-424d2337a529
|
||||
|
||||
2
go.sum
Normal file
2
go.sum
Normal file
@@ -0,0 +1,2 @@
|
||||
github.com/golang/glog v0.0.0-20210429001901-424d2337a529 h1:2voWjNECnrZRbfwXxHB1/j8wa6xdKn85B5NzgVL/pTU=
|
||||
github.com/golang/glog v0.0.0-20210429001901-424d2337a529/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||
5
pack.go
5
pack.go
@@ -3,7 +3,8 @@ package docxlib
|
||||
import (
|
||||
"archive/zip"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// This receives a zip file writer (word documents are a zip with multiple xml inside)
|
||||
@@ -45,7 +46,7 @@ func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
|
||||
func marshal(data interface{}) (out string, err error) {
|
||||
body, err := xml.Marshal(data)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
glog.Errorln("Error marshalling", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
31
structdoc_test.go
Normal file
31
structdoc_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package docxlib
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const decoded_doc = `<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" 
xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 w16se w16cid w16 w16cex wp14"><w:body><w:p w14:paraId="77CA082D" w14:textId="4AF3264D" w:rsidR="00D66E3F" w:rsidRDefault="003A3F42"><w:pPr><w:rPr><w:color w:val="808080"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:t>test</w:t></w:r><w:r><w:rPr><w:sz w:val="44"/></w:rPr><w:t>test</w:t></w:r><w:proofErr w:type="spellEnd"/><w:r><w:rPr><w:sz w:val="44"/></w:rPr><w:t xml:space="preserve"> font </w:t></w:r><w:proofErr w:type="spellStart"/><w:r><w:rPr><w:sz w:val="44"/></w:rPr><w:t>size</w:t></w:r><w:r><w:rPr><w:color w:val="808080"/></w:rPr><w:t>test</w:t></w:r><w:proofErr w:type="spellEnd"/><w:r><w:rPr><w:color w:val="808080"/></w:rPr><w:t xml:space="preserve"> color</w:t></w:r></w:p><w:p w14:paraId="6D114165" w14:textId="04580C29" w:rsidR="003A3F42" w:rsidRDefault="003A3F42" w:rsidP="003A3F42"><w:pPr><w:pStyle w:val="Heading1"/></w:pPr><w:r><w:t>New style 1</w:t></w:r></w:p><w:p w14:paraId="40D72B3B" w14:textId="76101901" w:rsidR="003A3F42" w:rsidRDefault="003A3F42" w:rsidP="003A3F42"><w:pPr><w:pStyle w:val="Heading2"/></w:pPr><w:r><w:t>New style 2</w:t></w:r></w:p><w:p w14:paraId="1CA8A9B3" w14:textId="77777777" w:rsidR="00D66E3F" w:rsidRDefault="003A3F42"><w:r><w:rPr><w:color w:val="FF0000"/><w:sz w:val="44"/></w:rPr><w:t>test font size and color</w:t></w:r></w:p><w:p w14:paraId="0D82FB8B" w14:textId="77777777" w:rsidR="00D66E3F" w:rsidRDefault="003A3F42"><w:hyperlink r:id="rId4"><w:r><w:rPr><w:rStyle w:val="Hyperlink"/></w:rPr><w:t>google</w:t></w:r></w:hyperlink></w:p><w:sectPr w:rsidR="00D66E3F"><w:pgSz w:w="11906" w:h="16838"/><w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="708" w:footer="708" w:gutter="0"/><w:cols w:space="708"/><w:docGrid 
w:linePitch="360"/></w:sectPr></w:body></w:document>`
|
||||
const NUM_PARAGRAPHS = 5
|
||||
|
||||
func TestStructure(t *testing.T) {
|
||||
doc := Document{
|
||||
XMLW: XMLNS_W,
|
||||
XMLR: XMLNS_R,
|
||||
XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
|
||||
err := xml.Unmarshal([]byte(decoded_doc), &doc)
|
||||
if err != nil {
|
||||
t.Errorf("We expected to be able to decode %s but we didn't",
|
||||
decoded_doc)
|
||||
}
|
||||
if len(doc.Body.Paragraphs) != NUM_PARAGRAPHS {
|
||||
t.Errorf("We expected %d paragraph, we got %d",
|
||||
NUM_PARAGRAPHS, len(doc.Body.Paragraphs))
|
||||
}
|
||||
for _, p := range doc.Body.Paragraphs {
|
||||
if len(p.Children()) == 0 {
|
||||
t.Errorf("We were not able to parse paragraph %v",
|
||||
p)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,8 +3,9 @@ package docxlib
|
||||
import "encoding/xml"
|
||||
|
||||
type ParagraphChild struct {
|
||||
Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
|
||||
Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
|
||||
Link *Hyperlink `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink,omitempty"`
|
||||
Run *Run `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r,omitempty"`
|
||||
Properties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
|
||||
}
|
||||
|
||||
type Paragraph struct {
|
||||
|
||||
15
structrun.go
15
structrun.go
@@ -9,7 +9,7 @@ const (
|
||||
// A Run is part of a paragraph that has its own style. It could be
|
||||
// a piece of text in bold, or a link
|
||||
type Run struct {
|
||||
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r"`
|
||||
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r,omitempty"`
|
||||
RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
|
||||
InstrText string `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main instrText,omitempty"`
|
||||
Text *Text
|
||||
@@ -24,22 +24,29 @@ type Text struct {
|
||||
|
||||
// The hyperlink element contains links
|
||||
type Hyperlink struct {
|
||||
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink"`
|
||||
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink,omitempty"`
|
||||
ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"`
|
||||
Run Run
|
||||
}
|
||||
|
||||
// RunProperties encapsulates visual properties of a run
|
||||
type RunProperties struct {
|
||||
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr"`
|
||||
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
|
||||
Color *Color `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main color,omitempty"`
|
||||
Size *Size `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main sz,omitempty"`
|
||||
RunStyle *RunStyle `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"`
|
||||
Style *Style `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main pStyle,omitempty"`
|
||||
}
|
||||
|
||||
// RunStyle contains styling for a run. It maps the rStyle element; Val is
// written as the w:val attribute.
//
// NOTE(review): the "w:val,attr" tag is emitted verbatim when marshalling;
// confirm that Val is also populated when decoding, since encoding/xml matches
// attributes by their local name.
type RunStyle struct {
	XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rStyle,omitempty"`
	Val     string   `xml:"w:val,attr"`
}
|
||||
|
||||
// Style contains styling for a paragraph. It maps the pStyle element; Val is
// written as the w:val attribute.
//
// NOTE(review): the "w:val,attr" tag is emitted verbatim when marshalling;
// confirm that Val is also populated when decoding, since encoding/xml matches
// attributes by their local name.
type Style struct {
	XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main pStyle,omitempty"`
	Val     string   `xml:"w:val,attr"`
}
|
||||
|
||||
|
||||
17
unpack.go
17
unpack.go
@@ -4,8 +4,9 @@ package docxlib
|
||||
import (
|
||||
"archive/zip"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// This receives a zip file (word documents are a zip with multiple xml inside)
|
||||
@@ -40,19 +41,21 @@ func unpack(zipReader *zip.Reader) (docx *DocxLib, err error) {
|
||||
func processDoc(file *zip.File) (*Document, error) {
|
||||
filebytes, err := readZipFile(file)
|
||||
if err != nil {
|
||||
fmt.Println("Error reading from internal zip file")
|
||||
glog.Errorln("Error reading from internal zip file")
|
||||
return nil, err
|
||||
}
|
||||
glog.V(0).Infoln("Doc:", string(filebytes))
|
||||
|
||||
doc := Document{
|
||||
XMLW: XMLNS_W,
|
||||
XMLR: XMLNS_R,
|
||||
XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
|
||||
err = xml.Unmarshal(filebytes, &doc)
|
||||
if err != nil {
|
||||
fmt.Println("Error unmarshalling doc")
|
||||
fmt.Println(string(filebytes))
|
||||
glog.Errorln("Error unmarshalling doc", string(filebytes))
|
||||
return nil, err
|
||||
}
|
||||
glog.V(0).Infoln("Paragraph", doc.Body.Paragraphs)
|
||||
return &doc, nil
|
||||
}
|
||||
|
||||
@@ -60,13 +63,15 @@ func processDoc(file *zip.File) (*Document, error) {
|
||||
func processRelations(file *zip.File) (*Relationships, error) {
|
||||
filebytes, err := readZipFile(file)
|
||||
if err != nil {
|
||||
fmt.Println("Error reading from internal zip file")
|
||||
glog.Errorln("Error reading from internal zip file")
|
||||
return nil, err
|
||||
}
|
||||
glog.V(0).Infoln("Relations:", string(filebytes))
|
||||
|
||||
rels := Relationships{Xmlns: XMLNS_R}
|
||||
err = xml.Unmarshal(filebytes, &rels)
|
||||
if err != nil {
|
||||
fmt.Println("Error unmarshalling relationships")
|
||||
glog.Errorln("Error unmarshalling relationships")
|
||||
return nil, err
|
||||
}
|
||||
return &rels, nil
|
||||
|
||||
Reference in New Issue
Block a user