diff --git a/apilink.go b/apilink.go
index f3d44e3..cc522fa 100644
--- a/apilink.go
+++ b/apilink.go
@@ -34,7 +34,7 @@ func (p *Paragraph) AddLink(text string, link string) *Hyperlink {
},
}
- p.Data = append(p.Data, ParagraphChild{Link: hyperlink})
+ p.Children = append(p.Children, ParagraphChild{Link: hyperlink})
return hyperlink
}
diff --git a/apipara.go b/apipara.go
index 28a2746..12bee30 100644
--- a/apipara.go
+++ b/apipara.go
@@ -3,19 +3,11 @@ package docxlib
// AddParagraph adds a new paragraph
+// to the end of the document body and returns it. The paragraph keeps a
+// reference to the Docx it was created from.
func (f *Docx) AddParagraph() *Paragraph {
p := &Paragraph{
- Data: make([]ParagraphChild, 0, 64),
- file: f,
+ // Capacity for 64 children is reserved up front.
+ Children: make([]ParagraphChild, 0, 64),
+ file: f,
}
f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, p)
return p
}
-
-func (f *Docx) Paragraphs() []*Paragraph {
- return f.Document.Body.Paragraphs
-}
-
-func (p *Paragraph) Children() (ret []ParagraphChild) {
- return p.Data
-}
diff --git a/apirun.go b/apirun.go
index 54b0298..f7af38c 100644
--- a/apirun.go
+++ b/apirun.go
@@ -29,7 +29,7 @@ func (p *Paragraph) AddText(text string) *Run {
RunProperties: &RunProperties{},
}
- p.Data = append(p.Data, ParagraphChild{Run: run})
+ p.Children = append(p.Children, ParagraphChild{Run: run})
return run
}
diff --git a/cmd/getstructure/main.go b/cmd/getstructure/main.go
index c4beef7..a0c7c63 100644
--- a/cmd/getstructure/main.go
+++ b/cmd/getstructure/main.go
@@ -6,7 +6,6 @@ import (
"os"
"github.com/fumiama/docxlib"
- "github.com/golang/glog"
)
var fileLocation *string
@@ -31,11 +30,16 @@ func main() {
if err != nil {
panic(err)
}
- for _, para := range doc.Paragraphs() {
- glog.Infoln("There is a new paragraph", para)
- for _, child := range para.Children() {
- if child.Run != nil && child.Run.Text != nil {
- fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text)
+ for _, para := range doc.Document.Body.Paragraphs {
+ fmt.Println("New paragraph")
+ for _, child := range para.Children {
+ if child.Run != nil {
+ if child.Run.Text != nil {
+ fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text)
+ }
+ if child.Run.Drawing != nil {
+ fmt.Printf("\tWe've found a new run with the drawing ->%s\n", child.Run.Drawing.Inline.DistT) // TODO: replace to refid
+ }
}
if child.Link != nil {
id := child.Link.ID
@@ -49,6 +53,7 @@ func main() {
}
}
+ fmt.Print("End of paragraph\n\n")
}
fmt.Println("End of main")
}
diff --git a/cmd/main/main.go b/cmd/main/main.go
index 9698a06..538e425 100644
--- a/cmd/main/main.go
+++ b/cmd/main/main.go
@@ -52,10 +52,15 @@ func main() {
if err != nil {
panic(err)
}
- for _, para := range doc.Paragraphs() {
- for _, child := range para.Children() {
+ for _, para := range doc.Document.Body.Paragraphs {
+ for _, child := range para.Children {
if child.Run != nil {
- fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text)
+ if child.Run.Text != nil {
+ fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text)
+ }
+ if child.Run.Drawing != nil {
+ fmt.Printf("\tWe've found a new run with the drawing ->%s\n", child.Run.Drawing.Inline.DistT) // TODO: replace to refid
+ }
}
if child.Link != nil {
id := child.Link.ID
diff --git a/go.mod b/go.mod
index b3b2de1..8257b64 100644
--- a/go.mod
+++ b/go.mod
@@ -1,5 +1,3 @@
module github.com/fumiama/docxlib
go 1.16
-
-require github.com/golang/glog v0.0.0-20210429001901-424d2337a529
diff --git a/go.sum b/go.sum
index 41b530d..e69de29 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +0,0 @@
-github.com/golang/glog v0.0.0-20210429001901-424d2337a529 h1:2voWjNECnrZRbfwXxHB1/j8wa6xdKn85B5NzgVL/pTU=
-github.com/golang/glog v0.0.0-20210429001901-424d2337a529/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
diff --git a/pack.go b/pack.go
index d5e7359..e48a4d6 100644
--- a/pack.go
+++ b/pack.go
@@ -4,8 +4,6 @@ import (
"archive/zip"
"encoding/xml"
"strings"
-
- "github.com/golang/glog"
)
// This receives a zip file writer (word documents are a zip with multiple xml inside)
@@ -49,7 +47,6 @@ func marshal(data interface{}) (out string, err error) {
sb.WriteString(xml.Header)
err = xml.NewEncoder(&sb).Encode(data)
if err != nil {
- glog.Errorln("Error marshalling", err)
return
}
out = sb.String()
diff --git a/structdoc_test.go b/structdoc_test.go
index 869952b..194b5a0 100644
--- a/structdoc_test.go
+++ b/structdoc_test.go
@@ -7,9 +7,8 @@ import (
const decoded_doc_1 = `testtest font sizetest colorNew style 1New style 2test font size and colorgoogle`
const decoded_doc_2 = `Table of Contents TOC \h \z \t "Heading 1,2,S6,1,S0,1,S1,1,S2,1,S3,1,S4,1,S5,1" Holy Grail [xref:bRJduW6hNR] PAGEREF _Toc420414504 \h 21.What is your name? [xref:TH7u7QDqhD] PAGEREF _Toc420414505 \h 22.What is your quest? [xref:bC62HkFATC] PAGEREF _Toc420414506 \h 23.What is your favourite colour? [xref:I3TphuHX6N] PAGEREF _Toc420414507 \h 2Holy Grail [ FORMTEXT xref:bRJduW6hNR]What is your name? [ FORMTEXT xref:TH7u7QDqhD]My name is Sir Launcelot of Camelot.What is your quest? [ FORMTEXT xref:bC62HkFATC]To seek the Holy Grail[or a grail shaped beacon]. What is your favourite colour? [ FORMTEXT xref:I3TphuHX6N]Blue.How many paragraphs here then?`
-const NUM_PARAGRAPHS = 5
-func TestStructure(t *testing.T) {
+func TestPlainStructure(t *testing.T) {
doc := Document{
XMLW: XMLNS_W,
XMLR: XMLNS_R,
@@ -22,29 +21,538 @@ func TestStructure(t *testing.T) {
{decoded_doc_2, 19},
}
for _, tc := range testCases {
- err := xml.Unmarshal([]byte(tc.content), &doc)
+ err := xml.Unmarshal(StringToBytes(tc.content), &doc)
if err != nil {
- t.Errorf("We expected to be able to decode %s but we didn't",
- tc.content)
+ t.Fatal(err)
}
if len(doc.Body.Paragraphs) != tc.numParagraphs {
- t.Errorf("We expected %d paragraphs, we got %d",
- NUM_PARAGRAPHS, len(doc.Body.Paragraphs))
+ t.Fatalf("We expected %d paragraphs, we got %d", tc.numParagraphs, len(doc.Body.Paragraphs))
}
- for _, p := range doc.Body.Paragraphs {
- if len(p.Children()) == 0 {
- t.Errorf("We were not able to parse paragraph %v",
- p)
+ for i, p := range doc.Body.Paragraphs {
+ if len(p.Children) == 0 {
+ t.Fatalf("We were not able to parse paragraph %d", i)
}
- for _, child := range p.Children() {
+ for _, child := range p.Children {
if child.Link == nil && child.Properties == nil && child.Run == nil {
- t.Errorf("There are Paragraph children with all fields nil")
+ t.Fatalf("There are Paragraph children with all fields nil")
}
if child.Run != nil && child.Run.Text == nil && child.Run.InstrText == "" {
- t.Errorf("We have a run with no text")
+ t.Fatalf("We have a run with no text")
}
if child.Link != nil && child.Link.ID == "" {
- t.Errorf("We have a link without ID")
+ t.Fatalf("We have a link without ID")
+ }
+ }
+ }
+ }
+}
+
+const drawing_doc = `
+
+
+
+
+
+
+
+
+
+
+
+
+ 直接粘贴
+
+
+
+
+
+
+
+
+ inline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 一行2个
+
+
+ inline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 一行2个组合
+
+
+ inline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 一个 浮于上方
+
+
+
+
+
+ 右侧对齐
+
+
+
+
+
+ 左
+
+
+ 11.32cm
+
+
+
+
+
+ 顶
+
+
+ 23.73cm
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2935605
+
+
+ 97790
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0
+
+
+ 0
+
+
+
+
+
+
+
+
+
+
+
+
+`
+
+func TestDrawingStructure(t *testing.T) {
+ doc := Document{
+ XMLW: XMLNS_W,
+ XMLR: XMLNS_R,
+ XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
+ err := xml.Unmarshal(StringToBytes(drawing_doc), &doc)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(doc.Body.Paragraphs) != 8 {
+ t.Fatalf("We expected %d paragraphs, we got %d", 8, len(doc.Body.Paragraphs))
+ }
+ for i, p := range doc.Body.Paragraphs {
+ if len(p.Children) == 0 {
+ t.Fatalf("We were not able to parse paragraph %d", i)
+ }
+ for j, child := range p.Children {
+ if child.Link == nil && child.Properties == nil && child.Run == nil {
+ t.Fatalf("There are Paragraph children with all fields nil")
+ }
+ if child.Run != nil && child.Run.Text == nil && child.Run.InstrText == "" && child.Run.Drawing == nil {
+ t.Fatalf("We have a run with no text and drawing")
+ }
+ if child.Link != nil && child.Link.ID == "" {
+ t.Fatalf("We have a link without ID")
+ }
+ if child.Run != nil && child.Run.Drawing != nil {
+ t.Log("fild drawing at aragraph", i, ", child", j)
+ if child.Run.Drawing.Inline != nil {
+ tail := "-mock-inline-p" + string(rune('0'+i)) + "-c" + string(rune('0'+j))
+ if "T"+tail != child.Run.Drawing.Inline.DistT {
+ t.Fatal("expect", "T"+tail, "but got", child.Run.Drawing.Inline.DistT)
+ }
+ if "B"+tail != child.Run.Drawing.Inline.DistB {
+ t.Fatal("expect", "B"+tail, "but got", child.Run.Drawing.Inline.DistB)
+ }
+ if "L"+tail != child.Run.Drawing.Inline.DistL {
+ t.Fatal("expect", "L"+tail, "but got", child.Run.Drawing.Inline.DistL)
+ }
+ if "R"+tail != child.Run.Drawing.Inline.DistR {
+ t.Fatal("expect", "R"+tail, "but got", child.Run.Drawing.Inline.DistR)
+ }
}
}
}
diff --git a/structnodes.go b/structnodes.go
index 8aaab31..4b5a19b 100644
--- a/structnodes.go
+++ b/structnodes.go
@@ -3,8 +3,6 @@ package docxlib
import (
"encoding/xml"
"io"
-
- "github.com/golang/glog"
)
type ParagraphChild struct {
@@ -14,8 +12,8 @@ type ParagraphChild struct {
}
+// Paragraph is the p element of the wordprocessingml namespace. Children
+// holds its child nodes (properties, runs and links); file is the Docx the
+// paragraph was created from, when it was built through AddParagraph.
type Paragraph struct {
- XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
- Data []ParagraphChild
+ XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
+ Children []ParagraphChild
file *Docx
}
@@ -47,8 +45,7 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
var value Run
d.DecodeElement(&value, &start)
elem.Run = &value
- if value.InstrText == "" && value.Text == nil {
- glog.V(0).Infof("Empty run, we ignore")
+ if value.InstrText == "" && value.Text == nil && value.Drawing == nil {
continue
}
case "rPr":
@@ -62,7 +59,7 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
}
}
- *p = Paragraph{Data: children}
+ p.Children = children
return nil
}
diff --git a/structrun.go b/structrun.go
index fdb7096..ca10f40 100644
--- a/structrun.go
+++ b/structrun.go
@@ -9,29 +9,45 @@ const (
HYPERLINK_STYLE = "a1"
)
-// A Run is part of a paragraph that has its own style. It could be
+// Run is part of a paragraph that has its own style. It could be
// a piece of text in bold, or a link
+//
+// RunProperties, InstrText, Text and Drawing are each optional;
+// Run.UnmarshalXML fills in whichever of them appear as children of the run.
type Run struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main r,omitempty"`
RunProperties *RunProperties `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
InstrText string `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main instrText,omitempty"`
Text *Text
+ // Drawing is non-nil when a drawing child was decoded for this run.
+ Drawing *Drawing
}
-// The Text object contains the actual text
+// Text is the t element of a run and contains the actual text. XMLSpace is
+// its xml:space attribute and Text is the element's character data.
type Text struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main t"`
XMLSpace string `xml:"xml:space,attr,omitempty"`
Text string `xml:",chardata"`
}
-// The hyperlink element contains links
+// Hyperlink is the hyperlink element of a paragraph. ID is its id attribute
+// in the relationships namespace and Run is the run nested inside the link.
type Hyperlink struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main hyperlink,omitempty"`
ID string `xml:"http://schemas.openxmlformats.org/officeDocument/2006/relationships id,attr"`
Run Run
}
+// Drawing is the drawing element of a run. Inline is its inline child and
+// stays nil when no such child was decoded.
+type Drawing struct {
+ XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main drawing,omitempty"`
+ Inline *WPInline
+}
+
+// WPInline is the inline element (wp:inline) found inside a drawing. DistT,
+// DistB, DistL and DistR hold its distT, distB, distL and distR attributes.
+//
+// The attribute tags are deliberately unprefixed: encoding/xml does not treat
+// a "prefix:name" tag as a namespace, so a tag such as "wp:distT" would be
+// written out literally as wp:distT="..." with an undeclared prefix, while
+// Drawing.UnmarshalXML reads these attributes by their local names.
+type WPInline struct {
+	XMLName xml.Name `xml:"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing inline,omitempty"`
+	DistT   string   `xml:"distT,attr"`
+	DistB   string   `xml:"distB,attr"`
+	DistL   string   `xml:"distL,attr"`
+	DistR   string   `xml:"distR,attr"`
+}
+
// RunProperties encapsulates visual properties of a run
type RunProperties struct {
XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main rPr,omitempty"`
@@ -67,7 +83,6 @@ type Size struct {
}
func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- var elem Run
for {
t, err := d.Token()
if err == io.EOF {
@@ -80,28 +95,30 @@ func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
case "rPr":
var value RunProperties
d.DecodeElement(&value, &start)
- elem.RunProperties = &value
+ r.RunProperties = &value
case "instrText":
var value string
d.DecodeElement(&value, &start)
- elem.InstrText = value
+ r.InstrText = value
case "t":
var value Text
d.DecodeElement(&value, &start)
- elem.Text = &value
+ r.Text = &value
+ case "drawing":
+ var value Drawing
+ d.DecodeElement(&value, &start)
+ r.Drawing = &value
default:
continue
}
}
}
- *r = elem
return nil
}
func (r *Text) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- var elem Text
for {
t, err := d.Token()
if err == io.EOF {
@@ -110,16 +127,14 @@ func (r *Text) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
switch tt := t.(type) {
case xml.CharData:
- elem.Text = string(tt) // implicitly copy
+ r.Text = string(tt) // implicitly copy
}
}
- *r = elem
return nil
}
func (r *Hyperlink) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- var elem Hyperlink
for {
t, err := d.Token()
if err == io.EOF {
@@ -129,19 +144,17 @@ func (r *Hyperlink) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
switch tt := t.(type) {
case xml.StartElement:
if tt.Name.Local == "r" {
- d.DecodeElement(&elem.Run, &start)
+ d.DecodeElement(&r.Run, &start)
} else {
continue
}
}
}
- *r = elem
return nil
}
-func (r *RunStyle) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- var elem RunStyle
+func (r *Drawing) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
for {
t, err := d.Token()
if err == io.EOF {
@@ -150,11 +163,57 @@ func (r *RunStyle) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
switch tt := t.(type) {
case xml.StartElement:
- elem.Val = getAtt(tt.Attr, "val")
+ switch tt.Name.Local {
+ case "inline":
+ r.Inline = new(WPInline)
+ r.Inline.DistT = getAtt(tt.Attr, "distT")
+ r.Inline.DistB = getAtt(tt.Attr, "distB")
+ r.Inline.DistL = getAtt(tt.Attr, "distL")
+ r.Inline.DistR = getAtt(tt.Attr, "distR")
+ d.DecodeElement(r.Inline, &start)
+ default:
+ continue
+ }
+ }
+
+ }
+ return nil
+
+}
+// UnmarshalXML implements xml.Unmarshaler for WPInline. It reads and discards
+// every token of the element it is handed, so the decoder ends up positioned
+// after that element; no field of r is modified here. The dist* attributes
+// are assigned by Drawing.UnmarshalXML before it delegates to this method.
+//
+// It returns nil once the decoder reports io.EOF for the element, and any
+// other decoder error as-is.
+func (r *WPInline) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	for {
+		_, err := d.Token()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			// A decoder error persists across Token calls, so ignoring it
+			// here would keep this loop spinning forever on malformed input.
+			return err
+		}
+	}
+}
+// UnmarshalXML implements xml.Unmarshaler for RunStyle. It walks the tokens
+// of the element until the decoder reports io.EOF and stores the "val"
+// attribute of each start element it meets in r.Val (the last one wins).
+// Any decoder error other than io.EOF is returned to the caller.
+//
+// NOTE(review): Val is taken from StartElement tokens read from d, not from
+// start.Attr — confirm that this is the intended source of the attribute.
+func (r *RunStyle) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	for {
+		t, err := d.Token()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			// A decoder error persists across Token calls, so ignoring it
+			// here would keep this loop spinning forever on malformed input.
+			return err
+		}
+
+		switch tt := t.(type) {
+		case xml.StartElement:
+			r.Val = getAtt(tt.Attr, "val")
 		}
 	}
-	*r = elem
 	return nil
 }
diff --git a/unpack.go b/unpack.go
index d595ad7..2616fae 100644
--- a/unpack.go
+++ b/unpack.go
@@ -5,8 +5,6 @@ import (
"archive/zip"
"encoding/xml"
"io"
-
- "github.com/golang/glog"
)
// This receives a zip file (word documents are a zip with multiple xml inside)
@@ -36,10 +34,8 @@ func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
func processDoc(file *zip.File, doc *Document) error {
filebytes, err := readZipFile(file)
if err != nil {
- glog.Errorln("Error reading from internal zip file")
return err
}
- glog.V(0).Infoln("Doc:", string(filebytes))
doc.XMLW = XMLNS_W
doc.XMLR = XMLNS_R
@@ -47,10 +43,8 @@ func processDoc(file *zip.File, doc *Document) error {
doc.XMLName.Local = "document"
err = xml.Unmarshal(filebytes, doc)
if err != nil {
- glog.Errorln("Error unmarshalling doc", string(filebytes))
return err
}
- glog.V(0).Infoln("Paragraph", doc.Body.Paragraphs)
return nil
}
@@ -58,15 +52,12 @@ func processDoc(file *zip.File, doc *Document) error {
func processRelations(file *zip.File, rels *Relationships) error {
filebytes, err := readZipFile(file)
if err != nil {
- glog.Errorln("Error reading from internal zip file")
return err
}
- glog.V(0).Infoln("Relations:", string(filebytes))
rels.Xmlns = XMLNS_R
err = xml.Unmarshal(filebytes, rels)
if err != nil {
- glog.Errorln("Error unmarshalling relationships")
return err
}
return nil