diff --git a/getstructure/main.go b/getstructure/main.go
index 47ff6eb..db70522 100644
--- a/getstructure/main.go
+++ b/getstructure/main.go
@@ -34,7 +34,7 @@ func main() {
for _, para := range doc.Paragraphs() {
glog.Infoln("There is a new paragraph", para)
for _, child := range para.Children() {
- if child.Run != nil {
+ if child.Run != nil && child.Run.Text != nil {
fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text)
}
if child.Link != nil {
diff --git a/structdoc_test.go b/structdoc_test.go
index 7b1baa2..869952b 100644
--- a/structdoc_test.go
+++ b/structdoc_test.go
@@ -5,7 +5,8 @@ import (
"testing"
)
-const decoded_doc = `testtest font sizetest colorNew style 1New style 2test font size and colorgoogle`
+const decoded_doc_1 = `testtest font sizetest colorNew style 1New style 2test font size and colorgoogle`
+const decoded_doc_2 = `Table of Contents TOC \h \z \t "Heading 1,2,S6,1,S0,1,S1,1,S2,1,S3,1,S4,1,S5,1" Holy Grail [xref:bRJduW6hNR] PAGEREF _Toc420414504 \h 21.What is your name? [xref:TH7u7QDqhD] PAGEREF _Toc420414505 \h 22.What is your quest? [xref:bC62HkFATC] PAGEREF _Toc420414506 \h 23.What is your favourite colour? [xref:I3TphuHX6N] PAGEREF _Toc420414507 \h 2Holy Grail [ FORMTEXT xref:bRJduW6hNR]What is your name? [ FORMTEXT xref:TH7u7QDqhD]My name is Sir Launcelot of Camelot.What is your quest? [ FORMTEXT xref:bC62HkFATC]To seek the Holy Grail[or a grail shaped beacon]. What is your favourite colour? [ FORMTEXT xref:I3TphuHX6N]Blue.How many paragraphs here then?`
const NUM_PARAGRAPHS = 5
func TestStructure(t *testing.T) {
@@ -13,32 +14,38 @@ func TestStructure(t *testing.T) {
XMLW: XMLNS_W,
XMLR: XMLNS_R,
XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
- err := xml.Unmarshal([]byte(decoded_doc), &doc)
- if err != nil {
- t.Errorf("We expected to be able to decode %s but we didn't",
- decoded_doc)
+ testCases := []struct {
+ content string
+ numParagraphs int
+ }{
+ {decoded_doc_1, 5},
+ {decoded_doc_2, 19},
}
- if len(doc.Body.Paragraphs) != NUM_PARAGRAPHS {
- t.Errorf("We expected %d paragraphs, we got %d",
- NUM_PARAGRAPHS, len(doc.Body.Paragraphs))
- }
- for _, p := range doc.Body.Paragraphs {
- if len(p.Children()) == 0 {
- t.Errorf("We were not able to parse paragraph %v",
- p)
+ for _, tc := range testCases {
+ err := xml.Unmarshal([]byte(tc.content), &doc)
+ if err != nil {
+ t.Errorf("We expected to be able to decode %s but we didn't",
+ tc.content)
}
- for _, child := range p.Children() {
- if child.Link == nil && child.Properties == nil && child.Run == nil {
- t.Errorf("There are Paragraph children with all fields nil")
+ if len(doc.Body.Paragraphs) != tc.numParagraphs {
+ t.Errorf("We expected %d paragraphs, we got %d",
+ tc.numParagraphs, len(doc.Body.Paragraphs)) // report this case's expectation, not the global constant
+ }
+ for _, p := range doc.Body.Paragraphs {
+ if len(p.Children()) == 0 {
+ t.Errorf("We were not able to parse paragraph %v",
+ p)
}
- if child.Run != nil && child.Run.Text == nil {
- t.Errorf("We have a run with no text")
- }
- if child.Run != nil && child.Run.Text != nil && child.Run.Text.Text == "" {
- t.Errorf("We have a text with no text")
- }
- if child.Link != nil && child.Link.ID == "" {
- t.Errorf("We have a link without ID")
+ for _, child := range p.Children() {
+ if child.Link == nil && child.Properties == nil && child.Run == nil {
+ t.Errorf("There are Paragraph children with all fields nil")
+ }
+ if child.Run != nil && child.Run.Text == nil && child.Run.InstrText == "" {
+ t.Errorf("We have a run with no text")
+ }
+ if child.Link != nil && child.Link.ID == "" {
+ t.Errorf("We have a link without ID")
+ }
}
}
}
diff --git a/structnodes.go b/structnodes.go
index ad138a7..a08dbe0 100644
--- a/structnodes.go
+++ b/structnodes.go
@@ -3,6 +3,8 @@ package docxlib
import (
"encoding/xml"
"io"
+
+ "github.com/golang/glog"
)
type ParagraphChild struct {
@@ -31,12 +33,23 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
if tt.Name.Local == "hyperlink" {
var value Hyperlink
d.DecodeElement(&value, &start)
- value.ID = getAtt(tt.Attr, "id")
+ id := getAtt(tt.Attr, "id")
+ anchor := getAtt(tt.Attr, "anchor")
+ if id != "" {
+ value.ID = id
+ }
+ if anchor != "" {
+ value.ID = anchor
+ }
elem = ParagraphChild{Link: &value}
} else if tt.Name.Local == "r" {
var value Run
d.DecodeElement(&value, &start)
elem = ParagraphChild{Run: &value}
+ if value.InstrText == "" && value.Text == nil {
+ glog.V(0).Infof("Empty run, we ignore")
+ continue
+ }
} else if tt.Name.Local == "rPr" {
var value RunProperties
d.DecodeElement(&value, &start)
diff --git a/structrun.go b/structrun.go
index b861440..0805627 100644
--- a/structrun.go
+++ b/structrun.go
@@ -95,6 +95,7 @@ func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
}
*r = elem
+
return nil
}