From 4c88ae23fec0e8e9b7b096cd26dae9fe8230d275 Mon Sep 17 00:00:00 2001 From: Gonzalo Fernandez-Victorio Date: Wed, 12 May 2021 21:27:28 +0100 Subject: [PATCH] More tests --- getstructure/main.go | 2 +- structdoc_test.go | 55 +++++++++++++++++++++++++------------------- structnodes.go | 15 +++++++++++- structrun.go | 1 + 4 files changed, 47 insertions(+), 26 deletions(-) diff --git a/getstructure/main.go b/getstructure/main.go index 47ff6eb..db70522 100644 --- a/getstructure/main.go +++ b/getstructure/main.go @@ -34,7 +34,7 @@ func main() { for _, para := range doc.Paragraphs() { glog.Infoln("There is a new paragraph", para) for _, child := range para.Children() { - if child.Run != nil { + if child.Run != nil && child.Run.Text != nil { fmt.Printf("\tWe've found a new run with the text ->%s\n", child.Run.Text.Text) } if child.Link != nil { diff --git a/structdoc_test.go b/structdoc_test.go index 7b1baa2..869952b 100644 --- a/structdoc_test.go +++ b/structdoc_test.go @@ -5,7 +5,8 @@ import ( "testing" ) -const decoded_doc = `testtest font sizetest colorNew style 1New style 2test font size and colorgoogle` +const decoded_doc_1 = `testtest font sizetest colorNew style 1New style 2test font size and colorgoogle` +const decoded_doc_2 = `Table of Contents TOC \h \z \t "Heading 1,2,S6,1,S0,1,S1,1,S2,1,S3,1,S4,1,S5,1" Holy Grail [xref:bRJduW6hNR] PAGEREF _Toc420414504 \h 21.What is your name? [xref:TH7u7QDqhD] PAGEREF _Toc420414505 \h 22.What is your quest? [xref:bC62HkFATC] PAGEREF _Toc420414506 \h 23.What is your favourite colour? [xref:I3TphuHX6N] PAGEREF _Toc420414507 \h 2Holy Grail [ FORMTEXT xref:bRJduW6hNR]What is your name? [ FORMTEXT xref:TH7u7QDqhD]My name is Sir Launcelot of Camelot.What is your quest? [ FORMTEXT xref:bC62HkFATC]To seek the Holy Grail[or a grail shaped beacon]. What is your favourite colour? [ FORMTEXT xref:I3TphuHX6N]Blue.How many paragraphs here then?` const NUM_PARAGRAPHS = 5 func TestStructure(t *testing.T) { @@ -13,32 +14,38 @@ func TestStructure(t *testing.T) { XMLW: XMLNS_W, XMLR: XMLNS_R, XMLName: xml.Name{Space: XMLNS_W, Local: "document"}} - err := xml.Unmarshal([]byte(decoded_doc), &doc) - if err != nil { - t.Errorf("We expected to be able to decode %s but we didn't", - decoded_doc) + testCases := []struct { + content string + numParagraphs int + }{ + {decoded_doc_1, 5}, + {decoded_doc_2, 19}, } - if len(doc.Body.Paragraphs) != NUM_PARAGRAPHS { - t.Errorf("We expected %d paragraphs, we got %d", - NUM_PARAGRAPHS, len(doc.Body.Paragraphs)) - } - for _, p := range doc.Body.Paragraphs { - if len(p.Children()) == 0 { - t.Errorf("We were not able to parse paragraph %v", - p) + for _, tc := range testCases { + err := xml.Unmarshal([]byte(tc.content), &doc) + if err != nil { + t.Errorf("We expected to be able to decode %s but we didn't", + tc.content) } - for _, child := range p.Children() { - if child.Link == nil && child.Properties == nil && child.Run == nil { - t.Errorf("There are Paragraph children with all fields nil") + if len(doc.Body.Paragraphs) != tc.numParagraphs { + t.Errorf("We expected %d paragraphs, we got %d", + NUM_PARAGRAPHS, len(doc.Body.Paragraphs)) + } + for _, p := range doc.Body.Paragraphs { + if len(p.Children()) == 0 { + t.Errorf("We were not able to parse paragraph %v", + p) } - if child.Run != nil && child.Run.Text == nil { - t.Errorf("We have a run with no text") - } - if child.Run != nil && child.Run.Text != nil && child.Run.Text.Text == "" { - t.Errorf("We have a text with no text") - } - if child.Link != nil && child.Link.ID == "" { - t.Errorf("We have a link without ID") + for _, child := range p.Children() { + if child.Link == nil && child.Properties == nil && child.Run == nil { + t.Errorf("There are Paragraph children with all fields nil") + } + if child.Run != nil && child.Run.Text == nil && child.Run.InstrText == "" { + t.Errorf("We have a run with no text") + } + if child.Link != nil && child.Link.ID == "" { + t.Errorf("We have a link without ID") + } } } } diff --git a/structnodes.go b/structnodes.go index ad138a7..a08dbe0 100644 --- a/structnodes.go +++ b/structnodes.go @@ -3,6 +3,8 @@ package docxlib import ( "encoding/xml" "io" + + "github.com/golang/glog" ) type ParagraphChild struct { @@ -31,12 +33,23 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { if tt.Name.Local == "hyperlink" { var value Hyperlink d.DecodeElement(&value, &start) - value.ID = getAtt(tt.Attr, "id") + id := getAtt(tt.Attr, "id") + anchor := getAtt(tt.Attr, "anchor") + if id != "" { + value.ID = id + } + if anchor != "" { + value.ID = anchor + } elem = ParagraphChild{Link: &value} } else if tt.Name.Local == "r" { var value Run d.DecodeElement(&value, &start) elem = ParagraphChild{Run: &value} + if value.InstrText == "" && value.Text == nil { + glog.V(0).Infof("Empty run, we ignore") + continue + } } else if tt.Name.Local == "rPr" { var value RunProperties d.DecodeElement(&value, &start) diff --git a/structrun.go b/structrun.go index b861440..0805627 100644 --- a/structrun.go +++ b/structrun.go @@ -95,6 +95,7 @@ func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { } *r = elem + return nil }