优化代码结构

2026-06-23 20:16:38 +08:00 · 2023-02-08 16:19:09 +08:00
parent d8f39cecf1
commit 7ff4850504
16 changed files with 153 additions and 114 deletions
--- a/1
+++ b/1
@@ -3,6 +3,7 @@ MIT License
 Copyright (c) 2020 gingfrederik
 Copyright (c) 2021 Gonzalo Fernandez-Victorio
 Copyright (c) 2021 Basement Crowd Ltd (https://www.basementcrowd.com)
 Copyright (c) 2023 Fumiama Minamoto (源文雨)
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 Yet another library to read and write .docx (a.k.a. Microsoft Word documents or ECMA-376 Office Open XML) files in Go.
 This is a variant optimized and expanded by fumiama. The original repo is [gonfva/docxlib](https://github.com/gonfva/docxlib).
 ## Introduction
 As part of my work for [Basement Crowd](https://www.basementcrowd.com) and [FromCounsel](https://www.fromcounsel.com), we were in need of a basic library to manipulate (both read and write) Microsoft Word documents.
@@ -31,7 +33,7 @@ In the mean time, shared as an example in case somebody finds it useful.
 Go modules supported
 ```sh
-go get github.com/gonfva/docxlib
+go get github.com/fumiama/docxlib
 ```
 ### Usage
@@ -39,8 +41,7 @@ go get github.com/gonfva/docxlib
 See [main](main/main.go) for an example
 ```
-$ go build -o docxlib ./main
+$ go run ./cmd/main
 $ ./docxlib
 Preparing new document to write at /tmp/new-file.docx
 Document writen.
 Now trying to read it
@@ -53,7 +54,7 @@ End of main
 ```
 You can also increase the log level (-logtostderr=true -v=0) and just dump a specific file (-file /tmp/new-file.docx). See [getstructure/main](getstructure/main.go)
 ```
-$ go build -o docxlib ./getstructure/ && ./docxlib -logtostderr=true -v=0 -file /tmp/new-file.docx
+$ go build -o docxlib ./cmd/getstructure/ && ./docxlib -logtostderr=true -v=0 -file /tmp/new-file.docx
 I0511 12:37:40.898493   18466 unpack.go:69] Relations: [...]
 I0511 12:37:40.898787   18466 unpack.go:47] Doc: [...]
 I0511 12:37:40.899330   18466 unpack.go:58] Paragraph [0xc000026d40 0xc000027d00 0xc000172340]
--- a/apilink.go
+++ b/apilink.go
@@ -1,18 +1,19 @@
 package docxlib
-import "strconv"
+import (
 	"strconv"
 	"sync/atomic"
 )
 // when adding a hyperlink we need to store a reference in the relationship field
-func (f *DocxLib) addLinkRelation(link string) string {
+func (f *Docx) addLinkRelation(link string) string {
 	rel := &Relationship{
-		ID:         "rId" + strconv.Itoa(f.rId),
+		ID:         "rId" + strconv.Itoa(int(atomic.AddUintptr(&f.rId, 1))),
 		Type:       REL_HYPERLINK,
 		Target:     link,
 		TargetMode: REL_TARGETMODE,
 	}
 	f.rId += 1
 	f.DocRelation.Relationships = append(f.DocRelation.Relationships, rel)
 	return rel.ID
--- a/apipara.go
+++ b/apipara.go
@@ -1,17 +1,18 @@
 package docxlib
 // AddParagraph adds a new paragraph
-func (f *DocxLib) AddParagraph() *Paragraph {
+func (f *Docx) AddParagraph() *Paragraph {
 	p := &Paragraph{
-		Data: make([]ParagraphChild, 0),
+		Data: make([]ParagraphChild, 0, 64),
 		file: f,
 	}
 	f.Document.Body.Paragraphs = append(f.Document.Body.Paragraphs, p)
 	return p
 }
-func (f *DocxLib) Paragraphs() []*Paragraph {
+func (f *Docx) Paragraphs() []*Paragraph {
 	return f.Document.Body.Paragraphs
 }
--- a/apirun.go
+++ b/apirun.go
@@ -14,6 +14,7 @@ func (r *Run) Size(size int) *Run {
 	r.RunProperties.Size = &Size{
 		Val: size * 2,
 	}
 	return r
 }
--- a/cmd/getstructure/main.go
+++ b/cmd/getstructure/main.go
@@ -5,8 +5,8 @@ import (
 	"fmt"
 	"os"
 	"github.com/fumiama/docxlib"
 	"github.com/golang/glog"
 	"github.com/gonfva/docxlib"
 )
 var fileLocation *string
@@ -40,7 +40,7 @@ func main() {
 			if child.Link != nil {
 				id := child.Link.ID
 				text := child.Link.Run.InstrText
-				link, err := doc.References(id)
+				link, err := doc.Refer(id)
 				if err != nil {
 					fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text)
 				} else {
--- a/cmd/main/main.go
+++ b/cmd/main/main.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 	"os"
-	"github.com/gonfva/docxlib"
+	"github.com/fumiama/docxlib"
 )
 var fileLocation *string
@@ -60,7 +60,7 @@ func main() {
 			if child.Link != nil {
 				id := child.Link.ID
 				text := child.Link.Run.InstrText
-				link, err := doc.References(id)
+				link, err := doc.Refer(id)
 				if err != nil {
 					fmt.Printf("\tWe found a link with id %s and text %s without target\n", id, text)
 				} else {
--- a/docxlib.go
+++ b/docxlib.go
@@ -6,33 +6,40 @@ import (
 	"io"
 )
-// DocxLib is the structure that allow to access the internal represntation
+var (
 	// ErrRefIDNotFound cannot find such reference
 	ErrRefIDNotFound = errors.New("ref id not found")
 )
 // Docx is the structure that allows access to the internal representation
 // in memory of the doc (either read or about to be written)
-type DocxLib struct {
+type Docx struct {
 	Document    Document
 	DocRelation Relationships
-	rId int
+	rId uintptr
 }
 // New generates a new empty docx file that we can manipulate and
 // later on, save
-func New() *DocxLib {
+func New() *Docx {
-	return emptyFile()
+	return newEmptyFile()
 }
 // Parse generates a new docx file in memory from a reader
 // You can invoke it from a file
-//		readFile, err := os.Open(FILE_PATH)
+//
-//		if err != nil {
+//	readFile, err := os.Open(FILE_PATH)
-//			panic(err)
+//	if err != nil {
-//		}
+//		panic(err)
-//		fileinfo, err := readFile.Stat()
+//	}
-//		if err != nil {
+//	fileinfo, err := readFile.Stat()
-//			panic(err)
+//	if err != nil {
-//		}
+//		panic(err)
-//		size := fileinfo.Size()
+//	}
-//		doc, err := docxlib.Parse(readFile, int64(size))
+//	size := fileinfo.Size()
 //	doc, err := docxlib.Parse(readFile, int64(size))
 //
 // but also you can invoke from a webform (BEWARE of trusting users data!!!)
 //
 //	func uploadFile(w http.ResponseWriter, r *http.Request) {
@@ -48,7 +55,7 @@ func New() *DocxLib {
 //		defer file.Close()
 //		docxlib.Parse(file, handler.Size)
 //	}
-func Parse(reader io.ReaderAt, size int64) (doc *DocxLib, err error) {
+func Parse(reader io.ReaderAt, size int64) (doc *Docx, err error) {
 	zipReader, err := zip.NewReader(reader, size)
 	if err != nil {
 		return nil, err
@@ -58,21 +65,21 @@ func Parse(reader io.ReaderAt, size int64) (doc *DocxLib, err error) {
 }
 // Write saves a docx to a writer
-func (f *DocxLib) Write(writer io.Writer) (err error) {
+func (f *Docx) Write(writer io.Writer) (err error) {
 	zipWriter := zip.NewWriter(writer)
 	defer zipWriter.Close()
 	return f.pack(zipWriter)
 }
-// References gets the url for a reference
+// Refer gets the url for a reference
-func (f *DocxLib) References(id string) (href string, err error) {
+func (f *Docx) Refer(id string) (href string, err error) {
 	for _, a := range f.DocRelation.Relationships {
 		if a.ID == id {
 			href = a.Target
 			return
 		}
 	}
-	err = errors.New("id not found")
+	err = ErrRefIDNotFound
 	return
 }
--- a/empty.go
+++ b/empty.go
@@ -2,29 +2,8 @@ package docxlib
 import "encoding/xml"
-func emptyRelationships() []*Relationship {
+func newEmptyFile() *Docx {
-	defaultRel := []*Relationship{
+	return &Docx{
 		{
 			ID:     "rId1",
 			Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles`,
 			Target: "styles.xml",
 		},
 		{
 			ID:     "rId2",
 			Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme`,
 			Target: "theme/theme1.xml",
 		},
 		{
 			ID:     "rId3",
 			Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable`,
 			Target: "fontTable.xml",
 		},
 	}
 	return defaultRel
 }
 func emptyFile() *DocxLib {
 	docx := &DocxLib{
 		Document: Document{
 			XMLName: xml.Name{
 				Space: "w",
@@ -35,14 +14,29 @@ func emptyFile() *DocxLib {
 				XMLName: xml.Name{
 					Space: "w",
 				},
-				Paragraphs: make([]*Paragraph, 0),
+				Paragraphs: make([]*Paragraph, 0, 64),
 			},
 		},
 		DocRelation: Relationships{
-			Xmlns:         XMLNS,
+			Xmlns: XMLNS,
-			Relationships: emptyRelationships(),
+			Relationships: []*Relationship{
 				{
 					ID:     "rId1",
 					Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles`,
 					Target: "styles.xml",
 				},
 				{
 					ID:     "rId2",
 					Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme`,
 					Target: "theme/theme1.xml",
 				},
 				{
 					ID:     "rId3",
 					Type:   `http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable`,
 					Target: "fontTable.xml",
 				},
 			},
 		},
-		rId: 4,
+		rId: 3,
 	}
 	return docx
 }
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module github.com/gonfva/docxlib
+module github.com/fumiama/docxlib
 go 1.16
--- a/helper.go
+++ b/helper.go
@@ -0,0 +1,20 @@
 package docxlib
 import (
 	"unsafe"
 )
 // BytesToString reinterprets b as a string without copying its bytes.
 // The returned string shares memory with b, so b must not be modified
 // while the string is still in use.
 func BytesToString(b []byte) string {
 	header := unsafe.Pointer(&b)
 	return *(*string)(header)
 }
 // StringToBytes reinterprets s as a byte slice without copying its bytes.
 // The returned slice aliases the string's data (len and cap both equal
 // len(s)), so it must be treated as read-only.
 func StringToBytes(s string) (b []byte) {
 	src := (*slice)(unsafe.Pointer(&s))
 	dst := (*slice)(unsafe.Pointer(&b))
 	// Only data and len are read from src: a string header has no cap word.
 	*dst = slice{data: src.data, len: src.len, cap: src.len}
 	return b
 }
--- a/pack.go
+++ b/pack.go
@@ -3,6 +3,7 @@ package docxlib
 import (
 	"archive/zip"
 	"encoding/xml"
 	"strings"
 	"github.com/golang/glog"
 )
@@ -10,7 +11,7 @@ import (
 // This receives a zip file writer (word documents are a zip with multiple xml inside)
 // and writes the relevant files. Some of them come from the empty_constants file,
 // others from the actual in-memory structure
-func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
+func (f *Docx) pack(zipWriter *zip.Writer) (err error) {
 	files := map[string]string{}
 	files["_rels/.rels"] = TEMP_REL
@@ -34,7 +35,7 @@ func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
 			return err
 		}
-		_, err = w.Write([]byte(data))
+		_, err = w.Write(StringToBytes(data))
 		if err != nil {
 			return err
 		}
@@ -44,12 +45,13 @@ func (f *DocxLib) pack(zipWriter *zip.Writer) (err error) {
 }
 func marshal(data interface{}) (out string, err error) {
-	body, err := xml.Marshal(data)
+	sb := strings.Builder{}
 	sb.WriteString(xml.Header)
 	err = xml.NewEncoder(&sb).Encode(data)
 	if err != nil {
 		glog.Errorln("Error marshalling", err)
 		return
 	}
-
+	out = sb.String()
 	out = xml.Header + string(body)
 	return
 }
--- a/slice.go
+++ b/slice.go
@@ -0,0 +1,15 @@
 package docxlib
 import "unsafe"
 // slice is the runtime representation of a slice.
 // It cannot be used safely or portably and its representation may
 // change in a later release.
 //
 // Unlike reflect.SliceHeader, its data field is sufficient to guarantee the
 // data it references will not be garbage collected.
 type slice struct {
 	data unsafe.Pointer // pointer to the first element of the backing array
 	len  int            // number of elements in use
 	cap  int            // number of elements the backing array can hold
 }
--- a/structnodes.go
+++ b/structnodes.go
@@ -17,11 +17,11 @@ type Paragraph struct {
 	XMLName xml.Name `xml:"http://schemas.openxmlformats.org/wordprocessingml/2006/main p"`
 	Data    []ParagraphChild
-	file *DocxLib
+	file *Docx
 }
 func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
-	children := make([]ParagraphChild, 0)
+	children := make([]ParagraphChild, 0, 64)
 	for {
 		t, err := d.Token()
 		if err == io.EOF {
@@ -30,7 +30,8 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
 		switch tt := t.(type) {
 		case xml.StartElement:
 			var elem ParagraphChild
-			if tt.Name.Local == "hyperlink" {
+			switch tt.Name.Local {
 			case "hyperlink":
 				var value Hyperlink
 				d.DecodeElement(&value, &start)
 				id := getAtt(tt.Attr, "id")
@@ -41,20 +42,20 @@ func (p *Paragraph) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
 				if anchor != "" {
 					value.ID = anchor
 				}
-				elem = ParagraphChild{Link: &value}
+				elem.Link = &value
-			} else if tt.Name.Local == "r" {
+			case "r":
 				var value Run
 				d.DecodeElement(&value, &start)
-				elem = ParagraphChild{Run: &value}
+				elem.Run = &value
 				if value.InstrText == "" && value.Text == nil {
 					glog.V(0).Infof("Empty run, we ignore")
 					continue
 				}
-			} else if tt.Name.Local == "rPr" {
+			case "rPr":
 				var value RunProperties
 				d.DecodeElement(&value, &start)
-				elem = ParagraphChild{Properties: &value}
+				elem.Properties = &value
-			} else {
+			default:
 				continue
 			}
 			children = append(children, elem)
--- a/structrun.go
+++ b/structrun.go
@@ -76,19 +76,20 @@ func (r *Run) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
 		switch tt := t.(type) {
 		case xml.StartElement:
-			if tt.Name.Local == "rPr" {
+			switch tt.Name.Local {
 			case "rPr":
 				var value RunProperties
 				d.DecodeElement(&value, &start)
 				elem.RunProperties = &value
-			} else if tt.Name.Local == "instrText" {
+			case "instrText":
 				var value string
 				d.DecodeElement(&value, &start)
 				elem.InstrText = value
-			} else if tt.Name.Local == "t" {
+			case "t":
 				var value Text
 				d.DecodeElement(&value, &start)
 				elem.Text = &value
-			} else {
+			default:
 				continue
 			}
 		}
@@ -109,8 +110,7 @@ func (r *Text) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
 		switch tt := t.(type) {
 		case xml.CharData:
-			cd := tt.Copy()
+			elem.Text = string(tt) // implicitly copy
 			elem.Text = string(cd)
 		}
 	}
--- a/unpack.go
+++ b/unpack.go
@@ -4,7 +4,7 @@ package docxlib
 import (
 	"archive/zip"
 	"encoding/xml"
-	"io/ioutil"
+	"io"
 	"github.com/golang/glog"
 )
@@ -13,68 +13,63 @@ import (
 // and parses the files that are relevant for us:
 // 1.-Document
 // 2.-Relationships
-func unpack(zipReader *zip.Reader) (docx *DocxLib, err error) {
+func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
-	var doc *Document
+	docx = new(Docx)
 	var relations *Relationships
 	for _, f := range zipReader.File {
 		if f.Name == "word/_rels/document.xml.rels" {
-			relations, err = processRelations(f)
+			err = processRelations(f, &docx.DocRelation)
 			if err != nil {
-				return nil, err
+				return
 			}
 		}
 		if f.Name == "word/document.xml" {
-			doc, err = processDoc(f)
+			err = processDoc(f, &docx.Document)
 			if err != nil {
-				return nil, err
+				return
 			}
 		}
 	}
-	docx = &DocxLib{
+	return
 		Document:    *doc,
 		DocRelation: *relations,
 	}
 	return docx, nil
 }
 // Processes one of the relevant files, the one with the actual document
-func processDoc(file *zip.File) (*Document, error) {
+func processDoc(file *zip.File, doc *Document) error {
 	filebytes, err := readZipFile(file)
 	if err != nil {
 		glog.Errorln("Error reading from internal zip file")
-		return nil, err
+		return err
 	}
 	glog.V(0).Infoln("Doc:", string(filebytes))
-	doc := Document{
+	doc.XMLW = XMLNS_W
-		XMLW:    XMLNS_W,
+	doc.XMLR = XMLNS_R
-		XMLR:    XMLNS_R,
+	doc.XMLName.Space = XMLNS_W
-		XMLName: xml.Name{Space: XMLNS_W, Local: "document"}}
+	doc.XMLName.Local = "document"
-	err = xml.Unmarshal(filebytes, &doc)
+	err = xml.Unmarshal(filebytes, doc)
 	if err != nil {
 		glog.Errorln("Error unmarshalling doc", string(filebytes))
-		return nil, err
+		return err
 	}
 	glog.V(0).Infoln("Paragraph", doc.Body.Paragraphs)
-	return &doc, nil
+	return nil
 }
 // Processes one of the relevant files, the one with the relationships
-func processRelations(file *zip.File) (*Relationships, error) {
+func processRelations(file *zip.File, rels *Relationships) error {
 	filebytes, err := readZipFile(file)
 	if err != nil {
 		glog.Errorln("Error reading from internal zip file")
-		return nil, err
+		return err
 	}
 	glog.V(0).Infoln("Relations:", string(filebytes))
-	rels := Relationships{Xmlns: XMLNS_R}
+	rels.Xmlns = XMLNS_R
-	err = xml.Unmarshal(filebytes, &rels)
+	err = xml.Unmarshal(filebytes, rels)
 	if err != nil {
 		glog.Errorln("Error unmarshalling relationships")
-		return nil, err
+		return err
 	}
-	return &rels, nil
+	return nil
 }
 // From a zip file structure, we return a byte array
@@ -84,5 +79,5 @@ func readZipFile(zf *zip.File) ([]byte, error) {
 		return nil, err
 	}
 	defer f.Close()
-	return ioutil.ReadAll(f)
+	return io.ReadAll(f)
 }
`@@ -1,4 +1,4 @@`
	`module github.com/gonfva/docxlib`	`module github.com/fumiama/docxlib`

	`go 1.16`	`go 1.16`