1
0
mirror of https://github.com/fumiama/go-docx.git synced 2026-06-06 00:00:24 +08:00
Files
go-docx/unpack.go
2023-02-21 16:12:20 +08:00

98 lines
2.1 KiB
Go

package docxlib
import (
"archive/zip"
"bytes"
"encoding/xml"
"io"
"strings"
)
// unpack walks every entry of a docx archive (a zip holding several xml
// parts) and dispatches on the entry name:
//
//  1. "word/_rels/document.xml.rels" is decoded as the relationships part
//  2. "word/document.xml" is decoded as the main document part
//  3. entries whose name starts with MEDIA_FOLDER are loaded as media
//
// Every other entry is not parsed; only its name is appended to tmpfslst.
// The first parse error stops the walk and is returned together with the
// docx populated so far. The output buffer (4 MiB initial capacity) is only
// allocated once all entries were processed successfully.
func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
	docx = new(Docx)
	docx.tmplfs = zipReader
	docx.tmpfslst = make([]string, 0, 64)
	docx.mediaNameIdx = make(map[string]int, 64)

	for _, entry := range zipReader.File {
		switch {
		case entry.Name == "word/_rels/document.xml.rels":
			err = docx.parseDocRelation(entry)
		case entry.Name == "word/document.xml":
			err = docx.parseDocument(entry)
		case strings.HasPrefix(entry.Name, MEDIA_FOLDER):
			err = docx.parseMedia(entry)
		default:
			// not a part we parse: remember its name only
			docx.tmpfslst = append(docx.tmpfslst, entry.Name)
		}
		if err != nil {
			return docx, err
		}
	}

	docx.buf = bytes.NewBuffer(make([]byte, 0, 1024*1024*4))
	return docx, nil
}
// parseDocument opens the given archive entry and XML-decodes it into
// f.Document. Before decoding, the namespace attributes and the root element
// name of f.Document are preset. It returns the error from opening the entry
// or from decoding, or nil on success.
func (f *Docx) parseDocument(file *zip.File) error {
	rc, err := file.Open()
	if err != nil {
		return err
	}
	defer rc.Close()

	// root element name
	f.Document.XMLName.Space = XMLNS_W
	f.Document.XMLName.Local = "document"

	// namespace attributes
	f.Document.XMLW = XMLNS_W
	f.Document.XMLR = XMLNS_R
	f.Document.XMLWP = XMLNS_WP
	// f.Document.XMLWP14 = XMLNS_WP14

	return xml.NewDecoder(rc).Decode(&f.Document)
}
// parseDocRelation opens the given archive entry and XML-decodes it into
// f.DocRelation, after presetting its Xmlns field to XMLNS_R. It returns the
// error from opening the entry or from decoding, or nil on success.
func (f *Docx) parseDocRelation(file *zip.File) error {
	rc, err := file.Open()
	if err != nil {
		return err
	}
	defer rc.Close()

	f.DocRelation.Xmlns = XMLNS_R
	//TODO: find last rId & imageId
	if err := xml.NewDecoder(rc).Decode(&f.DocRelation); err != nil {
		return err
	}
	return nil
}
// parseMedia reads one archive entry located under MEDIA_FOLDER fully into
// memory and records it in f.media, indexing it in f.mediaNameIdx by its name
// relative to MEDIA_FOLDER.
//
// The entry reader is always closed, including when reading fails. An error
// from opening, reading or closing the entry is returned, and in that case
// nothing is recorded.
func (f *Docx) parseMedia(file *zip.File) error {
	// unpack only calls this for names that start with MEDIA_FOLDER.
	name := file.Name[len(MEDIA_FOLDER):]
	zf, err := file.Open()
	if err != nil {
		return err
	}
	data, err := io.ReadAll(zf)
	// Close before inspecting the read error so the reader is never leaked;
	// a read error takes precedence over a close error.
	if cerr := zf.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return err
	}
	f.mediaNameIdx[name] = len(f.media)
	f.media = append(f.media, Media{Name: name, Data: data})
	return nil
}