1
0
mirror of https://github.com/fumiama/go-docx.git synced 2026-06-04 23:30:25 +08:00
Files
go-docx/unpack.go
2023-04-23 19:28:05 +08:00

147 lines
3.7 KiB
Go

/*
Copyright (c) 2020 gingfrederik
Copyright (c) 2021 Gonzalo Fernandez-Victorio
Copyright (c) 2021 Basement Crowd Ltd (https://www.basementcrowd.com)
Copyright (c) 2023 Fumiama Minamoto (源文雨)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package docx
import (
"archive/zip"
"encoding/xml"
"errors"
"io"
"strconv"
"strings"
)
// unpack walks every entry of a docx archive (a word document is a zip holding
// several XML parts) and loads the entries this package understands:
//
//  1. word/document.xml, the document itself
//  2. word/_rels/document.xml.rels, the relationships
//  3. every entry whose name starts with MEDIA_FOLDER, the media
//
// The names of all remaining entries are collected in tmpfslst for packing,
// and zipReader itself is kept in tmplfs.
//
// The walk stops at the first parse error, which is returned together with
// the Docx as filled in so far.
func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
	docx = &Docx{
		mediaNameIdx: make(map[string]int, 64),
		slowIDs:      make(map[string]uintptr, 64),
		tmplfs:       zipReader,
		tmpfslst:     make([]string, 0, 64),
	}

	for _, entry := range zipReader.File {
		switch {
		case entry.Name == "word/_rels/document.xml.rels":
			err = docx.parseDocRelation(entry)
		case entry.Name == "word/document.xml":
			err = docx.parseDocument(entry)
		case strings.HasPrefix(entry.Name, MEDIA_FOLDER):
			err = docx.parseMedia(entry)
		default:
			// Not parsed here: only remember the entry name.
			docx.tmpfslst = append(docx.tmpfslst, entry.Name)
		}
		if err != nil {
			return docx, err
		}
	}

	//TODO: find last imageID
	docx.imageID = 100000
	return docx, nil
}
// parseDocument decodes the given archive entry as XML into f.Document.
// unpack calls it for word/document.xml.
//
// Before decoding it seeds the root element name and the namespace fields of
// f.Document, links the body back to f, and sets docID to a fixed starting
// value. It returns the error from opening the entry or from the XML decoder.
func (f *Docx) parseDocument(file *zip.File) error {
	rc, err := file.Open()
	if err != nil {
		return err
	}
	defer rc.Close()

	f.Document.XMLName = xml.Name{Space: XMLNS_W, Local: "document"}

	f.Document.XMLW = XMLNS_W
	f.Document.XMLR = XMLNS_R
	f.Document.XMLWP = XMLNS_WP
	// f.Document.XMLMC = XMLNS_MC
	// f.Document.XMLO = XMLNS_O
	// f.Document.XMLV = XMLNS_V
	f.Document.XMLWPS = XMLNS_WPS
	f.Document.XMLWPC = XMLNS_WPC
	f.Document.XMLWPG = XMLNS_WPG
	// f.Document.XMLWP14 = XMLNS_WP14

	// Give the body a reference to its owning Docx.
	f.Document.Body.file = f

	//TODO: find last docID
	f.docID = 100000

	return xml.NewDecoder(rc).Decode(&f.Document)
}
// parseDocRelation decodes the given archive entry as XML into f.docRelation.
// unpack calls it for word/_rels/document.xml.rels.
//
// After decoding, every relationship ID must have the form "rId<number>";
// f.rID is raised to the largest number seen. An ID without the "rId" prefix
// or with a non-numeric remainder aborts with an error.
func (f *Docx) parseDocRelation(file *zip.File) error {
	rc, err := file.Open()
	if err != nil {
		return err
	}
	defer rc.Close()

	f.docRelation.Xmlns = XMLNS_R
	if err = xml.NewDecoder(rc).Decode(&f.docRelation); err != nil {
		return err
	}

	rels := f.docRelation.Relationship
	for i := range rels {
		relID := rels[i].ID
		if !strings.HasPrefix(relID, "rId") {
			return errors.New("invalid rel ID: " + relID)
		}
		// NOTE(review): the number is parsed as 64-bit and then converted to
		// uintptr; on a 32-bit target a larger value would be truncated.
		n, perr := strconv.ParseUint(strings.TrimPrefix(relID, "rId"), 10, 64)
		if perr != nil {
			return perr
		}
		// Track the highest relationship number seen so far.
		if seen := uintptr(n); seen > f.rID {
			f.rID = seen
		}
	}
	return nil
}
// parseMedia reads one archive entry fully into memory and appends it to
// f.media, recording its index under its name in f.mediaNameIdx.
//
// The stored name is file.Name with the MEDIA_FOLDER prefix removed; unpack
// only passes entries that carry that prefix.
//
// The entry reader is closed on every path. If reading fails, the read error
// is returned and nothing is registered. Otherwise the media is registered and
// the result of closing the reader is returned.
func (f *Docx) parseMedia(file *zip.File) error {
	name := strings.TrimPrefix(file.Name, MEDIA_FOLDER)

	zf, err := file.Open()
	if err != nil {
		return err
	}

	data, err := io.ReadAll(zf)
	if err != nil {
		// Still release the reader; the read error is the one worth
		// reporting, so the close error is intentionally dropped.
		_ = zf.Close()
		return err
	}

	f.mediaNameIdx[name] = len(f.media)
	f.media = append(f.media, Media{Name: name, Data: data})
	return zf.Close()
}