mirror of
https://github.com/fumiama/go-docx.git
synced 2026-06-04 23:30:25 +08:00
147 lines
3.7 KiB
Go
147 lines
3.7 KiB
Go
/*
|
|
Copyright (c) 2020 gingfrederik
|
|
Copyright (c) 2021 Gonzalo Fernandez-Victorio
|
|
Copyright (c) 2021 Basement Crowd Ltd (https://www.basementcrowd.com)
|
|
Copyright (c) 2023 Fumiama Minamoto (源文雨)
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published
|
|
by the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
package docx
|
|
|
|
import (
|
|
"archive/zip"
|
|
"encoding/xml"
|
|
"errors"
|
|
"io"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// unpack receives a zip file (word documents are a zip with multiple xml inside)
|
|
// and parses the files that are relevant for us:
|
|
//
|
|
// 1. Document
|
|
// 2. Relationships
|
|
// 3. Media
|
|
//
|
|
// Then it stores all other files into tmpfslist for packing.
|
|
func unpack(zipReader *zip.Reader) (docx *Docx, err error) {
|
|
docx = new(Docx)
|
|
docx.mediaNameIdx = make(map[string]int, 64)
|
|
docx.slowIDs = make(map[string]uintptr, 64)
|
|
docx.tmplfs = zipReader
|
|
docx.tmpfslst = make([]string, 0, 64)
|
|
for _, f := range zipReader.File {
|
|
if f.Name == "word/_rels/document.xml.rels" {
|
|
err = docx.parseDocRelation(f)
|
|
if err != nil {
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
if f.Name == "word/document.xml" {
|
|
err = docx.parseDocument(f)
|
|
if err != nil {
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
if strings.HasPrefix(f.Name, MEDIA_FOLDER) {
|
|
err = docx.parseMedia(f)
|
|
if err != nil {
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
// fill remaining files into tmpfslst
|
|
docx.tmpfslst = append(docx.tmpfslst, f.Name)
|
|
}
|
|
//TODO: find last imageID
|
|
docx.imageID = 100000
|
|
return
|
|
}
|
|
|
|
// parseDocument processes one of the relevant files, the one with the actual document
|
|
func (f *Docx) parseDocument(file *zip.File) error {
|
|
zf, err := file.Open()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer zf.Close()
|
|
|
|
f.Document.XMLW = XMLNS_W
|
|
f.Document.XMLR = XMLNS_R
|
|
f.Document.XMLWP = XMLNS_WP
|
|
// f.Document.XMLMC = XMLNS_MC
|
|
// f.Document.XMLO = XMLNS_O
|
|
// f.Document.XMLV = XMLNS_V
|
|
f.Document.XMLWPS = XMLNS_WPS
|
|
f.Document.XMLWPC = XMLNS_WPC
|
|
f.Document.XMLWPG = XMLNS_WPG
|
|
// f.Document.XMLWP14 = XMLNS_WP14
|
|
f.Document.XMLName.Space = XMLNS_W
|
|
f.Document.XMLName.Local = "document"
|
|
|
|
f.Document.Body.file = f
|
|
//TODO: find last docID
|
|
f.docID = 100000
|
|
err = xml.NewDecoder(zf).Decode(&f.Document)
|
|
return err
|
|
}
|
|
|
|
// parseDocRelation processes one of the relevant files, the one with the relationships
|
|
func (f *Docx) parseDocRelation(file *zip.File) error {
|
|
zf, err := file.Open()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer zf.Close()
|
|
|
|
f.docRelation.Xmlns = XMLNS_R
|
|
err = xml.NewDecoder(zf).Decode(&f.docRelation)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, r := range f.docRelation.Relationship {
|
|
if !strings.HasPrefix(r.ID, "rId") {
|
|
return errors.New("invalid rel ID: " + r.ID)
|
|
}
|
|
id, err := strconv.ParseUint(r.ID[3:], 10, 64)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if f.rID < uintptr(id) {
|
|
f.rID = uintptr(id)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// parseMedia add the media into Docx struct
|
|
func (f *Docx) parseMedia(file *zip.File) error {
|
|
name := file.Name[len(MEDIA_FOLDER):]
|
|
zf, err := file.Open()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
data, err := io.ReadAll(zf)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
f.mediaNameIdx[name] = len(f.media)
|
|
f.media = append(f.media, Media{Name: name, Data: data})
|
|
return zf.Close()
|
|
}
|