// EncodeLen returns the number of bytes needed to hold the encoding of in
// input bytes.
//
// Every full group of 7 input bytes becomes 8 output bytes. A trailing
// partial group of 1-6 bytes becomes 2-8 data bytes followed by the 2-byte
// offset marker ('=' and the partial group's length).
func EncodeLen(in int) (out int) {
	out = in / 7 * 8
	switch in % 7 { // includes the 2 bytes taken by the offset marker
	case 1:
		out += 4
	case 2, 3:
		out += 6
	case 4, 5:
		out += 8
	case 6:
		out += 10
	}
	return out
}

// EncodeTo encodes b into encd, which must hold at least EncodeLen(len(b))
// bytes; otherwise an error is returned and encd is left untouched.
//
// Exactly EncodeLen(len(b)) bytes of encd are written. Bytes of encd past
// that length are never modified, so a caller may pass a larger buffer.
func EncodeTo(b, encd []byte) error {
	outlen := EncodeLen(len(b))
	if len(encd) < outlen {
		return errors.New("encd too small")
	}
	offset := len(b) % 7

	// Full groups: spread 7 input bytes (56 bits) over four 16-bit lanes,
	// 14 bits per lane, then add 0x4e to the first byte of every lane.
	n, i := 0, 0
	for ; i <= len(b)-7; i, n = i+7, n+8 {
		sum := 0x000000000000003f & (uint64(b[i]) >> 2)
		sum |= ((uint64(b[i+1]) << 6) | (uint64(b[i]) << 14)) & 0x000000000000ff00
		sum |= ((uint64(b[i+1]) << 20) | (uint64(b[i+2]) << 12)) & 0x00000000003f0000
		sum |= ((uint64(b[i+2]) << 28) | (uint64(b[i+3]) << 20)) & 0x00000000ff000000
		sum |= ((uint64(b[i+3]) << 34) | (uint64(b[i+4]) << 26)) & 0x0000003f00000000
		sum |= ((uint64(b[i+4]) << 42) | (uint64(b[i+5]) << 34)) & 0x0000ff0000000000
		sum |= (uint64(b[i+5]) << 48) & 0x003f000000000000
		sum |= (uint64(b[i+6]) << 56) & 0xff00000000000000
		sum += 0x004e004e004e004e
		binary.LittleEndian.PutUint64(encd[n:], sum)
	}
	if offset == 0 {
		return nil
	}

	// Partial final group: same bit layout, using only the bytes present.
	sum := 0x000000000000003f & (uint64(b[i]) >> 2)
	sum |= (uint64(b[i]) << 14) & 0x000000000000c000
	if offset > 1 {
		sum |= (uint64(b[i+1]) << 6) & 0x0000000000003f00
		sum |= (uint64(b[i+1]) << 20) & 0x0000000000300000
	}
	if offset > 2 {
		sum |= (uint64(b[i+2]) << 12) & 0x00000000000f0000
		sum |= (uint64(b[i+2]) << 28) & 0x00000000f0000000
	}
	if offset > 3 {
		sum |= (uint64(b[i+3]) << 20) & 0x000000000f000000
		sum |= (uint64(b[i+3]) << 34) & 0x0000003c00000000
	}
	if offset > 4 {
		sum |= (uint64(b[i+4]) << 26) & 0x0000000300000000
		sum |= (uint64(b[i+4]) << 42) & 0x0000fc0000000000
	}
	if offset > 5 {
		sum |= (uint64(b[i+5]) << 34) & 0x0000030000000000
		sum |= (uint64(b[i+5]) << 48) & 0x003f000000000000
	}
	sum += 0x004e004e004e004e

	var tmp [8]byte
	binary.LittleEndian.PutUint64(tmp[:], sum)
	// Copy only the data bytes that belong to the output; bounding the
	// destination keeps the copy from spilling past outlen.
	copy(encd[n:outlen-2], tmp[:])
	encd[outlen-2] = '='
	encd[outlen-1] = byte(offset)
	return nil
}

// DecodeLen returns the number of bytes obtained by decoding in encoded
// bytes whose trailing marker carries the given offset (0 when the input has
// no partial final group).
func DecodeLen(in, offset int) (out int) {
	out = in
	switch offset { // strip the partial group and its 2-byte offset marker
	case 1:
		out -= 4
	case 2, 3:
		out -= 6
	case 4, 5:
		out -= 8
	case 6:
		out -= 10
	}
	return out/8*7 + offset
}
// DecodeTo decodes b into decd.
//
// b is a sequence of 8-byte groups, optionally followed by a partial group
// that ends with the 2-byte offset marker ('=' and the number of bytes the
// partial group decodes to). decd must be large enough for the decoded data.
//
// An error is returned when decd is too small, when the marker carries an
// offset above 6, or when the marker announces a partial group longer than
// the input. Input shorter than 2 bytes carries no marker; empty input
// decodes to nothing.
func DecodeTo(b []byte, decd []byte) error {
	datalen := len(b)
	offset := 0
	// The marker check needs two bytes; shorter input cannot carry one.
	if len(b) >= 2 && b[len(b)-2] == '=' {
		offset = int(b[len(b)-1])
		switch offset { // strip the partial group and its 2-byte offset marker
		case 0:
		case 1:
			datalen -= 4
		case 2, 3:
			datalen -= 6
		case 4, 5:
			datalen -= 8
		case 6:
			datalen -= 10
		default:
			return errors.New("invalid offset marker")
		}
		if datalen < 0 {
			return errors.New("input shorter than its offset marker implies")
		}
	}
	outlen := datalen/8*7 + offset
	if len(decd) < outlen {
		return errors.New("decd too small")
	}

	// Full groups: remove the 0x4e bias from every 16-bit lane and gather
	// the 56 payload bits back into 7 bytes.
	n, i := 0, 0
	for ; i <= outlen-7; i, n = i+7, n+8 {
		sum := binary.LittleEndian.Uint64(b[n:]) - 0x004e004e004e004e
		decd[i] = byte(((sum & 0x000000000000003f) << 2) | ((sum & 0x000000000000c000) >> 14))
		decd[i+1] = byte(((sum & 0x0000000000003f00) >> 6) | ((sum & 0x0000000000300000) >> 20))
		decd[i+2] = byte(((sum & 0x00000000000f0000) >> 12) | ((sum & 0x00000000f0000000) >> 28))
		decd[i+3] = byte(((sum & 0x000000000f000000) >> 20) | ((sum & 0x0000003c00000000) >> 34))
		decd[i+4] = byte(((sum & 0x0000000300000000) >> 26) | ((sum & 0x0000fc0000000000) >> 42))
		decd[i+5] = byte(((sum & 0x0000030000000000) >> 34) | ((sum & 0x003f000000000000) >> 48))
		decd[i+6] = byte((sum & 0xff00000000000000) >> 56)
	}
	if offset == 0 {
		return nil
	}

	// Partial final group. The bias is removed lane by lane, only for lanes
	// that hold data: the remaining bytes of tmp are marker bytes or zero
	// padding, and subtracting from them would borrow into the data lanes.
	var tmp [8]byte
	copy(tmp[:], b[n:])
	sum := binary.LittleEndian.Uint64(tmp[:]) - 0x000000000000004e
	decd[i] = byte(((sum & 0x000000000000003f) << 2) | ((sum & 0x000000000000c000) >> 14))
	if offset > 1 {
		sum -= 0x00000000004e0000
		decd[i+1] = byte(((sum & 0x0000000000003f00) >> 6) | ((sum & 0x0000000000300000) >> 20))
	}
	if offset > 2 {
		decd[i+2] = byte(((sum & 0x00000000000f0000) >> 12) | ((sum & 0x00000000f0000000) >> 28))
	}
	if offset > 3 {
		sum -= 0x0000004e00000000
		decd[i+3] = byte(((sum & 0x000000000f000000) >> 20) | ((sum & 0x0000003c00000000) >> 34))
	}
	if offset > 4 {
		decd[i+4] = byte(((sum & 0x0000000300000000) >> 26) | ((sum & 0x0000fc0000000000) >> 42))
	}
	if offset > 5 {
		sum -= 0x004e000000000000
		decd[i+5] = byte(((sum & 0x0000030000000000) >> 34) | ((sum & 0x003f000000000000) >> 48))
	}
	return nil
}
1024*1024+1)) + d := NewDecoder(bytes.NewReader(Encode(buf))) + _, err = io.Copy(w, d) + if err != nil { + t.Fatal(err) + } + t.Log(w.Len()) + if !bytes.Equal(buf, w.Bytes()) { + t.Fail() + } +} + +func TestBufferedDecoder(t *testing.T) { + buf := make([]byte, 1024*1024+1) + _, err := rand.Read(buf) + if err != nil { + t.Fatal(err) + } + w := bytes.NewBuffer(make([]byte, 0, 1024*1024+1)) + d := NewBufferedDecoder(Encode(buf)) + _, err = io.Copy(w, d) + if err != nil { + t.Fatal(err) + } + t.Log(w.Len()) + if !bytes.Equal(buf, w.Bytes()) { + t.Fail() + } } diff --git a/conv.go b/conv.go index f91099b..714d1ba 100644 --- a/conv.go +++ b/conv.go @@ -6,12 +6,36 @@ import ( var format = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) -// UTF16be2utf8 to display the result as string -func UTF16be2utf8(b []byte) ([]byte, error) { +// UTF16BE2UTF8 to display the result as string +func UTF16BE2UTF8(b []byte) ([]byte, error) { return format.NewDecoder().Bytes(b) } -// UTF82utf16be to decode from string -func UTF82utf16be(b []byte) ([]byte, error) { +// UTF82UTF16BE to decode from string +func UTF82UTF16BE(b []byte) ([]byte, error) { return format.NewEncoder().Bytes(b) } + +func EncodeFromString(s string) []byte { + return Encode(StringToBytes(s)) +} + +func EncodeString(s string) string { + out, err := UTF16BE2UTF8(Encode(StringToBytes(s))) + if err != nil { + return "" + } + return BytesToString(out) +} + +func DecodeToString(d []byte) string { + return BytesToString(Decode(d)) +} + +func DecodeString(s string) string { + d, err := UTF82UTF16BE(StringToBytes(s)) + if err != nil { + return "" + } + return BytesToString(Decode(d)) +} diff --git a/decoder.go b/decoder.go new file mode 100644 index 0000000..363f135 --- /dev/null +++ b/decoder.go @@ -0,0 +1,66 @@ +package base14 + +import ( + "io" +) + +type Decoder struct { + b []byte + r io.Reader + io.Reader +} + +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{r: r} +} + +func NewBufferedDecoder(b []byte) 
*Decoder { + return &Decoder{b: b} +} + +func (d *Decoder) Read(p []byte) (n int, err error) { + i := len(d.b) + if i == 0 && d.r == nil { + err = io.EOF + return + } + inlen := len(p)/7*8 + 2 + if d.r != nil { + d.b = append(d.b, make([]byte, inlen)...) + n, err = d.r.Read(d.b[i:]) + inlen = i + n + d.b = d.b[:inlen] + if err != nil { + if len(d.b) > 0 { + offset := 0 + if d.b[len(d.b)-2] == '=' { + offset = int(d.b[len(d.b)-1]) + } + n = DecodeLen(len(d.b), offset) + _ = DecodeTo(d.b, p) + d.b = nil + d.r = nil + } + return + } + } else if inlen > len(d.b) { + inlen = len(d.b) + } + if inlen >= 2 { + inlen -= 2 + } + offset := 0 + if d.b[len(d.b)-2] == '=' { + offset = int(d.b[len(d.b)-1]) + } + if offset > 0 { + n = DecodeLen(len(d.b), offset) + _ = DecodeTo(d.b, p) + d.b = nil + } else { + n = DecodeLen(inlen, 0) + _ = DecodeTo(d.b[:inlen], p) + d.b = d.b[inlen:] + } + return +} diff --git a/encoder.go b/encoder.go new file mode 100644 index 0000000..5073f7c --- /dev/null +++ b/encoder.go @@ -0,0 +1,49 @@ +package base14 + +import ( + "io" +) + +type Encoder struct { + b []byte + r io.Reader + io.Reader +} + +func NewEncoder(r io.Reader) *Encoder { + return &Encoder{r: r} +} + +func NewBufferedEncoder(b []byte) *Encoder { + return &Encoder{b: b} +} + +func (e *Encoder) Read(p []byte) (n int, err error) { + i := len(e.b) + if i == 0 && e.r == nil { + err = io.EOF + return + } + inlen := len(p) / 8 * 7 + if e.r != nil { + e.b = append(e.b, make([]byte, inlen)...) 
+ n, err = e.r.Read(e.b[i:]) + inlen = i + n + e.b = e.b[:inlen] + if err != nil { + if len(e.b) > 0 { + n = EncodeLen(inlen) + _ = EncodeTo(e.b[:inlen], p) + } + e.b = nil + e.r = nil + return + } + } else if inlen > len(e.b) { + inlen = len(e.b) + } + n = EncodeLen(inlen) + _ = EncodeTo(e.b[:inlen], p) + e.b = e.b[inlen:] + return +} diff --git a/go.mod b/go.mod index e6e4b15..6eb139b 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,7 @@ module github.com/fumiama/go-base16384 go 1.16 -require golang.org/x/text v0.3.7 +require ( + github.com/stretchr/testify v1.7.1 + golang.org/x/text v0.3.7 +) diff --git a/go.sum b/go.sum index 2274b80..eedcb75 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,14 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=