From 74b92c07111481ab7681bd64ce7f20a9d8f5b5ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 30 Sep 2022 21:51:02 +0800 Subject: [PATCH] =?UTF-8?q?=E9=80=82=E9=85=8D=E5=AD=97=E8=8A=82=E5=BA=8F?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base.go | 11 +++++++---- code.go | 9 +++++++++ conv.go | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ go.mod | 2 ++ go.sum | 2 ++ pack.go | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 6 files changed, 129 insertions(+), 10 deletions(-) create mode 100644 code.go create mode 100644 conv.go create mode 100644 go.sum diff --git a/base.go b/base.go index 5532eae..9bad97a 100644 --- a/base.go +++ b/base.go @@ -7,15 +7,15 @@ import ( // Base has an encoding buffer thus should not be copied. // total size: 8 bytes type Base struct { - off uint16 // starting offset + off uint16 // starting offset (0 is not permitted) til uint16 // remianing indicator starting offset bit uint8 // 2^bit, max is 15 (32768) - pos uint8 // bitwise buffer position - buf [2]byte // en/decoding buffer + _ [3]byte // always 0, indicates the byte order } var ( ErrInvalidBitSize = errors.New("bit size >= 16 or == 0") + ErrZeroOffsetStart = errors.New("zero offset start") ErrOffsetOverflow = errors.New("offset overflow") ErrTailOverflow = errors.New("tail overflow") ErrTailInCodingArea = errors.New("tail in coding area") @@ -23,6 +23,9 @@ var ( // NewBase generates a new base2n config func NewBase(off, til uint16, bit uint8) (*Base, error) { + if off == 0 { + return nil, ErrZeroOffsetStart + } if bit >= 16 || bit == 0 { return nil, ErrInvalidBitSize } @@ -31,7 +34,7 @@ func NewBase(off, til uint16, bit uint8) (*Base, error) { return nil, ErrOffsetOverflow } tile := uint32(til) // [til, tile) - if bit%2 == 0 { + if bit > 8 && bit%2 == 0 { tile += uint32(bit / 2) } else { tile += 
uint32(bit) diff --git a/code.go b/code.go new file mode 100644 index 0000000..018f991 --- /dev/null +++ b/code.go @@ -0,0 +1,9 @@ +package unibase2n + +func (bs *Base) Encode(data []byte) []byte { + return nil +} + +func (bs *Base) Decode(data []byte) []byte { + return nil +} diff --git a/conv.go b/conv.go new file mode 100644 index 0000000..286948c --- /dev/null +++ b/conv.go @@ -0,0 +1,57 @@ +package unibase2n + +import ( + "golang.org/x/text/encoding/unicode" +) + +var format = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) + +// UTF16BE2UTF8 to display the result as string +func UTF16BE2UTF8(b []byte) ([]byte, error) { + return format.NewDecoder().Bytes(b) +} + +// UTF82UTF16BE to decode from string +func UTF82UTF16BE(b []byte) ([]byte, error) { + return format.NewEncoder().Bytes(b) +} + +func (bs *Base) EncodeToString(b []byte) string { + out, err := UTF16BE2UTF8(bs.Encode(b)) + if err != nil { + return "" + } + return BytesToString(out) +} + +func (bs *Base) EncodeFromString(s string) []byte { + return bs.Encode(StringToBytes(s)) +} + +func (bs *Base) EncodeString(s string) string { + out, err := UTF16BE2UTF8(bs.Encode(StringToBytes(s))) + if err != nil { + return "" + } + return BytesToString(out) +} + +func (bs *Base) DecodeToString(d []byte) string { + return BytesToString(bs.Decode(d)) +} + +func (bs *Base) DecodeFromString(s string) []byte { + d, err := UTF82UTF16BE(StringToBytes(s)) + if err != nil { + return nil + } + return bs.Decode(d) +} + +func (bs *Base) DecodeString(s string) string { + d, err := UTF82UTF16BE(StringToBytes(s)) + if err != nil { + return "" + } + return BytesToString(bs.Decode(d)) +} diff --git a/go.mod b/go.mod index 7230110..51d1d9e 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/fumiama/unibase2n go 1.18 + +require golang.org/x/text v0.3.7 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..1f78e03 --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +golang.org/x/text v0.3.7 
h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= diff --git a/pack.go b/pack.go index 8cc127f..77fe962 100644 --- a/pack.go +++ b/pack.go @@ -1,13 +1,35 @@ package unibase2n -import "unsafe" +import ( + "encoding/binary" + "math/bits" + "unsafe" +) type Pack uint64 var ( + // Base16384 CJK Unified Ideographs + // see https://github.com/fumiama/base16384 Base16384 = newbasepack(0x4e00, 0x3d00, 14) - Base8192 = newbasepack(0xac00, 0xcc00, 13) - Base256 = newbasepack(0x1100, 0x0000, 4) + // Base8192 谚文音節 Hangul Syllables + Base8192 = newbasepack(0xac00, 0xcc00, 13) + // Base256 谚文字母 Hangul Jamo + Base256 = newbasepack(0x1100, 0, 8) + // BaseMath (256) 數學運算符 Mathematical Operators + BaseMath = newbasepack(0x2200, 0, 8) + // Base128 帶圈或括號的字母數字 Enclosed Alphanumerics + Base128 = newbasepack(0x2460, 0x24f4, 7) + // Base64 箭頭 Arrows + Base64 = newbasepack(0x2190, 0x21d0, 6) + // Base64Gua 六十四卦 YiJing Hexagram Symbols + Base64Gua = newbasepack(0x3400, 0x262f, 6) + // Base32 方塊元素 Block Elements + Base32 = newbasepack(0x2580, 0x259f, 5) + // Base16 漢文訓讀點 Kanbun Kundoku Den + Base16 = newbasepack(0x3190, 0, 4) + // Base8 八卦 YiJing Trigram Symbols (Miscellaneous Symbols block) + Base8 = newbasepack(0x2630, 0x2689, 3) ) func newbasepack(off, til uint16, bit uint8) Pack { @@ -18,12 +40,36 @@ func newbasepack(off, til uint16, bit uint8) Pack { return b.Pack() } +// New base2n from a packed config func New(pack Pack) *Base { b := &Base{} - *(*Pack)(unsafe.Pointer(b)) = pack + ismele := isLittleEndian() + isitle := pack&0xffffff != 0 + if ismele == isitle { // same endian + *(*Pack)(unsafe.Pointer(b)) = pack + return b + } + // change to native endian + n := bits.Reverse64(uint64(pack)) + field := (*[8]byte)(unsafe.Pointer(&n)) + if isitle { // packed in little endian but I am big + b.off = binary.BigEndian.Uint16(field[6:8]) + b.til = binary.BigEndian.Uint16(field[4:6]) + } else { // packed in big endian but I am little + b.off = 
binary.LittleEndian.Uint16(field[6:8]) + b.til = binary.LittleEndian.Uint16(field[4:6]) + } + b.bit = field[3] return b } -func (b *Base) Pack() Pack { - return *(*Pack)(unsafe.Pointer(b)) +// Pack this config into an integer +func (bs *Base) Pack() Pack { + return *(*Pack)(unsafe.Pointer(bs)) +} + +func isLittleEndian() bool { + n := 0x1234 + f := *((*byte)(unsafe.Pointer(&n))) + return (f ^ 0x34) == 0 }