[performance] update storage backend and make use of seek syscall when available (#2924)

* update to use go-storage/ instead of go-store/v2/storage/

* pull in latest version from codeberg

* remove test output 😇

* add code comments

* set the exclusive bit when creating new files in disk config

* bump to actual release version

* bump to v0.1.1 (tis a simple no-logic change)

* update readme

* only use a temporary read seeker when decoding video if required (should only be S3 now)

* use fastcopy library to use memory pooled buffers when calling TempFileSeeker()

* update to use seek call in serveFileRange()
This commit is contained in:
kim 2024-05-22 09:46:24 +00:00 committed by GitHub
parent 06b1e0173b
commit 3d3e99ae52
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
91 changed files with 1610 additions and 12737 deletions

View file

@ -283,7 +283,7 @@ The following open source libraries, frameworks, and tools are used by GoToSocia
- [gruf/go-mutexes](https://codeberg.org/gruf/go-mutexes); safemutex & mutex map. [MIT License](https://spdx.org/licenses/MIT.html).
- [gruf/go-runners](https://codeberg.org/gruf/go-runners); synchronization utilities. [MIT License](https://spdx.org/licenses/MIT.html).
- [gruf/go-sched](https://codeberg.org/gruf/go-sched); task scheduler. [MIT License](https://spdx.org/licenses/MIT.html).
- [gruf/go-store](https://codeberg.org/gruf/go-store); file storage backend (local & s3). [MIT License](https://spdx.org/licenses/MIT.html).
- [gruf/go-storage](https://codeberg.org/gruf/go-storage); file storage backend (local & s3). [MIT License](https://spdx.org/licenses/MIT.html).
- [gruf/go-structr](https://codeberg.org/gruf/go-structr); struct caching + queueing with automated indexing by field. [MIT License](https://spdx.org/licenses/MIT.html).
- [h2non/filetype](https://github.com/h2non/filetype); filetype checking. [MIT License](https://spdx.org/licenses/MIT.html).
- jackc:

View file

@ -80,10 +80,6 @@ func setupPrune(ctx context.Context) (*prune, error) {
func (p *prune) shutdown() error {
errs := gtserror.NewMultiError(2)
if err := p.storage.Close(); err != nil {
errs.Appendf("error closing storage backend: %w", err)
}
if err := p.dbService.Close(); err != nil {
errs.Appendf("error stopping database: %w", err)
}

5
go.mod
View file

@ -20,7 +20,7 @@ require (
codeberg.org/gruf/go-mutexes v1.5.0
codeberg.org/gruf/go-runners v1.6.2
codeberg.org/gruf/go-sched v1.2.3
codeberg.org/gruf/go-store/v2 v2.2.4
codeberg.org/gruf/go-storage v0.1.1
codeberg.org/gruf/go-structr v0.8.4
codeberg.org/superseriousbusiness/exif-terminator v0.7.0
github.com/DmitriyVTitov/size v1.5.0
@ -103,7 +103,6 @@ require (
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/containerd/cgroups/v3 v3.0.1 // indirect
github.com/coreos/go-systemd/v22 v22.3.2 // indirect
github.com/cornelk/hashmap v1.0.8 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/dolthub/maphash v0.1.0 // indirect
@ -160,7 +159,7 @@ require (
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.7 // indirect
github.com/klauspost/compress v1.17.8 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect

10
go.sum
View file

@ -74,8 +74,8 @@ codeberg.org/gruf/go-runners v1.6.2 h1:oQef9niahfHu/wch14xNxlRMP8i+ABXH1Cb9PzZ4o
codeberg.org/gruf/go-runners v1.6.2/go.mod h1:Tq5PrZ/m/rBXbLZz0u5if+yP3nG5Sf6S8O/GnyEePeQ=
codeberg.org/gruf/go-sched v1.2.3 h1:H5ViDxxzOBR3uIyGBCf0eH8b1L8wMybOXcdtUUTXZHk=
codeberg.org/gruf/go-sched v1.2.3/go.mod h1:vT9uB6KWFIIwnG9vcPY2a0alYNoqdL1mSzRM8I+PK7A=
codeberg.org/gruf/go-store/v2 v2.2.4 h1:8HO1Jh2gg7boQKA3hsDAIXd9zwieu5uXwDXEcTOD9js=
codeberg.org/gruf/go-store/v2 v2.2.4/go.mod h1:zI4VWe5CpXAktYMtaBMrgA5QmO0sQH53LBRvfn1huys=
codeberg.org/gruf/go-storage v0.1.1 h1:CSX1PMMg/7vqqK8aCFtq94xCrOB3xhj7eWIvzILdLpY=
codeberg.org/gruf/go-storage v0.1.1/go.mod h1:145IWMUOc6YpIiZIiCIEwkkNZZPiSbwMnZxRjSc5q6c=
codeberg.org/gruf/go-structr v0.8.4 h1:2eT1VOTWG6T9gIGZwF/1Jop6k6plvfdUY5yBcvbizVg=
codeberg.org/gruf/go-structr v0.8.4/go.mod h1:c5UvVDSA3lZ1kv05V+7pXkO8u8Jea+VRWFDRFBCOxSA=
codeberg.org/superseriousbusiness/exif-terminator v0.7.0 h1:Y6VApSXhKqExG0H2hZ2JelRK4xmWdjDQjn13CpEfzko=
@ -137,8 +137,6 @@ github.com/coreos/go-oidc/v3 v3.10.0 h1:tDnXHnLyiTVyT/2zLDGj09pFPkhND8Gl8lnTRhoE
github.com/coreos/go-oidc/v3 v3.10.0/go.mod h1:5j11xcw0D3+SGxn6Z/WFADsgcWVMyNAlSQupk0KK3ac=
github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cornelk/hashmap v1.0.8 h1:nv0AWgw02n+iDcawr5It4CjQIAcdMMKRrs10HOJYlrc=
github.com/cornelk/hashmap v1.0.8/go.mod h1:RfZb7JO3RviW/rT6emczVuC/oxpdz4UsSB2LJSclR1k=
github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
@ -397,8 +395,8 @@ github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg=
github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=

View file

@ -19,7 +19,6 @@
import (
"bytes"
"context"
"crypto/rand"
"encoding/base64"
"encoding/json"
@ -152,7 +151,7 @@ func (suite *MediaCreateTestSuite) TestMediaCreateSuccessful() {
// see what's in storage *before* the request
var storageKeysBeforeRequest []string
if err := suite.storage.WalkKeys(ctx, func(ctx context.Context, key string) error {
if err := suite.storage.WalkKeys(ctx, func(key string) error {
storageKeysBeforeRequest = append(storageKeysBeforeRequest, key)
return nil
}); err != nil {
@ -177,7 +176,7 @@ func (suite *MediaCreateTestSuite) TestMediaCreateSuccessful() {
// check what's in storage *after* the request
var storageKeysAfterRequest []string
if err := suite.storage.WalkKeys(ctx, func(ctx context.Context, key string) error {
if err := suite.storage.WalkKeys(ctx, func(key string) error {
storageKeysAfterRequest = append(storageKeysAfterRequest, key)
return nil
}); err != nil {
@ -237,7 +236,7 @@ func (suite *MediaCreateTestSuite) TestMediaCreateSuccessfulV2() {
// see what's in storage *before* the request
var storageKeysBeforeRequest []string
if err := suite.storage.WalkKeys(ctx, func(ctx context.Context, key string) error {
if err := suite.storage.WalkKeys(ctx, func(key string) error {
storageKeysBeforeRequest = append(storageKeysBeforeRequest, key)
return nil
}); err != nil {
@ -262,7 +261,7 @@ func (suite *MediaCreateTestSuite) TestMediaCreateSuccessfulV2() {
// check what's in storage *after* the request
var storageKeysAfterRequest []string
if err := suite.storage.WalkKeys(ctx, func(ctx context.Context, key string) error {
if err := suite.storage.WalkKeys(ctx, func(key string) error {
storageKeysAfterRequest = append(storageKeysAfterRequest, key)
return nil
}); err != nil {

View file

@ -224,11 +224,21 @@ func serveFileRange(rw http.ResponseWriter, r *http.Request, src io.Reader, rng
return
}
// Dump the first 'start' many bytes into the void...
if rs, ok := src.(io.ReadSeeker); ok {
// Source supports seeking (usually *os.File),
// seek to the 'start' byte position in file.
if _, err := rs.Seek(start, 0); err != nil {
log.Errorf(r.Context(), "error seeking in source: %v", err)
return
}
} else {
// Compat for when no seek call is implemented,
// dump the first 'start' many bytes into void.
if _, err := fastcopy.CopyN(io.Discard, src, start); err != nil {
log.Errorf(r.Context(), "error reading from source: %v", err)
return
}
}
// Determine new content length
// after slicing to given range.

View file

@ -19,15 +19,14 @@
import (
"context"
"errors"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/storage"
)
const (
@ -89,7 +88,7 @@ func (c *Cleaner) removeFiles(ctx context.Context, files ...string) (int, error)
// Remove each provided storage path.
log.Debugf(ctx, "removing file: %s", path)
err := c.state.Storage.Delete(ctx, path)
if err != nil && !errors.Is(err, storage.ErrNotFound) {
if err != nil && !storage.IsNotFound(err) {
errs.Appendf("error removing %s: %w", path, err)
errCount++
}

View file

@ -96,7 +96,7 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {
var files []string
// All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext}
if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error {
if err := m.state.Storage.WalkKeys(ctx, func(path string) error {
// Check for our expected fileserver path format.
if !regexes.FilePath.MatchString(path) {
log.Warn(ctx, "unexpected storage item: %s", path)

View file

@ -364,13 +364,13 @@ func (suite *MediaTestSuite) TestUncacheAndRecache() {
// media should no longer be stored
_, err = suite.storage.Get(ctx, testStatusAttachment.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
suite.True(storage.IsNotFound(err))
_, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
suite.True(storage.IsNotFound(err))
_, err = suite.storage.Get(ctx, testHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
suite.True(storage.IsNotFound(err))
_, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
suite.True(storage.IsNotFound(err))
// now recache the image....
data := func(_ context.Context) (io.ReadCloser, int64, error) {

View file

@ -20,10 +20,11 @@
import (
"context"
"database/sql"
"errors"
"fmt"
"path"
"codeberg.org/gruf/go-store/v2/storage"
"codeberg.org/gruf/go-storage"
"codeberg.org/gruf/go-storage/disk"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
@ -32,13 +33,13 @@
func init() {
deleteAttachment := func(ctx context.Context, l log.Entry, a *gtsmodel.MediaAttachment, s storage.Storage, tx bun.Tx) {
if err := s.Remove(ctx, a.File.Path); err != nil && err != storage.ErrNotFound {
if err := s.Remove(ctx, a.File.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
l.Errorf("error removing file %s: %s", a.File.Path, err)
} else {
l.Debugf("deleted %s", a.File.Path)
}
if err := s.Remove(ctx, a.Thumbnail.Path); err != nil && err != storage.ErrNotFound {
if err := s.Remove(ctx, a.Thumbnail.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
l.Errorf("error removing file %s: %s", a.Thumbnail.Path, err)
} else {
l.Debugf("deleted %s", a.Thumbnail.Path)
@ -68,13 +69,10 @@ func init() {
}
return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
s, err := storage.OpenDisk(storageBasePath, &storage.DiskConfig{
LockFile: path.Join(storageBasePath, "store.lock"),
})
s, err := disk.Open(storageBasePath, nil)
if err != nil {
return fmt.Errorf("error creating storage backend: %s", err)
}
defer s.Close()
// step 1. select all media attachment remote URLs that have duplicates
var dupes int

View file

@ -20,6 +20,8 @@
import (
"io"
"os"
"codeberg.org/gruf/go-fastcopy"
)
// ReadFnCloser takes an io.Reader and wraps it to use the provided function to implement io.Closer.
@ -179,7 +181,7 @@ func TempFileSeeker(r io.Reader) (io.ReadSeekCloser, error) {
return nil, err
}
if _, err := io.Copy(tmp, r); err != nil {
if _, err := fastcopy.Copy(tmp, r); err != nil {
return nil, err
}

View file

@ -19,17 +19,16 @@
import (
"context"
"errors"
"io"
"time"
"codeberg.org/gruf/go-iotools"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/uris"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
@ -260,11 +259,11 @@ func (m *Manager) PreProcessEmoji(
// Wrap closer to cleanup old data.
c := iotools.CloserCallback(rc, func() {
if err := m.state.Storage.Delete(ctx, originalImagePath); err != nil && !errors.Is(err, storage.ErrNotFound) {
if err := m.state.Storage.Delete(ctx, originalImagePath); err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error removing old emoji %s@%s from storage: %v", emoji.Shortcode, emoji.Domain, err)
}
if err := m.state.Storage.Delete(ctx, originalImageStaticPath); err != nil && !errors.Is(err, storage.ErrNotFound) {
if err := m.state.Storage.Delete(ctx, originalImageStaticPath); err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error removing old static emoji %s@%s from storage: %v", emoji.Shortcode, emoji.Domain, err)
}
})

View file

@ -23,15 +23,15 @@
"fmt"
"io"
"os"
"path"
"testing"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"codeberg.org/gruf/go-storage/disk"
"github.com/stretchr/testify/suite"
gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/storage"
gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/testrig"
)
@ -189,9 +189,9 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingRefresh() {
// the old image files should no longer be in storage
_, err = suite.storage.Get(ctx, oldEmojiImagePath)
suite.ErrorIs(err, storage.ErrNotFound)
suite.True(storage.IsNotFound(err))
_, err = suite.storage.Get(ctx, oldEmojiImageStaticPath)
suite.ErrorIs(err, storage.ErrNotFound)
suite.True(storage.IsNotFound(err))
}
func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLarge() {
@ -1189,9 +1189,7 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithDiskStorage() {
temp := fmt.Sprintf("%s/gotosocial-test", os.TempDir())
defer os.RemoveAll(temp)
disk, err := storage.OpenDisk(temp, &storage.DiskConfig{
LockFile: path.Join(temp, "store.lock"),
})
disk, err := disk.Open(temp, nil)
if err != nil {
panic(err)
}

View file

@ -20,7 +20,6 @@
import (
"bytes"
"context"
"errors"
"image/jpeg"
"io"
"time"
@ -156,7 +155,7 @@ func (p *ProcessingMedia) load(ctx context.Context) (*gtsmodel.MediaAttachment,
// never decoded). Try to clean up in this case.
if p.media.Type == gtsmodel.FileTypeUnknown {
deleteErr := p.mgr.state.Storage.Delete(ctx, p.media.File.Path)
if deleteErr != nil && !errors.Is(deleteErr, storage.ErrNotFound) {
if deleteErr != nil && !storage.IsNotFound(deleteErr) {
errs.Append(deleteErr)
}
}

View file

@ -36,20 +36,30 @@ type gtsVideo struct {
// decodeVideoFrame decodes and returns an image from a single frame in the given video stream.
// (note: currently this only returns a blank image resized to fit video dimensions).
func decodeVideoFrame(r io.Reader) (*gtsVideo, error) {
// we need a readseeker to decode the video...
tfs, err := iotools.TempFileSeeker(r)
// Check if video stream supports
// seeking, usually when *os.File.
rsc, ok := r.(io.ReadSeekCloser)
if !ok {
var err error
// Store stream to temporary location
// in order that we can get seek-reads.
rsc, err = iotools.TempFileSeeker(r)
if err != nil {
return nil, fmt.Errorf("error creating temp file seeker: %w", err)
}
defer func() {
if err := tfs.Close(); err != nil {
// Ensure temp. read seeker closed.
if err := rsc.Close(); err != nil {
log.Errorf(nil, "error closing temp file seeker: %s", err)
}
}()
}
// probe the video file to extract useful metadata from it; for methodology, see:
// https://github.com/abema/go-mp4/blob/7d8e5a7c5e644e0394261b0cf72fef79ce246d31/mp4tool/probe/probe.go#L85-L154
info, err := mp4.Probe(tfs)
info, err := mp4.Probe(rsc)
if err != nil {
return nil, fmt.Errorf("error during mp4 probe: %w", err)
}

View file

@ -23,9 +23,9 @@
"fmt"
"strings"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/storage"
)
// Delete deletes the media attachment with the given ID, including all files pertaining to that attachment.
@ -44,14 +44,14 @@ func (p *Processor) Delete(ctx context.Context, mediaAttachmentID string) gtserr
// delete the thumbnail from storage
if attachment.Thumbnail.Path != "" {
if err := p.state.Storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
if err := p.state.Storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && !storage.IsNotFound(err) {
errs = append(errs, fmt.Sprintf("remove thumbnail at path %s: %s", attachment.Thumbnail.Path, err))
}
}
// delete the file from storage
if attachment.File.Path != "" {
if err := p.state.Storage.Delete(ctx, attachment.File.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
if err := p.state.Storage.Delete(ctx, attachment.File.Path); err != nil && !storage.IsNotFound(err) {
errs = append(errs, fmt.Sprintf("remove file at path %s: %s", attachment.File.Path, err))
}
}

View file

@ -19,16 +19,20 @@
import (
"context"
"errors"
"fmt"
"io"
"mime"
"net/url"
"path"
"syscall"
"time"
"codeberg.org/gruf/go-bytesize"
"codeberg.org/gruf/go-cache/v3/ttl"
"codeberg.org/gruf/go-store/v2/storage"
"codeberg.org/gruf/go-storage"
"codeberg.org/gruf/go-storage/disk"
"codeberg.org/gruf/go-storage/s3"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
"github.com/superseriousbusiness/gotosocial/internal/config"
@ -48,11 +52,17 @@ type PresignedURL struct {
Expiry time.Time // link expires at this time
}
var (
// Ptrs to underlying storage library errors.
ErrAlreadyExists = storage.ErrAlreadyExists
ErrNotFound = storage.ErrNotFound
)
// IsAlreadyExist returns whether error is an already-exists
// type error returned by the underlying storage library.
func IsAlreadyExist(err error) bool {
return errors.Is(err, storage.ErrAlreadyExists)
}
// IsNotFound returns whether error is a not-found error
// type returned by the underlying storage library.
func IsNotFound(err error) bool {
return errors.Is(err, storage.ErrNotFound)
}
// Driver wraps a kv.KVStore to also provide S3 presigned GET URLs.
type Driver struct {
@ -92,30 +102,23 @@ func (d *Driver) Delete(ctx context.Context, key string) error {
// Has checks if the supplied key is in the storage.
func (d *Driver) Has(ctx context.Context, key string) (bool, error) {
return d.Storage.Stat(ctx, key)
stat, err := d.Storage.Stat(ctx, key)
return (stat != nil), err
}
// WalkKeys walks the keys in the storage.
func (d *Driver) WalkKeys(ctx context.Context, walk func(context.Context, string) error) error {
return d.Storage.WalkKeys(ctx, storage.WalkKeysOptions{
WalkFn: func(ctx context.Context, entry storage.Entry) error {
if entry.Key == "store.lock" {
return nil // skip this.
}
return walk(ctx, entry.Key)
func (d *Driver) WalkKeys(ctx context.Context, walk func(string) error) error {
return d.Storage.WalkKeys(ctx, storage.WalkKeysOpts{
Step: func(entry storage.Entry) error {
return walk(entry.Key)
},
})
}
// Close will close the storage, releasing any file locks.
func (d *Driver) Close() error {
return d.Storage.Close()
}
// URL will return a presigned GET object URL, but only if running on S3 storage with proxying disabled.
func (d *Driver) URL(ctx context.Context, key string) *PresignedURL {
// Check whether S3 *without* proxying is enabled
s3, ok := d.Storage.(*storage.S3Storage)
s3, ok := d.Storage.(*s3.S3Storage)
if !ok || d.Proxy {
return nil
}
@ -166,7 +169,7 @@ func (d *Driver) ProbeCSPUri(ctx context.Context) (string, error) {
// Check whether S3 without proxying
// is enabled. If it's not, there's
// no need to add anything to the CSP.
s3, ok := d.Storage.(*storage.S3Storage)
s3, ok := d.Storage.(*s3.S3Storage)
if !ok || d.Proxy {
return "", nil
}
@ -217,16 +220,17 @@ func NewFileStorage() (*Driver, error) {
// Load runtime configuration
basePath := config.GetStorageLocalBasePath()
// Use default disk config but with
// increased write buffer size and
// 'exclusive' bit sets when creating
// files to ensure we don't overwrite
// existing files unless intending to.
diskCfg := disk.DefaultConfig()
diskCfg.OpenWrite.Flags |= syscall.O_EXCL
diskCfg.WriteBufSize = int(16 * bytesize.KiB)
// Open the disk storage implementation
disk, err := storage.OpenDisk(basePath, &storage.DiskConfig{
// Put the store lockfile in the storage dir itself.
// Normally this would not be safe, since we could end up
// overwriting the lockfile if we store a file called 'store.lock'.
// However, in this case it's OK because the keys are set by
// GtS and not the user, so we know we're never going to overwrite it.
LockFile: path.Join(basePath, "store.lock"),
WriteBufSize: int(16 * bytesize.KiB),
})
disk, err := disk.Open(basePath, &diskCfg)
if err != nil {
return nil, fmt.Errorf("error opening disk storage: %w", err)
}
@ -245,7 +249,7 @@ func NewS3Storage() (*Driver, error) {
bucket := config.GetStorageS3BucketName()
// Open the s3 storage implementation
s3, err := storage.OpenS3(endpoint, bucket, &storage.S3Config{
s3, err := s3.Open(endpoint, bucket, &s3.Config{
CoreOpts: minio.Options{
Creds: credentials.NewStaticV4(access, secret, ""),
Secure: secure,

View file

@ -23,13 +23,13 @@
"os"
"path"
"codeberg.org/gruf/go-store/v2/storage"
"codeberg.org/gruf/go-storage/memory"
gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
)
// NewInMemoryStorage returns a new in memory storage with the default test config
func NewInMemoryStorage() *gtsstorage.Driver {
storage := storage.OpenMemory(200, false)
storage := memory.Open(200, false)
return &gtsstorage.Driver{
Storage: storage,
}
@ -98,7 +98,7 @@ func StandardStorageTeardown(storage *gtsstorage.Driver) {
var keys []string
_ = storage.WalkKeys(context.Background(), func(ctx context.Context, key string) error {
_ = storage.WalkKeys(context.Background(), func(key string) error {
keys = append(keys, key)
return nil
})

5
vendor/codeberg.org/gruf/go-storage/README.md generated vendored Normal file
View file

@ -0,0 +1,5 @@
# go-storage
A simple library providing various storage implementations with a simple read-write-stat interface.
Supports: on-disk, memory, S3.

467
vendor/codeberg.org/gruf/go-storage/disk/disk.go generated vendored Normal file
View file

@ -0,0 +1,467 @@
package disk
import (
"bytes"
"context"
"errors"
"io"
"io/fs"
"os"
"path"
"strings"
"syscall"
"codeberg.org/gruf/go-fastcopy"
"codeberg.org/gruf/go-fastpath/v2"
"codeberg.org/gruf/go-storage"
"codeberg.org/gruf/go-storage/internal"
)
// ensure DiskStorage conforms to storage.Storage.
var _ storage.Storage = (*DiskStorage)(nil)
// DefaultConfig returns the default DiskStorage configuration.
func DefaultConfig() Config {
return defaultConfig
}
// immutable default configuration.
var defaultConfig = Config{
OpenRead: OpenArgs{syscall.O_RDONLY, 0o644},
OpenWrite: OpenArgs{syscall.O_CREAT | syscall.O_WRONLY, 0o644},
MkdirPerms: 0o755,
WriteBufSize: 4096,
}
// OpenArgs defines args passed
// in a syscall.Open() operation.
type OpenArgs struct {
Flags int
Perms uint32
}
// Config defines options to be
// used when opening a DiskStorage.
type Config struct {
// OpenRead are the arguments passed
// to syscall.Open() when opening a
// file for read operations.
OpenRead OpenArgs
// OpenWrite are the arguments passed
// to syscall.Open() when opening a
// file for write operations.
OpenWrite OpenArgs
// MkdirPerms are the permissions used
// when creating necessary sub-dirs in
// a storage key with slashes.
MkdirPerms uint32
// WriteBufSize is the buffer size
// to use when writing file streams.
WriteBufSize int
}
// getDiskConfig returns valid (and owned!) Config for given ptr.
func getDiskConfig(cfg *Config) Config {
if cfg == nil {
// use defaults.
return defaultConfig
}
// Ensure non-zero syscall args.
if cfg.OpenRead.Flags == 0 {
cfg.OpenRead.Flags = defaultConfig.OpenRead.Flags
}
if cfg.OpenRead.Perms == 0 {
cfg.OpenRead.Perms = defaultConfig.OpenRead.Perms
}
if cfg.OpenWrite.Flags == 0 {
cfg.OpenWrite.Flags = defaultConfig.OpenWrite.Flags
}
if cfg.OpenWrite.Perms == 0 {
cfg.OpenWrite.Perms = defaultConfig.OpenWrite.Perms
}
if cfg.MkdirPerms == 0 {
cfg.MkdirPerms = defaultConfig.MkdirPerms
}
// Ensure valid write buf.
if cfg.WriteBufSize <= 0 {
cfg.WriteBufSize = defaultConfig.WriteBufSize
}
return Config{
OpenRead: cfg.OpenRead,
OpenWrite: cfg.OpenWrite,
MkdirPerms: cfg.MkdirPerms,
WriteBufSize: cfg.WriteBufSize,
}
}
// DiskStorage is a Storage implementation
// that stores directly to a filesystem.
type DiskStorage struct {
path string // path is the root path of this store
pool fastcopy.CopyPool // pool is the prepared io copier with buffer pool
cfg Config // cfg is the supplied configuration for this store
}
// Open opens a DiskStorage instance for given folder path and configuration.
func Open(path string, cfg *Config) (*DiskStorage, error) {
// Check + set config defaults.
config := getDiskConfig(cfg)
// Clean provided storage path, ensure
// final '/' to help with path trimming.
pb := internal.GetPathBuilder()
path = pb.Clean(path) + "/"
internal.PutPathBuilder(pb)
// Ensure directories up-to path exist.
perms := fs.FileMode(config.MkdirPerms)
err := os.MkdirAll(path, perms)
if err != nil {
return nil, err
}
// Prepare DiskStorage.
st := &DiskStorage{
path: path,
cfg: config,
}
// Set fastcopy pool buffer size.
st.pool.Buffer(config.WriteBufSize)
return st, nil
}
// Clean: implements Storage.Clean().
func (st *DiskStorage) Clean(ctx context.Context) error {
// Check context still valid.
if err := ctx.Err(); err != nil {
return err
}
// Clean unused directories.
return cleanDirs(st.path, OpenArgs{
Flags: syscall.O_RDONLY,
})
}
// ReadBytes: implements Storage.ReadBytes().
func (st *DiskStorage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
// Get stream reader for key
rc, err := st.ReadStream(ctx, key)
if err != nil {
return nil, err
}
// Read all data to memory.
data, err := io.ReadAll(rc)
if err != nil {
_ = rc.Close()
return nil, err
}
// Close storage stream reader.
if err := rc.Close(); err != nil {
return nil, err
}
return data, nil
}
// ReadStream: implements Storage.ReadStream().
func (st *DiskStorage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
// Generate file path for key.
kpath, err := st.Filepath(key)
if err != nil {
return nil, err
}
// Check context still valid.
if err := ctx.Err(); err != nil {
return nil, err
}
// Attempt to open file with read args.
file, err := open(kpath, st.cfg.OpenRead)
if err != nil {
if err == syscall.ENOENT {
// Translate not-found errors and wrap with key.
err = internal.ErrWithKey(storage.ErrNotFound, key)
}
return nil, err
}
return file, nil
}
// WriteBytes: implements Storage.WriteBytes().
func (st *DiskStorage) WriteBytes(ctx context.Context, key string, value []byte) (int, error) {
n, err := st.WriteStream(ctx, key, bytes.NewReader(value))
return int(n), err
}
// WriteStream: implements Storage.WriteStream().
func (st *DiskStorage) WriteStream(ctx context.Context, key string, stream io.Reader) (int64, error) {
// Acquire path builder buffer.
pb := internal.GetPathBuilder()
// Generate the file path for given key.
kpath, subdir, err := st.filepath(pb, key)
if err != nil {
return 0, err
}
// Done with path buffer.
internal.PutPathBuilder(pb)
// Check context still valid.
if err := ctx.Err(); err != nil {
return 0, err
}
if subdir {
// Get dir of key path.
dir := path.Dir(kpath)
// Note that subdir will only be set if
// the transformed key (without base path)
// contains any slashes. This is not a
// definitive check, but it allows us to
// skip a syscall if mkdirall not needed!
perms := fs.FileMode(st.cfg.MkdirPerms)
err = os.MkdirAll(dir, perms)
if err != nil {
return 0, err
}
}
// Attempt to open file with write args.
file, err := open(kpath, st.cfg.OpenWrite)
if err != nil {
if st.cfg.OpenWrite.Flags&syscall.O_EXCL != 0 &&
err == syscall.EEXIST {
// Translate already exists errors and wrap with key.
err = internal.ErrWithKey(storage.ErrAlreadyExists, key)
}
return 0, err
}
// Copy provided stream to file interface.
n, err := st.pool.Copy(file, stream)
if err != nil {
_ = file.Close()
return n, err
}
// Finally, close file.
return n, file.Close()
}
// Stat implements Storage.Stat().
func (st *DiskStorage) Stat(ctx context.Context, key string) (*storage.Entry, error) {
// Generate file path for key.
kpath, err := st.Filepath(key)
if err != nil {
return nil, err
}
// Check context still valid.
if err := ctx.Err(); err != nil {
return nil, err
}
// Stat file on disk.
stat, err := stat(kpath)
if stat == nil {
return nil, err
}
return &storage.Entry{
Key: key,
Size: stat.Size,
}, nil
}
// Remove implements Storage.Remove().
func (st *DiskStorage) Remove(ctx context.Context, key string) error {
// Generate file path for key.
kpath, err := st.Filepath(key)
if err != nil {
return err
}
// Check context still valid.
if err := ctx.Err(); err != nil {
return err
}
// Stat file on disk.
stat, err := stat(kpath)
if err != nil {
return err
}
// Not-found (or handled
// as) error situations.
if stat == nil {
return internal.ErrWithKey(storage.ErrNotFound, key)
} else if stat.Mode&syscall.S_IFREG == 0 {
err := errors.New("storage/disk: not a regular file")
return internal.ErrWithKey(err, key)
}
// Remove at path (we know this is file).
if err := unlink(kpath); err != nil {
if err == syscall.ENOENT {
// Translate not-found errors and wrap with key.
err = internal.ErrWithKey(storage.ErrNotFound, key)
}
return err
}
return nil
}
// WalkKeys implements Storage.WalkKeys().
func (st *DiskStorage) WalkKeys(ctx context.Context, opts storage.WalkKeysOpts) error {
if opts.Step == nil {
panic("nil step fn")
}
// Check context still valid.
if err := ctx.Err(); err != nil {
return err
}
// Acquire path builder for walk.
pb := internal.GetPathBuilder()
defer internal.PutPathBuilder(pb)
// Dir to walk.
dir := st.path
if opts.Prefix != "" {
// Convert key prefix to one of our storage filepaths.
pathprefix, subdir, err := st.filepath(pb, opts.Prefix)
if err != nil {
return internal.ErrWithMsg(err, "prefix error")
}
if subdir {
// Note that subdir will only be set if
// the transformed key (without base path)
// contains any slashes. This is not a
// definitive check, but it allows us to
// update the directory we walk in case
// it might narrow search parameters!
dir = path.Dir(pathprefix)
}
// Set updated storage
// path prefix in opts.
opts.Prefix = pathprefix
}
// Only need to open dirs as read-only.
args := OpenArgs{Flags: syscall.O_RDONLY}
return walkDir(pb, dir, args, func(kpath string, fsentry fs.DirEntry) error {
if !fsentry.Type().IsRegular() {
// Ignore anything but
// regular file types.
return nil
}
// Get full item path (without root).
kpath = pb.Join(kpath, fsentry.Name())
// Perform a fast filter check against storage path prefix (if set).
if opts.Prefix != "" && !strings.HasPrefix(kpath, opts.Prefix) {
return nil // ignore
}
// Storage key without base.
key := kpath[len(st.path):]
// Ignore filtered keys.
if opts.Filter != nil &&
!opts.Filter(key) {
return nil // ignore
}
// Load file info. This should already
// be loaded due to the underlying call
// to os.File{}.ReadDir() populating them.
info, err := fsentry.Info()
if err != nil {
return err
}
// Perform provided walk function
return opts.Step(storage.Entry{
Key: key,
Size: info.Size(),
})
})
}
// Filepath checks and returns a formatted Filepath for given key.
func (st *DiskStorage) Filepath(key string) (path string, err error) {
pb := internal.GetPathBuilder()
path, _, err = st.filepath(pb, key)
internal.PutPathBuilder(pb)
return
}
// filepath performs the "meat" of Filepath(), returning also if path *may* be a subdir of base.
func (st *DiskStorage) filepath(pb *fastpath.Builder, key string) (path string, subdir bool, err error) {
// Fast check for whether this may be a
// sub-directory. This is not a definitive
// check, it's only for a fastpath check.
subdir = strings.ContainsRune(key, '/')
// Build from base.
pb.Append(st.path)
pb.Append(key)
// Take COPY of bytes.
path = string(pb.B)
// Check for dir traversal outside base.
if isDirTraversal(st.path, path) {
err = internal.ErrWithKey(storage.ErrInvalidKey, key)
}
return
}
// isDirTraversal will check if rootPlusPath is a dir traversal outside of root,
// assuming that both are cleaned and that rootPlusPath is path.Join(root, somePath).
func isDirTraversal(root, rootPlusPath string) bool {
switch {
// Root is $PWD, check for traversal out of
case root == ".":
return strings.HasPrefix(rootPlusPath, "../")
// The path MUST be prefixed by root
case !strings.HasPrefix(rootPlusPath, root):
return true
// In all other cases, check not equal
default:
return len(root) == len(rootPlusPath)
}
}

View file

@ -1,24 +1,14 @@
package storage
package disk
import (
"errors"
"fmt"
"io/fs"
"os"
"syscall"
"codeberg.org/gruf/go-fastpath/v2"
"codeberg.org/gruf/go-store/v2/util"
)
const (
// default file permission bits
defaultDirPerms = 0o755
defaultFilePerms = 0o644
// default file open flags
defaultFileROFlags = syscall.O_RDONLY
defaultFileRWFlags = syscall.O_CREAT | syscall.O_RDWR
defaultFileLockFlags = syscall.O_RDONLY | syscall.O_CREAT
"codeberg.org/gruf/go-storage/internal"
)
// NOTE:
@ -26,9 +16,9 @@
// not necessarily for e.g. initial setup (OpenFile)
// walkDir traverses the dir tree of the supplied path, performing the supplied walkFn on each entry
func walkDir(pb *fastpath.Builder, path string, walkFn func(string, fs.DirEntry) error) error {
// Read directory entries
entries, err := readDir(path)
func walkDir(pb *fastpath.Builder, path string, args OpenArgs, walkFn func(string, fs.DirEntry) error) error {
// Read directory entries at path.
entries, err := readDir(path, args)
if err != nil {
return err
}
@ -85,7 +75,7 @@ type frame struct {
path = pb.Join(path, entry.Name())
// Read next directory entries
next, err := readDir(path)
next, err := readDir(path, args)
if err != nil {
return err
}
@ -102,16 +92,17 @@ type frame struct {
}
// cleanDirs traverses the dir tree of the supplied path, removing any folders with zero children
func cleanDirs(path string) error {
pb := util.GetPathBuilder()
defer util.PutPathBuilder(pb)
return cleanDir(pb, path, true)
func cleanDirs(path string, args OpenArgs) error {
pb := internal.GetPathBuilder()
err := cleanDir(pb, path, args, true)
internal.PutPathBuilder(pb)
return err
}
// cleanDir performs the actual dir cleaning logic for the above top-level version.
func cleanDir(pb *fastpath.Builder, path string, top bool) error {
// Get dir entries at path.
entries, err := readDir(path)
func cleanDir(pb *fastpath.Builder, path string, args OpenArgs, top bool) error {
// Get directory entries at path.
entries, err := readDir(path, args)
if err != nil {
return err
}
@ -121,30 +112,36 @@ func cleanDir(pb *fastpath.Builder, path string, top bool) error {
return rmdir(path)
}
var errs []error
// Iterate all directory entries.
for _, entry := range entries {
if entry.IsDir() {
// Calculate directory path.
dirPath := pb.Join(path, entry.Name())
dir := pb.Join(path, entry.Name())
// Recursively clean sub-directory entries.
if err := cleanDir(pb, dirPath, false); err != nil {
fmt.Fprintf(os.Stderr, "[go-store/storage] error cleaning %s: %v", dirPath, err)
// Recursively clean sub-directory entries, adding errs.
if err := cleanDir(pb, dir, args, false); err != nil {
err = fmt.Errorf("error(s) cleaning subdir %s: %w", dir, err)
errs = append(errs, err)
}
}
}
return nil
// Return combined errors.
return errors.Join(errs...)
}
// readDir will open file at path, read the unsorted list of entries, then close.
func readDir(path string) ([]fs.DirEntry, error) {
// Open file at path
file, err := open(path, defaultFileROFlags)
func readDir(path string, args OpenArgs) ([]fs.DirEntry, error) {
// Open directory at path.
file, err := open(path, args)
if err != nil {
return nil, err
}
// Read directory entries
// Read ALL directory entries.
entries, err := file.ReadDir(-1)
// Done with file
@ -153,11 +150,11 @@ func readDir(path string) ([]fs.DirEntry, error) {
return entries, err
}
// open will open a file at the given path with flags and default file perms.
func open(path string, flags int) (*os.File, error) {
// open is a simple wrapper around syscall.Open().
func open(path string, args OpenArgs) (*os.File, error) {
var fd int
err := retryOnEINTR(func() (err error) {
fd, err = syscall.Open(path, flags, defaultFilePerms)
fd, err = syscall.Open(path, args.Flags, args.Perms)
return
})
if err != nil {
@ -166,8 +163,8 @@ func open(path string, flags int) (*os.File, error) {
return os.NewFile(uintptr(fd), path), nil
}
// stat checks for a file on disk.
func stat(path string) (bool, error) {
// stat is a simple wrapper around syscall.Stat().
func stat(path string) (*syscall.Stat_t, error) {
var stat syscall.Stat_t
err := retryOnEINTR(func() error {
return syscall.Stat(path, &stat)
@ -177,26 +174,27 @@ func stat(path string) (bool, error) {
// not-found is no error
err = nil
}
return false, err
return nil, err
}
return true, nil
return &stat, nil
}
// unlink removes a file (not dir!) on disk.
// unlink is a simple wrapper around syscall.Unlink().
func unlink(path string) error {
return retryOnEINTR(func() error {
return syscall.Unlink(path)
})
}
// rmdir removes a dir (not file!) on disk.
// rmdir is a simple wrapper around syscall.Rmdir().
func rmdir(path string) error {
return retryOnEINTR(func() error {
return syscall.Rmdir(path)
})
}
// retryOnEINTR is a low-level filesystem function for retrying syscalls on O_EINTR received.
// retryOnEINTR is a low-level filesystem function
// for retrying syscalls on O_EINTR received.
func retryOnEINTR(do func() error) error {
for {
err := do()

16
vendor/codeberg.org/gruf/go-storage/errors.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
package storage
import (
"errors"
)
var (
// ErrNotFound is the error returned when a key cannot be found in storage
ErrNotFound = errors.New("storage: key not found")
// ErrAlreadyExist is the error returned when a key already exists in storage
ErrAlreadyExists = errors.New("storage: key already exists")
// ErrInvalidkey is the error returned when an invalid key is passed to storage
ErrInvalidKey = errors.New("storage: invalid key")
)

56
vendor/codeberg.org/gruf/go-storage/internal/errors.go generated vendored Normal file
View file

@ -0,0 +1,56 @@
package internal
func ErrWithKey(err error, key string) error {
return &errorWithKey{key: key, err: err}
}
type errorWithKey struct {
key string
err error
}
func (err *errorWithKey) Error() string {
return err.err.Error() + ": " + err.key
}
func (err *errorWithKey) Unwrap() error {
return err.err
}
func ErrWithMsg(err error, msg string) error {
return &errorWithMsg{msg: msg, err: err}
}
type errorWithMsg struct {
msg string
err error
}
func (err *errorWithMsg) Error() string {
return err.msg + ": " + err.err.Error()
}
func (err *errorWithMsg) Unwrap() error {
return err.err
}
func WrapErr(inner, outer error) error {
return &wrappedError{inner: inner, outer: outer}
}
type wrappedError struct {
inner error
outer error
}
func (err *wrappedError) Is(other error) bool {
return err.inner == other || err.outer == other
}
func (err *wrappedError) Error() string {
return err.inner.Error() + ": " + err.outer.Error()
}
func (err *wrappedError) Unwrap() []error {
return []error{err.inner, err.outer}
}

24
vendor/codeberg.org/gruf/go-storage/internal/path.go generated vendored Normal file
View file

@ -0,0 +1,24 @@
package internal
import (
"sync"
"codeberg.org/gruf/go-fastpath/v2"
)
var pathBuilderPool sync.Pool
func GetPathBuilder() *fastpath.Builder {
v := pathBuilderPool.Get()
if v == nil {
pb := new(fastpath.Builder)
pb.B = make([]byte, 0, 512)
v = pb
}
return v.(*fastpath.Builder)
}
func PutPathBuilder(pb *fastpath.Builder) {
pb.Reset()
pathBuilderPool.Put(pb)
}

253
vendor/codeberg.org/gruf/go-storage/memory/memory.go generated vendored Normal file
View file

@ -0,0 +1,253 @@
package memory
import (
"bytes"
"context"
"io"
"strings"
"sync"
"codeberg.org/gruf/go-iotools"
"codeberg.org/gruf/go-storage"
"codeberg.org/gruf/go-storage/internal"
)
// ensure MemoryStorage conforms to storage.Storage.
var _ storage.Storage = (*MemoryStorage)(nil)
// MemoryStorage is a storage implementation that simply stores key-value
// pairs in a Go map in-memory. The map is protected by a mutex.
type MemoryStorage struct {
ow bool // overwrites
fs map[string][]byte
mu sync.Mutex
}
// Open opens a new MemoryStorage instance with internal map starting size.
func Open(size int, overwrites bool) *MemoryStorage {
return &MemoryStorage{
ow: overwrites,
fs: make(map[string][]byte, size),
}
}
// Clean: implements Storage.Clean().
func (st *MemoryStorage) Clean(ctx context.Context) error {
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
// Lock map.
st.mu.Lock()
// Resize map to only necessary size in-mem.
fs := make(map[string][]byte, len(st.fs))
for key, val := range st.fs {
fs[key] = val
}
st.fs = fs
// Done with lock.
st.mu.Unlock()
return nil
}
// ReadBytes: implements Storage.ReadBytes().
func (st *MemoryStorage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
// Check context still valid.
if err := ctx.Err(); err != nil {
return nil, err
}
// Lock map.
st.mu.Lock()
// Check key in store.
b, ok := st.fs[key]
if ok {
// COPY bytes.
b = copyb(b)
}
// Done with lock.
st.mu.Unlock()
if !ok {
return nil, internal.ErrWithKey(storage.ErrNotFound, key)
}
return b, nil
}
// ReadStream: implements Storage.ReadStream().
func (st *MemoryStorage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
// Read value data from store.
b, err := st.ReadBytes(ctx, key)
if err != nil {
return nil, err
}
// Wrap in readcloser.
r := bytes.NewReader(b)
return iotools.NopReadCloser(r), nil
}
// WriteBytes: implements Storage.WriteBytes().
func (st *MemoryStorage) WriteBytes(ctx context.Context, key string, b []byte) (int, error) {
// Check context still valid
if err := ctx.Err(); err != nil {
return 0, err
}
// Lock map.
st.mu.Lock()
// Check key in store.
_, ok := st.fs[key]
if ok && !st.ow {
// Done with lock.
st.mu.Unlock()
// Overwrites are disabled, return existing key error.
return 0, internal.ErrWithKey(storage.ErrAlreadyExists, key)
}
// Write copy to store.
st.fs[key] = copyb(b)
// Done with lock.
st.mu.Unlock()
return len(b), nil
}
// WriteStream: implements Storage.WriteStream().
func (st *MemoryStorage) WriteStream(ctx context.Context, key string, r io.Reader) (int64, error) {
// Read all from reader.
b, err := io.ReadAll(r)
if err != nil {
return 0, err
}
// Write in-memory data to store.
n, err := st.WriteBytes(ctx, key, b)
return int64(n), err
}
// Stat: implements Storage.Stat().
func (st *MemoryStorage) Stat(ctx context.Context, key string) (*storage.Entry, error) {
// Check context still valid
if err := ctx.Err(); err != nil {
return nil, err
}
// Lock map.
st.mu.Lock()
// Check key in store.
b, ok := st.fs[key]
// Get entry size.
sz := int64(len(b))
// Done with lock.
st.mu.Unlock()
if !ok {
return nil, nil
}
return &storage.Entry{
Key: key,
Size: sz,
}, nil
}
// Remove: implements Storage.Remove().
func (st *MemoryStorage) Remove(ctx context.Context, key string) error {
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
// Lock map.
st.mu.Lock()
// Check key in store.
_, ok := st.fs[key]
if ok {
// Delete store key.
delete(st.fs, key)
}
// Done with lock.
st.mu.Unlock()
if !ok {
return internal.ErrWithKey(storage.ErrNotFound, key)
}
return nil
}
// WalkKeys: implements Storage.WalkKeys().
func (st *MemoryStorage) WalkKeys(ctx context.Context, opts storage.WalkKeysOpts) error {
if opts.Step == nil {
panic("nil step fn")
}
// Check context still valid.
if err := ctx.Err(); err != nil {
return err
}
var err error
// Lock map.
st.mu.Lock()
// Ensure unlocked.
defer st.mu.Unlock()
// Range all key-vals in hash map.
for key, val := range st.fs {
// Check for filtered prefix.
if opts.Prefix != "" &&
!strings.HasPrefix(key, opts.Prefix) {
continue // ignore
}
// Check for filtered key.
if opts.Filter != nil &&
!opts.Filter(key) {
continue // ignore
}
// Pass to provided step func.
err = opts.Step(storage.Entry{
Key: key,
Size: int64(len(val)),
})
if err != nil {
return err
}
}
return err
}
// copyb returns a copy of byte-slice b.
func copyb(b []byte) []byte {
if b == nil {
return nil
}
p := make([]byte, len(b))
_ = copy(p, b)
return p
}

47
vendor/codeberg.org/gruf/go-storage/s3/errors.go generated vendored Normal file
View file

@ -0,0 +1,47 @@
package s3
import (
"strings"
"codeberg.org/gruf/go-storage"
"codeberg.org/gruf/go-storage/internal"
"github.com/minio/minio-go/v7"
)
// transformS3Error transforms an error returned from S3Storage underlying
// minio.Core client, by wrapping where necessary with our own error types.
func transformS3Error(err error) error {
// Cast this to a minio error response
ersp, ok := err.(minio.ErrorResponse)
if ok {
switch ersp.Code {
case "NoSuchKey":
return internal.WrapErr(err, storage.ErrNotFound)
case "Conflict":
return internal.WrapErr(err, storage.ErrAlreadyExists)
default:
return err
}
}
// Check if error has an invalid object name prefix
if strings.HasPrefix(err.Error(), "Object name ") {
return internal.WrapErr(err, storage.ErrInvalidKey)
}
return err
}
func isNotFoundError(err error) bool {
errRsp, ok := err.(minio.ErrorResponse)
return ok && errRsp.Code == "NoSuchKey"
}
func isConflictError(err error) bool {
errRsp, ok := err.(minio.ErrorResponse)
return ok && errRsp.Code == "Conflict"
}
func isObjectNameError(err error) bool {
return strings.HasPrefix(err.Error(), "Object name ")
}

479
vendor/codeberg.org/gruf/go-storage/s3/s3.go generated vendored Normal file
View file

@ -0,0 +1,479 @@
package s3
import (
"bytes"
"context"
"errors"
"io"
"codeberg.org/gruf/go-storage"
"codeberg.org/gruf/go-storage/internal"
"github.com/minio/minio-go/v7"
)
// ensure S3Storage conforms to storage.Storage.
var _ storage.Storage = (*S3Storage)(nil)
// ensure bytes.Reader conforms to ReaderSize.
var _ ReaderSize = (*bytes.Reader)(nil)
// ReaderSize is an extension of the io.Reader interface
// that may be implemented by callers of WriteStream() in
// order to improve performance. When the size is known it
// is passed onto the underlying minio S3 library.
type ReaderSize interface {
io.Reader
Size() int64
}
// DefaultConfig returns the default S3Storage configuration.
func DefaultConfig() Config {
return defaultConfig
}
// immutable default configuration.
var defaultConfig = Config{
CoreOpts: minio.Options{},
GetOpts: minio.GetObjectOptions{},
PutOpts: minio.PutObjectOptions{},
PutChunkOpts: minio.PutObjectPartOptions{},
PutChunkSize: 4 * 1024 * 1024, // 4MiB
StatOpts: minio.StatObjectOptions{},
RemoveOpts: minio.RemoveObjectOptions{},
ListSize: 200,
}
// Config defines options to be used when opening an S3Storage,
// mostly options for underlying S3 client library.
type Config struct {
// CoreOpts are S3 client options
// passed during initialization.
CoreOpts minio.Options
// GetOpts are S3 client options
// passed during .Read___() calls.
GetOpts minio.GetObjectOptions
// PutOpts are S3 client options
// passed during .Write___() calls.
PutOpts minio.PutObjectOptions
// PutChunkSize is the chunk size (in bytes)
// to use when sending a byte stream reader
// of unknown size as a multi-part object.
PutChunkSize int64
// PutChunkOpts are S3 client options
// passed during chunked .Write___() calls.
PutChunkOpts minio.PutObjectPartOptions
// StatOpts are S3 client options
// passed during .Stat() calls.
StatOpts minio.StatObjectOptions
// RemoveOpts are S3 client options
// passed during .Remove() calls.
RemoveOpts minio.RemoveObjectOptions
// ListSize determines how many items
// to include in each list request, made
// during calls to .WalkKeys().
ListSize int
}
// getS3Config returns valid (and owned!) Config for given ptr.
func getS3Config(cfg *Config) Config {
// See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
const minChunkSz = 5 * 1024 * 1024
if cfg == nil {
// use defaults.
return defaultConfig
}
// Ensure a minimum compat chunk size.
if cfg.PutChunkSize <= minChunkSz {
cfg.PutChunkSize = minChunkSz
}
// Ensure valid list size.
if cfg.ListSize <= 0 {
cfg.ListSize = 200
}
return Config{
CoreOpts: cfg.CoreOpts,
GetOpts: cfg.GetOpts,
PutOpts: cfg.PutOpts,
PutChunkSize: cfg.PutChunkSize,
ListSize: cfg.ListSize,
StatOpts: cfg.StatOpts,
RemoveOpts: cfg.RemoveOpts,
}
}
// S3Storage is a storage implementation that stores key-value
// pairs in an S3 instance at given endpoint with bucket name.
type S3Storage struct {
client *minio.Core
bucket string
config Config
}
// Open opens a new S3Storage instance with given S3 endpoint URL, bucket name and configuration.
func Open(endpoint string, bucket string, cfg *Config) (*S3Storage, error) {
// Check + set config defaults.
config := getS3Config(cfg)
// Create new S3 client connection to given endpoint.
client, err := minio.NewCore(endpoint, &config.CoreOpts)
if err != nil {
return nil, err
}
ctx := context.Background()
// Check that provided bucket actually exists.
exists, err := client.BucketExists(ctx, bucket)
if err != nil {
return nil, err
} else if !exists {
return nil, errors.New("storage/s3: bucket does not exist")
}
return &S3Storage{
client: client,
bucket: bucket,
config: config,
}, nil
}
// Client: returns access to the underlying S3 client.
func (st *S3Storage) Client() *minio.Core {
return st.client
}
// Clean: implements Storage.Clean().
func (st *S3Storage) Clean(ctx context.Context) error {
return nil // nothing to do for S3
}
// ReadBytes: implements Storage.ReadBytes().
func (st *S3Storage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
// Get stream reader for key
rc, err := st.ReadStream(ctx, key)
if err != nil {
return nil, err
}
// Read all data to memory.
data, err := io.ReadAll(rc)
if err != nil {
_ = rc.Close()
return nil, err
}
// Close storage stream reader.
if err := rc.Close(); err != nil {
return nil, err
}
return data, nil
}
// ReadStream: implements Storage.ReadStream().
func (st *S3Storage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
// Fetch object reader from S3 bucket
rc, _, _, err := st.client.GetObject(
ctx,
st.bucket,
key,
st.config.GetOpts,
)
if err != nil {
if isNotFoundError(err) {
// Wrap not found errors as our not found type.
err = internal.WrapErr(err, storage.ErrNotFound)
} else if !isObjectNameError(err) {
// Wrap object name errors as our invalid key type.
err = internal.WrapErr(err, storage.ErrInvalidKey)
}
return nil, transformS3Error(err)
}
return rc, nil
}
// WriteBytes: implements Storage.WriteBytes().
func (st *S3Storage) WriteBytes(ctx context.Context, key string, value []byte) (int, error) {
n, err := st.WriteStream(ctx, key, bytes.NewReader(value))
return int(n), err
}
// WriteStream: implements Storage.WriteStream().
func (st *S3Storage) WriteStream(ctx context.Context, key string, r io.Reader) (int64, error) {
if rs, ok := r.(ReaderSize); ok {
// This reader supports providing us the size of
// the encompassed data, allowing us to perform
// a singular .PutObject() call with length.
info, err := st.client.PutObject(
ctx,
st.bucket,
key,
r,
rs.Size(),
"",
"",
st.config.PutOpts,
)
if err != nil {
if isConflictError(err) {
// Wrap conflict errors as our already exists type.
err = internal.WrapErr(err, storage.ErrAlreadyExists)
} else if !isObjectNameError(err) {
// Wrap object name errors as our invalid key type.
err = internal.WrapErr(err, storage.ErrInvalidKey)
}
return 0, err
}
return info.Size, nil
}
// Start a new multipart upload to get ID.
uploadID, err := st.client.NewMultipartUpload(
ctx,
st.bucket,
key,
st.config.PutOpts,
)
if err != nil {
if isConflictError(err) {
// Wrap conflict errors as our already exists type.
err = internal.WrapErr(err, storage.ErrAlreadyExists)
} else if !isObjectNameError(err) {
// Wrap object name errors as our invalid key type.
err = internal.WrapErr(err, storage.ErrInvalidKey)
}
return 0, transformS3Error(err)
}
var (
index = int(1) // parts index
total = int64(0)
parts []minio.CompletePart
chunk = make([]byte, st.config.PutChunkSize)
rbuf = bytes.NewReader(nil)
)
// Note that we do not perform any kind of
// memory pooling of the chunk buffers here.
// Optimal chunking sizes for S3 writes are in
// the orders of megabytes, so letting the GC
// collect these ASAP is much preferred.
loop:
for done := false; !done; {
// Read next chunk into byte buffer.
n, err := io.ReadFull(r, chunk)
switch err {
// Successful read.
case nil:
// Reached end, buffer empty.
case io.EOF:
break loop
// Reached end, but buffer not empty.
case io.ErrUnexpectedEOF:
done = true
// All other errors.
default:
return 0, err
}
// Reset byte reader.
rbuf.Reset(chunk[:n])
// Put this object chunk in S3 store.
pt, err := st.client.PutObjectPart(
ctx,
st.bucket,
key,
uploadID,
index,
rbuf,
int64(n),
st.config.PutChunkOpts,
)
if err != nil {
return 0, err
}
// Append completed part to slice.
parts = append(parts, minio.CompletePart{
PartNumber: pt.PartNumber,
ETag: pt.ETag,
ChecksumCRC32: pt.ChecksumCRC32,
ChecksumCRC32C: pt.ChecksumCRC32C,
ChecksumSHA1: pt.ChecksumSHA1,
ChecksumSHA256: pt.ChecksumSHA256,
})
// Iterate.
index++
// Update total size.
total += pt.Size
}
// Complete this multi-part upload operation
_, err = st.client.CompleteMultipartUpload(
ctx,
st.bucket,
key,
uploadID,
parts,
st.config.PutOpts,
)
if err != nil {
return 0, err
}
return total, nil
}
// Stat: implements Storage.Stat().
func (st *S3Storage) Stat(ctx context.Context, key string) (*storage.Entry, error) {
// Query object in S3 bucket.
stat, err := st.client.StatObject(
ctx,
st.bucket,
key,
st.config.StatOpts,
)
if err != nil {
if isNotFoundError(err) {
// Ignore err return
// for not-found.
err = nil
} else if !isObjectNameError(err) {
// Wrap object name errors as our invalid key type.
err = internal.WrapErr(err, storage.ErrInvalidKey)
}
return nil, err
}
return &storage.Entry{
Key: key,
Size: stat.Size,
}, nil
}
// Remove: implements Storage.Remove().
func (st *S3Storage) Remove(ctx context.Context, key string) error {
// Query object in S3 bucket.
_, err := st.client.StatObject(
ctx,
st.bucket,
key,
st.config.StatOpts,
)
if err != nil {
if isNotFoundError(err) {
// Wrap not found errors as our not found type.
err = internal.WrapErr(err, storage.ErrNotFound)
} else if !isObjectNameError(err) {
// Wrap object name errors as our invalid key type.
err = internal.WrapErr(err, storage.ErrInvalidKey)
}
return err
}
// Remove object from S3 bucket
err = st.client.RemoveObject(
ctx,
st.bucket,
key,
st.config.RemoveOpts,
)
if err != nil {
if isNotFoundError(err) {
// Wrap not found errors as our not found type.
err = internal.WrapErr(err, storage.ErrNotFound)
} else if !isObjectNameError(err) {
// Wrap object name errors as our invalid key type.
err = internal.WrapErr(err, storage.ErrInvalidKey)
}
return err
}
return nil
}
// WalkKeys: implements Storage.WalkKeys().
func (st *S3Storage) WalkKeys(ctx context.Context, opts storage.WalkKeysOpts) error {
if opts.Step == nil {
panic("nil step fn")
}
var (
prev string
token string
)
for {
// List objects in bucket starting at marker.
result, err := st.client.ListObjectsV2(
st.bucket,
opts.Prefix,
prev,
token,
"",
st.config.ListSize,
)
if err != nil {
return err
}
// Iterate through list result contents.
for _, obj := range result.Contents {
// Skip filtered obj keys.
if opts.Filter != nil &&
opts.Filter(obj.Key) {
continue
}
// Pass each obj through step func.
if err := opts.Step(storage.Entry{
Key: obj.Key,
Size: obj.Size,
}); err != nil {
return err
}
}
// No token means we reached end of bucket.
if result.NextContinuationToken == "" {
return nil
}
// Set continue token and prev mark
token = result.NextContinuationToken
prev = result.StartAfter
}
}

73
vendor/codeberg.org/gruf/go-storage/storage.go generated vendored Normal file
View file

@ -0,0 +1,73 @@
package storage
import (
"context"
"io"
)
// Storage defines a means of accessing and storing
// data to some abstracted underlying mechanism. Whether
// that be in-memory, an on-disk filesystem or S3 bucket.
type Storage interface {
// ReadBytes returns the data located at key (e.g. filepath) in storage.
ReadBytes(ctx context.Context, key string) ([]byte, error)
// ReadStream returns an io.ReadCloser for the data at key (e.g. filepath) in storage.
ReadStream(ctx context.Context, key string) (io.ReadCloser, error)
// WriteBytes writes the supplied data at key (e.g. filepath) in storage.
WriteBytes(ctx context.Context, key string, data []byte) (int, error)
// WriteStream writes the supplied data stream at key (e.g. filepath) in storage.
WriteStream(ctx context.Context, key string, stream io.Reader) (int64, error)
// Stat returns details about key (e.g. filepath) in storage, nil indicates not found.
Stat(ctx context.Context, key string) (*Entry, error)
// Remove will remove data at key from storage.
Remove(ctx context.Context, key string) error
// Clean in simple terms performs a clean of underlying
// storage mechanism. For memory implementations this may
// compact the underlying hashmap, for disk filesystems
// this may remove now-unused directories.
Clean(ctx context.Context) error
// WalkKeys walks available keys using opts in storage.
WalkKeys(ctx context.Context, opts WalkKeysOpts) error
}
// Entry represents a key in a Storage{} implementation,
// with any associated metadata that may have been set.
type Entry struct {
// Key is this entry's
// unique storage key.
Key string
// Size is the size of
// this entry in storage.
Size int64
}
// WalkKeysOpts are arguments provided
// to a storage WalkKeys() implementation.
type WalkKeysOpts struct {
// Prefix can be used to filter entries
// by the given key prefix, for example
// only those under a subdirectory. This
// is preferred over Filter() function.
Prefix string
// Filter can be used to filter entries
// by any custom metric before before they
// are passed to Step() function. E.g.
// filter storage entries by regexp.
Filter func(string) bool
// Step is called for each entry during
// WalkKeys, error triggers early return.
Step func(Entry) error
}

29
vendor/codeberg.org/gruf/go-storage/test.sh generated vendored Normal file
View file

@ -0,0 +1,29 @@
#!/bin/sh
export \
MINIO_ADDR='127.0.0.1:8080' \
MINIO_BUCKET='test' \
MINIO_ROOT_USER='root' \
MINIO_ROOT_PASSWORD='password' \
MINIO_PID=0 \
S3_DIR=$(mktemp -d)
# Drop the test S3 bucket and kill minio on exit
trap 'rm -rf "$S3_DIR"; [ $MINIO_PID -ne 0 ] && kill -9 $MINIO_PID' \
HUP INT QUIT ABRT KILL TERM EXIT
# Create required S3 bucket dir
mkdir -p "${S3_DIR}/${MINIO_BUCKET}"
# Start the minio test S3 server instance
minio server --address "$MINIO_ADDR" "$S3_DIR" & > /dev/null 2>&1
MINIO_PID=$!; [ $? -ne 0 ] && {
echo 'failed to start minio'
exit 1
}
# Let server startup
sleep 1
# Run go-store tests
go test ./... -v

View file

@ -1,303 +0,0 @@
package storage
import (
"bytes"
"io"
"sync"
"codeberg.org/gruf/go-iotools"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/compress/snappy"
"github.com/klauspost/compress/zlib"
)
// Compressor defines a means of compressing/decompressing values going into a key-value store
type Compressor interface {
// Reader returns a new decompressing io.ReadCloser based on supplied (compressed) io.Reader
Reader(io.ReadCloser) (io.ReadCloser, error)
// Writer returns a new compressing io.WriteCloser based on supplied (uncompressed) io.Writer
Writer(io.WriteCloser) (io.WriteCloser, error)
}
type gzipCompressor struct {
rpool sync.Pool
wpool sync.Pool
}
// GZipCompressor returns a new Compressor that implements GZip at default compression level
func GZipCompressor() Compressor {
return GZipCompressorLevel(gzip.DefaultCompression)
}
// GZipCompressorLevel returns a new Compressor that implements GZip at supplied compression level
func GZipCompressorLevel(level int) Compressor {
// GZip readers immediately check for valid
// header data on allocation / reset, so we
// need a set of valid header data so we can
// iniitialize reader instances in mempool.
hdr := bytes.NewBuffer(nil)
// Init writer to ensure valid level provided
gw, err := gzip.NewWriterLevel(hdr, level)
if err != nil {
panic(err)
}
// Write empty data to ensure gzip
// header data is in byte buffer.
_, _ = gw.Write([]byte{})
_ = gw.Close()
return &gzipCompressor{
rpool: sync.Pool{
New: func() any {
hdr := bytes.NewReader(hdr.Bytes())
gr, _ := gzip.NewReader(hdr)
return gr
},
},
wpool: sync.Pool{
New: func() any {
gw, _ := gzip.NewWriterLevel(nil, level)
return gw
},
},
}
}
func (c *gzipCompressor) Reader(rc io.ReadCloser) (io.ReadCloser, error) {
var released bool
// Acquire from pool.
gr := c.rpool.Get().(*gzip.Reader)
if err := gr.Reset(rc); err != nil {
c.rpool.Put(gr)
return nil, err
}
return iotools.ReadCloser(gr, iotools.CloserFunc(func() error {
if !released {
released = true
defer c.rpool.Put(gr)
}
// Close compressor
err1 := gr.Close()
// Close original stream.
err2 := rc.Close()
// Return err1 or 2
if err1 != nil {
return err1
}
return err2
})), nil
}
func (c *gzipCompressor) Writer(wc io.WriteCloser) (io.WriteCloser, error) {
var released bool
// Acquire from pool.
gw := c.wpool.Get().(*gzip.Writer)
gw.Reset(wc)
return iotools.WriteCloser(gw, iotools.CloserFunc(func() error {
if !released {
released = true
c.wpool.Put(gw)
}
// Close compressor
err1 := gw.Close()
// Close original stream.
err2 := wc.Close()
// Return err1 or 2
if err1 != nil {
return err1
}
return err2
})), nil
}
type zlibCompressor struct {
rpool sync.Pool
wpool sync.Pool
dict []byte
}
// ZLibCompressor returns a new Compressor that implements ZLib at default compression level
func ZLibCompressor() Compressor {
return ZLibCompressorLevelDict(zlib.DefaultCompression, nil)
}
// ZLibCompressorLevel returns a new Compressor that implements ZLib at supplied compression level
func ZLibCompressorLevel(level int) Compressor {
return ZLibCompressorLevelDict(level, nil)
}
// ZLibCompressorLevelDict returns a new Compressor that implements ZLib at supplied compression level with supplied dict
func ZLibCompressorLevelDict(level int, dict []byte) Compressor {
// ZLib readers immediately check for valid
// header data on allocation / reset, so we
// need a set of valid header data so we can
// iniitialize reader instances in mempool.
hdr := bytes.NewBuffer(nil)
// Init writer to ensure valid level + dict provided
zw, err := zlib.NewWriterLevelDict(hdr, level, dict)
if err != nil {
panic(err)
}
// Write empty data to ensure zlib
// header data is in byte buffer.
zw.Write([]byte{})
zw.Close()
return &zlibCompressor{
rpool: sync.Pool{
New: func() any {
hdr := bytes.NewReader(hdr.Bytes())
zr, _ := zlib.NewReaderDict(hdr, dict)
return zr
},
},
wpool: sync.Pool{
New: func() any {
zw, _ := zlib.NewWriterLevelDict(nil, level, dict)
return zw
},
},
dict: dict,
}
}
func (c *zlibCompressor) Reader(rc io.ReadCloser) (io.ReadCloser, error) {
var released bool
zr := c.rpool.Get().(interface {
io.ReadCloser
zlib.Resetter
})
if err := zr.Reset(rc, c.dict); err != nil {
c.rpool.Put(zr)
return nil, err
}
return iotools.ReadCloser(zr, iotools.CloserFunc(func() error {
if !released {
released = true
defer c.rpool.Put(zr)
}
// Close compressor
err1 := zr.Close()
// Close original stream.
err2 := rc.Close()
// Return err1 or 2
if err1 != nil {
return err1
}
return err2
})), nil
}
func (c *zlibCompressor) Writer(wc io.WriteCloser) (io.WriteCloser, error) {
var released bool
// Acquire from pool.
zw := c.wpool.Get().(*zlib.Writer)
zw.Reset(wc)
return iotools.WriteCloser(zw, iotools.CloserFunc(func() error {
if !released {
released = true
c.wpool.Put(zw)
}
// Close compressor
err1 := zw.Close()
// Close original stream.
err2 := wc.Close()
// Return err1 or 2
if err1 != nil {
return err1
}
return err2
})), nil
}
type snappyCompressor struct {
rpool sync.Pool
wpool sync.Pool
}
// SnappyCompressor returns a new Compressor that implements Snappy.
func SnappyCompressor() Compressor {
return &snappyCompressor{
rpool: sync.Pool{
New: func() any { return snappy.NewReader(nil) },
},
wpool: sync.Pool{
New: func() any { return snappy.NewWriter(nil) },
},
}
}
func (c *snappyCompressor) Reader(rc io.ReadCloser) (io.ReadCloser, error) {
var released bool
// Acquire from pool.
sr := c.rpool.Get().(*snappy.Reader)
sr.Reset(rc)
return iotools.ReadCloser(sr, iotools.CloserFunc(func() error {
if !released {
released = true
defer c.rpool.Put(sr)
}
// Close original stream.
return rc.Close()
})), nil
}
func (c *snappyCompressor) Writer(wc io.WriteCloser) (io.WriteCloser, error) {
var released bool
// Acquire from pool.
sw := c.wpool.Get().(*snappy.Writer)
sw.Reset(wc)
return iotools.WriteCloser(sw, iotools.CloserFunc(func() error {
if !released {
released = true
c.wpool.Put(sw)
}
// Close original stream.
return wc.Close()
})), nil
}
type nopCompressor struct{}
// NoCompression is a Compressor that simply does nothing.
func NoCompression() Compressor {
return &nopCompressor{}
}
func (c *nopCompressor) Reader(rc io.ReadCloser) (io.ReadCloser, error) {
return rc, nil
}
func (c *nopCompressor) Writer(wc io.WriteCloser) (io.WriteCloser, error) {
return wc, nil
}

View file

@ -1,424 +0,0 @@
package storage
import (
"context"
"errors"
"io"
"io/fs"
"os"
"path"
_path "path"
"strings"
"syscall"
"codeberg.org/gruf/go-bytes"
"codeberg.org/gruf/go-fastcopy"
"codeberg.org/gruf/go-store/v2/util"
)
// DefaultDiskConfig is the default DiskStorage configuration.
var DefaultDiskConfig = &DiskConfig{
Overwrite: true,
WriteBufSize: 4096,
Transform: NopTransform(),
Compression: NoCompression(),
}
// DiskConfig defines options to be used when opening a DiskStorage.
type DiskConfig struct {
// Transform is the supplied key <--> path KeyTransform.
Transform KeyTransform
// WriteBufSize is the buffer size to use when writing file streams.
WriteBufSize int
// Overwrite allows overwriting values of stored keys in the storage.
Overwrite bool
// LockFile allows specifying the filesystem path to use for the lockfile,
// providing only a filename it will store the lockfile within provided store
// path and nest the store under `path/store` to prevent access to lockfile.
LockFile string
// Compression is the Compressor to use when reading / writing files,
// default is no compression.
Compression Compressor
}
// getDiskConfig returns a valid DiskConfig for supplied ptr.
func getDiskConfig(cfg *DiskConfig) DiskConfig {
// If nil, use default
if cfg == nil {
cfg = DefaultDiskConfig
}
// Assume nil transform == none
if cfg.Transform == nil {
cfg.Transform = NopTransform()
}
// Assume nil compress == none
if cfg.Compression == nil {
cfg.Compression = NoCompression()
}
// Assume 0 buf size == use default
if cfg.WriteBufSize <= 0 {
cfg.WriteBufSize = DefaultDiskConfig.WriteBufSize
}
// Assume empty lockfile path == use default
if len(cfg.LockFile) == 0 {
cfg.LockFile = LockFile
}
// Return owned config copy
return DiskConfig{
Transform: cfg.Transform,
WriteBufSize: cfg.WriteBufSize,
Overwrite: cfg.Overwrite,
LockFile: cfg.LockFile,
Compression: cfg.Compression,
}
}
// DiskStorage is a Storage implementation that stores directly to a filesystem.
type DiskStorage struct {
path string // path is the root path of this store
cppool fastcopy.CopyPool // cppool is the prepared io copier with buffer pool
config DiskConfig // cfg is the supplied configuration for this store
lock *Lock // lock is the opened lockfile for this storage instance
}
// OpenDisk opens a DiskStorage instance for given folder path and configuration.
func OpenDisk(path string, cfg *DiskConfig) (*DiskStorage, error) {
// Get checked config
config := getDiskConfig(cfg)
// Acquire path builder
pb := util.GetPathBuilder()
defer util.PutPathBuilder(pb)
// Clean provided store path, ensure
// ends in '/' to help later path trimming
storePath := pb.Clean(path) + "/"
// Clean provided lockfile path
lockfile := pb.Clean(config.LockFile)
// Check if lockfile is an *actual* path or just filename
if lockDir, _ := _path.Split(lockfile); lockDir == "" {
// Lockfile is a filename, store must be nested under
// $storePath/store to prevent access to the lockfile
storePath += "store/"
lockfile = pb.Join(path, lockfile)
}
// Attempt to open dir path
file, err := os.OpenFile(storePath, defaultFileROFlags, defaultDirPerms)
if err != nil {
// If not a not-exist error, return
if !os.IsNotExist(err) {
return nil, err
}
// Attempt to make store path dirs
err = os.MkdirAll(storePath, defaultDirPerms)
if err != nil {
return nil, err
}
// Reopen dir now it's been created
file, err = os.OpenFile(storePath, defaultFileROFlags, defaultDirPerms)
if err != nil {
return nil, err
}
}
defer file.Close()
// Double check this is a dir (NOT a file!)
stat, err := file.Stat()
if err != nil {
return nil, err
} else if !stat.IsDir() {
return nil, errors.New("store/storage: path is file")
}
// Open and acquire storage lock for path
lock, err := OpenLock(lockfile)
if err != nil {
return nil, err
}
// Prepare DiskStorage
st := &DiskStorage{
path: storePath,
config: config,
lock: lock,
}
// Set copypool buffer size
st.cppool.Buffer(config.WriteBufSize)
return st, nil
}
// Clean implements Storage.Clean().
func (st *DiskStorage) Clean(ctx context.Context) error {
// Check if open
if st.lock.Closed() {
return ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
// Clean-out unused directories
return cleanDirs(st.path)
}
// ReadBytes implements Storage.ReadBytes().
func (st *DiskStorage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
// Get stream reader for key
rc, err := st.ReadStream(ctx, key)
if err != nil {
return nil, err
}
defer rc.Close()
// Read all bytes and return
return io.ReadAll(rc)
}
// ReadStream implements Storage.ReadStream().
func (st *DiskStorage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
// Get file path for key
kpath, err := st.Filepath(key)
if err != nil {
return nil, err
}
// Check if open
if st.lock.Closed() {
return nil, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return nil, err
}
// Attempt to open file (replace ENOENT with our own)
file, err := open(kpath, defaultFileROFlags)
if err != nil {
return nil, errSwapNotFound(err)
}
// Wrap the file in a compressor
cFile, err := st.config.Compression.Reader(file)
if err != nil {
_ = file.Close()
return nil, err
}
return cFile, nil
}
// WriteBytes implements Storage.WriteBytes().
func (st *DiskStorage) WriteBytes(ctx context.Context, key string, value []byte) (int, error) {
n, err := st.WriteStream(ctx, key, bytes.NewReader(value))
return int(n), err
}
// WriteStream implements Storage.WriteStream().
func (st *DiskStorage) WriteStream(ctx context.Context, key string, r io.Reader) (int64, error) {
// Get file path for key
kpath, err := st.Filepath(key)
if err != nil {
return 0, err
}
// Check if open
if st.lock.Closed() {
return 0, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return 0, err
}
// Ensure dirs leading up to file exist
err = os.MkdirAll(path.Dir(kpath), defaultDirPerms)
if err != nil {
return 0, err
}
// Prepare to swap error if need-be
errSwap := errSwapNoop
// Build file RW flags
flags := defaultFileRWFlags
if !st.config.Overwrite {
flags |= syscall.O_EXCL
// Catch + replace err exist
errSwap = errSwapExist
}
// Attempt to open file
file, err := open(kpath, flags)
if err != nil {
return 0, errSwap(err)
}
// Wrap the file in a compressor
cFile, err := st.config.Compression.Writer(file)
if err != nil {
_ = file.Close()
return 0, err
}
// Wraps file.Close().
defer cFile.Close()
// Copy provided reader to file
return st.cppool.Copy(cFile, r)
}
// Stat implements Storage.Stat().
func (st *DiskStorage) Stat(ctx context.Context, key string) (bool, error) {
// Get file path for key
kpath, err := st.Filepath(key)
if err != nil {
return false, err
}
// Check if open
if st.lock.Closed() {
return false, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return false, err
}
// Check for file on disk
return stat(kpath)
}
// Remove implements Storage.Remove().
func (st *DiskStorage) Remove(ctx context.Context, key string) error {
// Get file path for key
kpath, err := st.Filepath(key)
if err != nil {
return err
}
// Check if open
if st.lock.Closed() {
return ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
// Remove at path (we know this is file)
if err := unlink(kpath); err != nil {
return errSwapNotFound(err)
}
return nil
}
// Close implements Storage.Close().
func (st *DiskStorage) Close() error {
return st.lock.Close()
}
// WalkKeys implements Storage.WalkKeys().
func (st *DiskStorage) WalkKeys(ctx context.Context, opts WalkKeysOptions) error {
// Check if open
if st.lock.Closed() {
return ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
// Acquire path builder
pb := util.GetPathBuilder()
defer util.PutPathBuilder(pb)
// Walk dir for entries
return walkDir(pb, st.path, func(kpath string, fsentry fs.DirEntry) error {
if !fsentry.Type().IsRegular() {
// Only deal with regular files
return nil
}
// Get full item path (without root)
kpath = pb.Join(kpath, fsentry.Name())
kpath = kpath[len(st.path):]
// Load file info. This should already
// be loaded due to the underlying call
// to os.File{}.ReadDir() populating them
info, err := fsentry.Info()
if err != nil {
return err
}
// Perform provided walk function
return opts.WalkFn(ctx, Entry{
Key: st.config.Transform.PathToKey(kpath),
Size: info.Size(),
})
})
}
// Filepath checks and returns a formatted Filepath for given key.
func (st *DiskStorage) Filepath(key string) (string, error) {
// Calculate transformed key path
key = st.config.Transform.KeyToPath(key)
// Acquire path builder
pb := util.GetPathBuilder()
defer util.PutPathBuilder(pb)
// Generate key path
pb.Append(st.path)
pb.Append(key)
// Check for dir traversal outside of root
if isDirTraversal(st.path, pb.String()) {
return "", ErrInvalidKey
}
return string(pb.B), nil
}
// isDirTraversal will check if rootPlusPath is a dir traversal outside of root,
// assuming that both are cleaned and that rootPlusPath is path.Join(root, somePath).
func isDirTraversal(root, rootPlusPath string) bool {
switch {
// Root is $PWD, check for traversal out of
case root == ".":
return strings.HasPrefix(rootPlusPath, "../")
// The path MUST be prefixed by root
case !strings.HasPrefix(rootPlusPath, root):
return true
// In all other cases, check not equal
default:
return len(root) == len(rootPlusPath)
}
}

View file

@ -1,110 +0,0 @@
package storage
import (
"errors"
"strings"
"syscall"
"github.com/minio/minio-go/v7"
)
var (
// ErrClosed is returned on operations on a closed storage
ErrClosed = new_error("closed")
// ErrNotFound is the error returned when a key cannot be found in storage
ErrNotFound = new_error("key not found")
// ErrAlreadyExist is the error returned when a key already exists in storage
ErrAlreadyExists = new_error("key already exists")
// ErrInvalidkey is the error returned when an invalid key is passed to storage
ErrInvalidKey = new_error("invalid key")
// ErrAlreadyLocked is returned on fail opening a storage lockfile
ErrAlreadyLocked = new_error("storage lock already open")
)
// new_error returns a new error instance prefixed by package prefix.
func new_error(msg string) error {
return errors.New("store/storage: " + msg)
}
// wrappedError allows wrapping together an inner with outer error.
type wrappedError struct {
inner error
outer error
}
// wrap will return a new wrapped error from given inner and outer errors.
func wrap(outer, inner error) *wrappedError {
return &wrappedError{
inner: inner,
outer: outer,
}
}
func (e *wrappedError) Is(target error) bool {
return e.outer == target || e.inner == target
}
func (e *wrappedError) Error() string {
return e.outer.Error() + ": " + e.inner.Error()
}
func (e *wrappedError) Unwrap() error {
return e.inner
}
// errSwapNoop performs no error swaps
func errSwapNoop(err error) error {
return err
}
// ErrSwapNotFound swaps syscall.ENOENT for ErrNotFound
func errSwapNotFound(err error) error {
if err == syscall.ENOENT {
return ErrNotFound
}
return err
}
// errSwapExist swaps syscall.EEXIST for ErrAlreadyExists
func errSwapExist(err error) error {
if err == syscall.EEXIST {
return ErrAlreadyExists
}
return err
}
// errSwapUnavailable swaps syscall.EAGAIN for ErrAlreadyLocked
func errSwapUnavailable(err error) error {
if err == syscall.EAGAIN {
return ErrAlreadyLocked
}
return err
}
// transformS3Error transforms an error returned from S3Storage underlying
// minio.Core client, by wrapping where necessary with our own error types.
func transformS3Error(err error) error {
// Cast this to a minio error response
ersp, ok := err.(minio.ErrorResponse)
if ok {
switch ersp.Code {
case "NoSuchKey":
return wrap(ErrNotFound, err)
case "Conflict":
return wrap(ErrAlreadyExists, err)
default:
return err
}
}
// Check if error has an invalid object name prefix
if strings.HasPrefix(err.Error(), "Object name ") {
return wrap(ErrInvalidKey, err)
}
return err
}

View file

@ -1,59 +0,0 @@
package storage
import (
"sync/atomic"
"syscall"
)
// LockFile is our standard lockfile name.
const LockFile = "store.lock"
// Lock represents a filesystem lock to ensure only one storage instance open per path.
type Lock struct {
fd int
st uint32
}
// OpenLock opens a lockfile at path.
func OpenLock(path string) (*Lock, error) {
var fd int
// Open the file descriptor at path
err := retryOnEINTR(func() (err error) {
fd, err = syscall.Open(path, defaultFileLockFlags, defaultFilePerms)
return
})
if err != nil {
return nil, err
}
// Get a flock on the file descriptor
err = retryOnEINTR(func() error {
return syscall.Flock(fd, syscall.LOCK_EX|syscall.LOCK_NB)
})
if err != nil {
return nil, errSwapUnavailable(err)
}
return &Lock{fd: fd}, nil
}
// Close will attempt to close the lockfile and file descriptor.
func (f *Lock) Close() error {
var err error
if atomic.CompareAndSwapUint32(&f.st, 0, 1) {
// Ensure gets closed
defer syscall.Close(f.fd)
// Call funlock on the file descriptor
err = retryOnEINTR(func() error {
return syscall.Flock(f.fd, syscall.LOCK_UN|syscall.LOCK_NB)
})
}
return err
}
// Closed will return whether this lockfile has been closed (and unlocked).
func (f *Lock) Closed() bool {
return (atomic.LoadUint32(&f.st) == 1)
}

View file

@ -1,228 +0,0 @@
package storage
import (
"context"
"io"
"sync/atomic"
"codeberg.org/gruf/go-bytes"
"codeberg.org/gruf/go-iotools"
"github.com/cornelk/hashmap"
)
// MemoryStorage is a storage implementation that simply stores key-value
// pairs in a Go map in-memory. The map is protected by a mutex.
type MemoryStorage struct {
ow bool // overwrites
fs *hashmap.Map[string, []byte]
st uint32
}
// OpenMemory opens a new MemoryStorage instance with internal map starting size.
func OpenMemory(size int, overwrites bool) *MemoryStorage {
if size <= 0 {
size = 8
}
return &MemoryStorage{
fs: hashmap.NewSized[string, []byte](uintptr(size)),
ow: overwrites,
}
}
// Clean implements Storage.Clean().
func (st *MemoryStorage) Clean(ctx context.Context) error {
// Check store open
if st.closed() {
return ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
return nil
}
// ReadBytes implements Storage.ReadBytes().
func (st *MemoryStorage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
// Check store open
if st.closed() {
return nil, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return nil, err
}
// Check for key in store
b, ok := st.fs.Get(key)
if !ok {
return nil, ErrNotFound
}
// Create return copy
return copyb(b), nil
}
// ReadStream implements Storage.ReadStream().
func (st *MemoryStorage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
// Check store open
if st.closed() {
return nil, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return nil, err
}
// Check for key in store
b, ok := st.fs.Get(key)
if !ok {
return nil, ErrNotFound
}
// Create io.ReadCloser from 'b' copy
r := bytes.NewReader(copyb(b))
return iotools.NopReadCloser(r), nil
}
// WriteBytes implements Storage.WriteBytes().
func (st *MemoryStorage) WriteBytes(ctx context.Context, key string, b []byte) (int, error) {
// Check store open
if st.closed() {
return 0, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return 0, err
}
// Check for key that already exists
if _, ok := st.fs.Get(key); ok && !st.ow {
return 0, ErrAlreadyExists
}
// Write key copy to store
st.fs.Set(key, copyb(b))
return len(b), nil
}
// WriteStream implements Storage.WriteStream().
func (st *MemoryStorage) WriteStream(ctx context.Context, key string, r io.Reader) (int64, error) {
// Check store open
if st.closed() {
return 0, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return 0, err
}
// Check for key that already exists
if _, ok := st.fs.Get(key); ok && !st.ow {
return 0, ErrAlreadyExists
}
// Read all from reader
b, err := io.ReadAll(r)
if err != nil {
return 0, err
}
// Write key to store
st.fs.Set(key, b)
return int64(len(b)), nil
}
// Stat implements Storage.Stat().
func (st *MemoryStorage) Stat(ctx context.Context, key string) (bool, error) {
// Check store open
if st.closed() {
return false, ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return false, err
}
// Check for key in store
_, ok := st.fs.Get(key)
return ok, nil
}
// Remove implements Storage.Remove().
func (st *MemoryStorage) Remove(ctx context.Context, key string) error {
// Check store open
if st.closed() {
return ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
// Attempt to delete key
ok := st.fs.Del(key)
if !ok {
return ErrNotFound
}
return nil
}
// WalkKeys implements Storage.WalkKeys().
func (st *MemoryStorage) WalkKeys(ctx context.Context, opts WalkKeysOptions) error {
// Check store open
if st.closed() {
return ErrClosed
}
// Check context still valid
if err := ctx.Err(); err != nil {
return err
}
var err error
// Nil check func
_ = opts.WalkFn
// Pass each key in map to walk function
st.fs.Range(func(key string, val []byte) bool {
err = opts.WalkFn(ctx, Entry{
Key: key,
Size: int64(len(val)),
})
return (err == nil)
})
return err
}
// Close implements Storage.Close().
func (st *MemoryStorage) Close() error {
atomic.StoreUint32(&st.st, 1)
return nil
}
// closed returns whether MemoryStorage is closed.
func (st *MemoryStorage) closed() bool {
return (atomic.LoadUint32(&st.st) == 1)
}
// copyb returns a copy of byte-slice b.
func copyb(b []byte) []byte {
if b == nil {
return nil
}
p := make([]byte, len(b))
_ = copy(p, b)
return p
}

View file

@ -1,397 +0,0 @@
package storage
import (
"bytes"
"context"
"io"
"sync/atomic"
"codeberg.org/gruf/go-store/v2/util"
"github.com/minio/minio-go/v7"
)
// DefaultS3Config is the default S3Storage configuration.
var DefaultS3Config = &S3Config{
CoreOpts: minio.Options{},
GetOpts: minio.GetObjectOptions{},
PutOpts: minio.PutObjectOptions{},
PutChunkOpts: minio.PutObjectPartOptions{},
PutChunkSize: 4 * 1024 * 1024, // 4MiB
StatOpts: minio.StatObjectOptions{},
RemoveOpts: minio.RemoveObjectOptions{},
ListSize: 200,
}
// S3Config defines options to be used when opening an S3Storage,
// mostly options for underlying S3 client library.
type S3Config struct {
// CoreOpts are S3 client options passed during initialization.
CoreOpts minio.Options
// GetOpts are S3 client options passed during .Read___() calls.
GetOpts minio.GetObjectOptions
// PutOpts are S3 client options passed during .Write___() calls.
PutOpts minio.PutObjectOptions
// PutChunkSize is the chunk size (in bytes) to use when sending
// a byte stream reader of unknown size as a multi-part object.
PutChunkSize int64
// PutChunkOpts are S3 client options passed during chunked .Write___() calls.
PutChunkOpts minio.PutObjectPartOptions
// StatOpts are S3 client options passed during .Stat() calls.
StatOpts minio.StatObjectOptions
// RemoveOpts are S3 client options passed during .Remove() calls.
RemoveOpts minio.RemoveObjectOptions
// ListSize determines how many items to include in each
// list request, made during calls to .WalkKeys().
ListSize int
}
// getS3Config returns a valid S3Config for supplied ptr.
func getS3Config(cfg *S3Config) S3Config {
const minChunkSz = 5 * 1024 * 1024
// If nil, use default
if cfg == nil {
cfg = DefaultS3Config
}
// Ensure a minimum compatible chunk size
if cfg.PutChunkSize <= minChunkSz {
// See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
cfg.PutChunkSize = minChunkSz
}
// Assume 0 list size == use default
if cfg.ListSize <= 0 {
cfg.ListSize = 200
}
// Return owned config copy
return S3Config{
CoreOpts: cfg.CoreOpts,
GetOpts: cfg.GetOpts,
PutOpts: cfg.PutOpts,
PutChunkSize: cfg.PutChunkSize,
ListSize: cfg.ListSize,
StatOpts: cfg.StatOpts,
RemoveOpts: cfg.RemoveOpts,
}
}
// S3Storage is a storage implementation that stores key-value
// pairs in an S3 instance at given endpoint with bucket name.
type S3Storage struct {
client *minio.Core
bucket string
config S3Config
state uint32
}
// OpenS3 opens a new S3Storage instance with given S3 endpoint URL, bucket name and configuration.
func OpenS3(endpoint string, bucket string, cfg *S3Config) (*S3Storage, error) {
// Get checked config
config := getS3Config(cfg)
// Create new S3 client connection
client, err := minio.NewCore(endpoint, &config.CoreOpts)
if err != nil {
return nil, err
}
// Check that provided bucket actually exists
exists, err := client.BucketExists(context.Background(), bucket)
if err != nil {
return nil, err
} else if !exists {
return nil, new_error("bucket does not exist")
}
return &S3Storage{
client: client,
bucket: bucket,
config: config,
}, nil
}
// Client returns access to the underlying S3 client.
func (st *S3Storage) Client() *minio.Core {
return st.client
}
// Clean implements Storage.Clean().
func (st *S3Storage) Clean(ctx context.Context) error {
return nil // nothing to do for S3
}
// ReadBytes implements Storage.ReadBytes().
func (st *S3Storage) ReadBytes(ctx context.Context, key string) ([]byte, error) {
// Fetch object reader from S3 bucket
rc, err := st.ReadStream(ctx, key)
if err != nil {
return nil, err
}
defer rc.Close()
// Read all bytes and return
return io.ReadAll(rc)
}
// ReadStream implements Storage.ReadStream().
func (st *S3Storage) ReadStream(ctx context.Context, key string) (io.ReadCloser, error) {
// Check storage open
if st.closed() {
return nil, ErrClosed
}
// Fetch object reader from S3 bucket
rc, _, _, err := st.client.GetObject(
ctx,
st.bucket,
key,
st.config.GetOpts,
)
if err != nil {
return nil, transformS3Error(err)
}
return rc, nil
}
// WriteBytes implements Storage.WriteBytes().
func (st *S3Storage) WriteBytes(ctx context.Context, key string, value []byte) (int, error) {
n, err := st.WriteStream(ctx, key, util.NewByteReaderSize(value))
return int(n), err
}
// WriteStream implements Storage.WriteStream().
func (st *S3Storage) WriteStream(ctx context.Context, key string, r io.Reader) (int64, error) {
// Check storage open
if st.closed() {
return 0, ErrClosed
}
if rs, ok := r.(util.ReaderSize); ok {
// This reader supports providing us the size of
// the encompassed data, allowing us to perform
// a singular .PutObject() call with length.
info, err := st.client.PutObject(
ctx,
st.bucket,
key,
r,
rs.Size(),
"",
"",
st.config.PutOpts,
)
if err != nil {
err = transformS3Error(err)
}
return info.Size, err
}
// Start a new multipart upload to get ID
uploadID, err := st.client.NewMultipartUpload(
ctx,
st.bucket,
key,
st.config.PutOpts,
)
if err != nil {
return 0, transformS3Error(err)
}
var (
index = int(1) // parts index
total = int64(0)
parts []minio.CompletePart
chunk = make([]byte, st.config.PutChunkSize)
rbuf = bytes.NewReader(nil)
)
// Note that we do not perform any kind of
// memory pooling of the chunk buffers here.
// Optimal chunking sizes for S3 writes are in
// the orders of megabytes, so letting the GC
// collect these ASAP is much preferred.
loop:
for done := false; !done; {
// Read next chunk into byte buffer
n, err := io.ReadFull(r, chunk)
switch err {
// Successful read
case nil:
// Reached end, buffer empty
case io.EOF:
break loop
// Reached end, but buffer not empty
case io.ErrUnexpectedEOF:
done = true
// All other errors
default:
return 0, err
}
// Reset byte reader
rbuf.Reset(chunk[:n])
// Put this object chunk in S3 store
pt, err := st.client.PutObjectPart(
ctx,
st.bucket,
key,
uploadID,
index,
rbuf,
int64(n),
st.config.PutChunkOpts,
)
if err != nil {
return 0, err
}
// Append completed part to slice
parts = append(parts, minio.CompletePart{
PartNumber: pt.PartNumber,
ETag: pt.ETag,
ChecksumCRC32: pt.ChecksumCRC32,
ChecksumCRC32C: pt.ChecksumCRC32C,
ChecksumSHA1: pt.ChecksumSHA1,
ChecksumSHA256: pt.ChecksumSHA256,
})
// Iterate idx
index++
// Update total size
total += pt.Size
}
// Complete this multi-part upload operation
_, err = st.client.CompleteMultipartUpload(
ctx,
st.bucket,
key,
uploadID,
parts,
st.config.PutOpts,
)
if err != nil {
return 0, err
}
return total, nil
}
// Stat implements Storage.Stat().
func (st *S3Storage) Stat(ctx context.Context, key string) (bool, error) {
// Check storage open
if st.closed() {
return false, ErrClosed
}
// Query object in S3 bucket
_, err := st.client.StatObject(
ctx,
st.bucket,
key,
st.config.StatOpts,
)
if err != nil {
return false, transformS3Error(err)
}
return true, nil
}
// Remove implements Storage.Remove().
func (st *S3Storage) Remove(ctx context.Context, key string) error {
// Check storage open
if st.closed() {
return ErrClosed
}
// S3 returns no error on remove for non-existent keys
if ok, err := st.Stat(ctx, key); err != nil {
return err
} else if !ok {
return ErrNotFound
}
// Remove object from S3 bucket
err := st.client.RemoveObject(
ctx,
st.bucket,
key,
st.config.RemoveOpts,
)
if err != nil {
return transformS3Error(err)
}
return nil
}
// WalkKeys implements Storage.WalkKeys().
func (st *S3Storage) WalkKeys(ctx context.Context, opts WalkKeysOptions) error {
var (
prev string
token string
)
for {
// List the objects in bucket starting at marker
result, err := st.client.ListObjectsV2(
st.bucket,
"",
prev,
token,
"",
st.config.ListSize,
)
if err != nil {
return err
}
// Pass each object through walk func
for _, obj := range result.Contents {
if err := opts.WalkFn(ctx, Entry{
Key: obj.Key,
Size: obj.Size,
}); err != nil {
return err
}
}
// No token means we reached end of bucket
if result.NextContinuationToken == "" {
return nil
}
// Set continue token and prev mark
token = result.NextContinuationToken
prev = result.StartAfter
}
}
// Close implements Storage.Close().
func (st *S3Storage) Close() error {
atomic.StoreUint32(&st.state, 1)
return nil
}
// closed returns whether S3Storage is closed.
func (st *S3Storage) closed() bool {
return (atomic.LoadUint32(&st.state) == 1)
}

View file

@ -1,53 +0,0 @@
package storage
import (
"context"
"io"
)
// Storage defines a means of storing and accessing key value pairs
type Storage interface {
// ReadBytes returns the byte value for key in storage
ReadBytes(ctx context.Context, key string) ([]byte, error)
// ReadStream returns an io.ReadCloser for the value bytes at key in the storage
ReadStream(ctx context.Context, key string) (io.ReadCloser, error)
// WriteBytes writes the supplied value bytes at key in the storage
WriteBytes(ctx context.Context, key string, value []byte) (int, error)
// WriteStream writes the bytes from supplied reader at key in the storage
WriteStream(ctx context.Context, key string, r io.Reader) (int64, error)
// Stat checks if the supplied key is in the storage
Stat(ctx context.Context, key string) (bool, error)
// Remove attempts to remove the supplied key-value pair from storage
Remove(ctx context.Context, key string) error
// Close will close the storage, releasing any file locks
Close() error
// Clean removes unused values and unclutters the storage (e.g. removing empty folders)
Clean(ctx context.Context) error
// WalkKeys walks the keys in the storage
WalkKeys(ctx context.Context, opts WalkKeysOptions) error
}
// Entry represents a key in a Storage{} implementation,
// with any associated metadata that may have been set.
type Entry struct {
// Key is this entry's unique storage key.
Key string
// Size is the size of this entry in storage.
// Note that size < 0 indicates unknown.
Size int64
}
// WalkKeysOptions defines how to walk the keys in a storage implementation
type WalkKeysOptions struct {
// WalkFn is the function to apply on each StorageEntry
WalkFn func(context.Context, Entry) error
}

View file

@ -1,25 +0,0 @@
package storage
// KeyTransform defines a method of converting store keys to storage paths (and vice-versa)
type KeyTransform interface {
// KeyToPath converts a supplied key to storage path
KeyToPath(string) string
// PathToKey converts a supplied storage path to key
PathToKey(string) string
}
type nopKeyTransform struct{}
// NopTransform returns a nop key transform (i.e. key = path)
func NopTransform() KeyTransform {
return &nopKeyTransform{}
}
func (t *nopKeyTransform) KeyToPath(key string) string {
return key
}
func (t *nopKeyTransform) PathToKey(path string) string {
return path
}

View file

@ -1,41 +0,0 @@
package util
import (
"bytes"
"io"
)
// ReaderSize defines a reader of known size in bytes.
type ReaderSize interface {
io.Reader
Size() int64
}
// ByteReaderSize implements ReaderSize for an in-memory byte-slice.
type ByteReaderSize struct {
br bytes.Reader
sz int64
}
// NewByteReaderSize returns a new ByteReaderSize instance reset to slice b.
func NewByteReaderSize(b []byte) *ByteReaderSize {
rs := new(ByteReaderSize)
rs.Reset(b)
return rs
}
// Read implements io.Reader.
func (rs *ByteReaderSize) Read(b []byte) (int, error) {
return rs.br.Read(b)
}
// Size implements ReaderSize.
func (rs *ByteReaderSize) Size() int64 {
return rs.sz
}
// Reset resets the ReaderSize to be reading from b.
func (rs *ByteReaderSize) Reset(b []byte) {
rs.br.Reset(b)
rs.sz = int64(len(b))
}

View file

@ -1,26 +0,0 @@
package util
import (
"sync"
"codeberg.org/gruf/go-fastpath/v2"
)
// pathBuilderPool is the global fastpath.Builder pool.
var pathBuilderPool = sync.Pool{
New: func() any {
return &fastpath.Builder{B: make([]byte, 0, 512)}
},
}
// GetPathBuilder fetches a fastpath.Builder object from the pool.
func GetPathBuilder() *fastpath.Builder {
pb, _ := pathBuilderPool.Get().(*fastpath.Builder)
return pb
}
// PutPathBuilder places supplied fastpath.Builder back in the pool.
func PutPathBuilder(pb *fastpath.Builder) {
pb.Reset()
pathBuilderPool.Put(pb)
}

View file

@ -1,6 +0,0 @@
coverage:
status:
project:
default:
target: 70%
threshold: 5%

View file

@ -1,14 +0,0 @@
*.exe
.idea
.vscode
*.iml
*.local
/*.log
*.out
*.prof
*.test
.DS_Store
*.dmp
*.db
.testCoverage

View file

@ -1,68 +0,0 @@
run:
deadline: 5m
linters:
enable:
- asasalint # check for pass []any as any in variadic func(...any)
- asciicheck # Simple linter to check that your code does not contain non-ASCII identifiers
- bidichk # Checks for dangerous unicode character sequences
- containedctx # detects struct contained context.Context field
- contextcheck # check the function whether use a non-inherited context
- cyclop # checks function and package cyclomatic complexity
- decorder # check declaration order and count of types, constants, variables and functions
- depguard # Go linter that checks if package imports are in a list of acceptable packages
- dogsled # Checks assignments with too many blank identifiers (e.g. x, _, _, _, := f())
- durationcheck # check for two durations multiplied together
- errcheck # checking for unchecked errors
- errname # Checks that errors are prefixed with the `Err` and error types are suffixed with the `Error`
- errorlint # finds code that will cause problems with the error wrapping scheme introduced in Go 1.13
- exportloopref # checks for pointers to enclosing loop variables
- funlen # Tool for detection of long functions
- gci # controls golang package import order and makes it always deterministic
- gocognit # Computes and checks the cognitive complexity of functions
- gocritic # Provides diagnostics that check for bugs, performance and style issues
- gocyclo # Computes and checks the cyclomatic complexity of functions
- godot # Check if comments end in a period
- goerr113 # Golang linter to check the errors handling expressions
- gosimple # Linter for Go source code that specializes in simplifying a code
- govet # reports suspicious constructs, such as Printf calls with wrong arguments
- ineffassign # Detects when assignments to existing variables are not used
- maintidx # measures the maintainability index of each function
- makezero # Finds slice declarations with non-zero initial length
- misspell # Finds commonly misspelled English words in comments
- nakedret # Finds naked returns in functions
- nestif # Reports deeply nested if statements
- nilerr # Finds the code that returns nil even if it checks that the error is not nil
- nilnil # Checks that there is no simultaneous return of `nil` error and an invalid value
- prealloc # Finds slice declarations that could potentially be preallocated
- predeclared # find code that shadows one of Go's predeclared identifiers
- revive # drop-in replacement of golint
- staticcheck # drop-in replacement of go vet
- stylecheck # Stylecheck is a replacement for golint
- tenv # detects using os.Setenv instead of t.Setenv
- thelper # checks the consistency of test helpers
- tparallel # detects inappropriate usage of t.Parallel()
- typecheck # parses and type-checks Go code
- unconvert # Remove unnecessary type conversions
- unparam # Reports unused function parameters
- unused # Checks Go code for unused constants, variables, functions and types
- usestdlibvars # detect the possibility to use variables/constants from the Go standard library
- wastedassign # finds wasted assignment statements
- whitespace # detects leading and trailing whitespace
linters-settings:
cyclop:
max-complexity: 15
gocritic:
disabled-checks:
- newDeref
govet:
disable:
- unsafeptr
issues:
exclude-use-default: false
exclude-rules:
- linters:
- goerr113
text: "do not define dynamic errors"

View file

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright cornelk
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -1,25 +0,0 @@
help: ## show help, shown by default if no target is specified
@grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
lint: ## run code linters
golangci-lint run
benchmark: ## run benchmarks
cd benchmarks && perflock go test -cpu 8 -run=^# -bench=.
benchmark-perflock: ## run benchmarks using perflock - https://github.com/aclements/perflock
cd benchmarks && perflock -governor 80% go test -count 3 -cpu 8 -run=^# -bench=.
test: ## run tests
go test -race ./...
GOARCH=386 go test ./...
test-coverage: ## run unit tests and create test coverage
go test ./... -coverprofile .testCoverage -covermode=atomic -coverpkg=./...
test-coverage-web: test-coverage ## run unit tests and show test coverage in browser
go tool cover -func .testCoverage | grep total | awk '{print "Total coverage: "$$3}'
go tool cover -html=.testCoverage
install-linters: ## install all used linters
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $$(go env GOPATH)/bin v1.49.0

View file

@ -1,88 +0,0 @@
# hashmap
[![Build status](https://github.com/cornelk/hashmap/actions/workflows/go.yaml/badge.svg?branch=main)](https://github.com/cornelk/hashmap/actions)
[![go.dev reference](https://img.shields.io/badge/go.dev-reference-007d9c?logo=go&logoColor=white&style=flat-square)](https://pkg.go.dev/github.com/cornelk/hashmap)
[![Go Report Card](https://goreportcard.com/badge/github.com/cornelk/hashmap)](https://goreportcard.com/report/github.com/cornelk/hashmap)
[![codecov](https://codecov.io/gh/cornelk/hashmap/branch/main/graph/badge.svg?token=NS5UY28V3A)](https://codecov.io/gh/cornelk/hashmap)
## Overview
A Golang lock-free thread-safe HashMap optimized for fastest read access.
It is not a general-use HashMap and currently has slow write performance for write heavy uses.
The minimal supported Golang version is 1.19 as it makes use of Generics and the new atomic package helpers.
## Usage
Example uint8 key map uses:
```
m := New[uint8, int]()
m.Set(1, 123)
value, ok := m.Get(1)
```
Example string key map uses:
```
m := New[string, int]()
m.Set("amount", 123)
value, ok := m.Get("amount")
```
Using the map to count URL requests:
```
m := New[string, *int64]()
var i int64
counter, _ := m.GetOrInsert("api/123", &i)
atomic.AddInt64(counter, 1) // increase counter
...
count := atomic.LoadInt64(counter) // read counter
```
## Benchmarks
Reading from the hash map for numeric key types in a thread-safe way is faster than reading from a standard Golang map
in an unsafe way and four times faster than Golang's `sync.Map`:
```
BenchmarkReadHashMapUint-8 1774460 677.3 ns/op
BenchmarkReadHaxMapUint-8 1758708 679.0 ns/op
BenchmarkReadGoMapUintUnsafe-8 1497732 790.9 ns/op
BenchmarkReadGoMapUintMutex-8 41562 28672 ns/op
BenchmarkReadGoSyncMapUint-8 454401 2646 ns/op
```
Reading from the map while writes are happening:
```
BenchmarkReadHashMapWithWritesUint-8 1388560 859.1 ns/op
BenchmarkReadHaxMapWithWritesUint-8 1306671 914.5 ns/op
BenchmarkReadGoSyncMapWithWritesUint-8 335732 3113 ns/op
```
Write performance without any concurrent reads:
```
BenchmarkWriteHashMapUint-8 54756 21977 ns/op
BenchmarkWriteGoMapMutexUint-8 83907 14827 ns/op
BenchmarkWriteGoSyncMapUint-8 16983 70305 ns/op
```
The benchmarks were run with Golang 1.19.0 on Linux and AMD64 using `make benchmark`.
## Technical details
* Technical design decisions have been made based on benchmarks that are stored in an external repository:
[go-benchmark](https://github.com/cornelk/go-benchmark)
* The library uses a sorted linked list and a slice as an index into that list.
* The Get() function contains helper functions that have been inlined manually until the Golang compiler will inline them automatically.
* It optimizes the slice access by circumventing the Golang size check when reading from the slice.
Once a slice is allocated, the size of it does not change.
The library limits the index into the slice, therefore the Golang size check is obsolete.
When the slice reaches a defined fill rate, a bigger slice is allocated and all keys are recalculated and transferred into the new slice.
* For hashing, specialized xxhash implementations are used that match the size of the key type where available

View file

@ -1,12 +0,0 @@
package hashmap
// defaultSize is the default size for a map.
const defaultSize = 8
// maxFillRate is the maximum fill rate for the slice before a resize will happen.
const maxFillRate = 50
// support all numeric and string types and aliases of those.
type hashable interface {
~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | ~float32 | ~float64 | ~string
}

View file

@ -1,348 +0,0 @@
// Package hashmap provides a lock-free and thread-safe HashMap.
package hashmap
import (
"bytes"
"fmt"
"reflect"
"strconv"
"sync/atomic"
"unsafe"
)
// Map implements a read optimized hash map.
type Map[Key hashable, Value any] struct {
hasher func(Key) uintptr
store atomic.Pointer[store[Key, Value]] // pointer to a map instance that gets replaced if the map resizes
linkedList *List[Key, Value] // key sorted linked list of elements
// resizing marks a resizing operation in progress.
// this is using uintptr instead of atomic.Bool to avoid using 32 bit int on 64 bit systems
resizing atomic.Uintptr
}
// New returns a new map instance.
func New[Key hashable, Value any]() *Map[Key, Value] {
return NewSized[Key, Value](defaultSize)
}
// NewSized returns a new map instance with a specific initialization size.
func NewSized[Key hashable, Value any](size uintptr) *Map[Key, Value] {
m := &Map[Key, Value]{}
m.allocate(size)
m.setDefaultHasher()
return m
}
// SetHasher sets a custom hasher.
func (m *Map[Key, Value]) SetHasher(hasher func(Key) uintptr) {
m.hasher = hasher
}
// Len returns the number of elements within the map.
func (m *Map[Key, Value]) Len() int {
return m.linkedList.Len()
}
// Get retrieves an element from the map under given hash key.
func (m *Map[Key, Value]) Get(key Key) (Value, bool) {
hash := m.hasher(key)
for element := m.store.Load().item(hash); element != nil; element = element.Next() {
if element.keyHash == hash && element.key == key {
return element.Value(), true
}
if element.keyHash > hash {
return *new(Value), false
}
}
return *new(Value), false
}
// GetOrInsert returns the existing value for the key if present.
// Otherwise, it stores and returns the given value.
// The returned bool is true if the value was loaded, false if stored.
func (m *Map[Key, Value]) GetOrInsert(key Key, value Value) (Value, bool) {
hash := m.hasher(key)
var newElement *ListElement[Key, Value]
for {
for element := m.store.Load().item(hash); element != nil; element = element.Next() {
if element.keyHash == hash && element.key == key {
actual := element.Value()
return actual, true
}
if element.keyHash > hash {
break
}
}
if newElement == nil { // allocate only once
newElement = &ListElement[Key, Value]{
key: key,
keyHash: hash,
}
newElement.value.Store(&value)
}
if m.insertElement(newElement, hash, key, value) {
return value, false
}
}
}
// FillRate returns the fill rate of the map as a percentage integer.
func (m *Map[Key, Value]) FillRate() int {
store := m.store.Load()
count := int(store.count.Load())
l := len(store.index)
return (count * 100) / l
}
// Del deletes the key from the map and returns whether the key was deleted.
func (m *Map[Key, Value]) Del(key Key) bool {
hash := m.hasher(key)
store := m.store.Load()
element := store.item(hash)
for ; element != nil; element = element.Next() {
if element.keyHash == hash && element.key == key {
m.deleteElement(element)
m.linkedList.Delete(element)
return true
}
if element.keyHash > hash {
return false
}
}
return false
}
// Insert sets the value under the specified key to the map if it does not exist yet.
// If a resizing operation is happening concurrently while calling Insert, the item might show up in the map
// after the resize operation is finished.
// Returns true if the item was inserted or false if it existed.
func (m *Map[Key, Value]) Insert(key Key, value Value) bool {
hash := m.hasher(key)
var (
existed, inserted bool
element *ListElement[Key, Value]
)
for {
store := m.store.Load()
searchStart := store.item(hash)
if !inserted { // if retrying after insert during grow, do not add to list again
element, existed, inserted = m.linkedList.Add(searchStart, hash, key, value)
if existed {
return false
}
if !inserted {
continue // a concurrent add did interfere, try again
}
}
count := store.addItem(element)
currentStore := m.store.Load()
if store != currentStore { // retry insert in case of insert during grow
continue
}
if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) {
go m.grow(0, true)
}
return true
}
}
// Set sets the value under the specified key to the map. An existing item for this key will be overwritten.
// If a resizing operation is happening concurrently while calling Set, the item might show up in the map
// after the resize operation is finished.
func (m *Map[Key, Value]) Set(key Key, value Value) {
hash := m.hasher(key)
for {
store := m.store.Load()
searchStart := store.item(hash)
element, added := m.linkedList.AddOrUpdate(searchStart, hash, key, value)
if !added {
continue // a concurrent add did interfere, try again
}
count := store.addItem(element)
currentStore := m.store.Load()
if store != currentStore { // retry insert in case of insert during grow
continue
}
if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) {
go m.grow(0, true)
}
return
}
}
// Grow resizes the map to a new size, the size gets rounded up to next power of 2.
// To double the size of the map use newSize 0.
// This function returns immediately, the resize operation is done in a goroutine.
// No resizing is done in case of another resize operation already being in progress.
func (m *Map[Key, Value]) Grow(newSize uintptr) {
if m.resizing.CompareAndSwap(0, 1) {
go m.grow(newSize, true)
}
}
// String returns the map as a string, only hashed keys are printed.
func (m *Map[Key, Value]) String() string {
buffer := bytes.NewBufferString("")
buffer.WriteRune('[')
first := m.linkedList.First()
item := first
for item != nil {
if item != first {
buffer.WriteRune(',')
}
fmt.Fprint(buffer, item.keyHash)
item = item.Next()
}
buffer.WriteRune(']')
return buffer.String()
}
// Range calls f sequentially for each key and value present in the map.
// If f returns false, range stops the iteration.
func (m *Map[Key, Value]) Range(f func(Key, Value) bool) {
item := m.linkedList.First()
for item != nil {
value := item.Value()
if !f(item.key, value) {
return
}
item = item.Next()
}
}
func (m *Map[Key, Value]) allocate(newSize uintptr) {
m.linkedList = NewList[Key, Value]()
if m.resizing.CompareAndSwap(0, 1) {
m.grow(newSize, false)
}
}
func (m *Map[Key, Value]) isResizeNeeded(store *store[Key, Value], count uintptr) bool {
l := uintptr(len(store.index)) // l can't be 0 as it gets initialized in New()
fillRate := (count * 100) / l
return fillRate > maxFillRate
}
func (m *Map[Key, Value]) insertElement(element *ListElement[Key, Value], hash uintptr, key Key, value Value) bool {
var existed, inserted bool
for {
store := m.store.Load()
searchStart := store.item(element.keyHash)
if !inserted { // if retrying after insert during grow, do not add to list again
_, existed, inserted = m.linkedList.Add(searchStart, hash, key, value)
if existed {
return false
}
if !inserted {
continue // a concurrent add did interfere, try again
}
}
count := store.addItem(element)
currentStore := m.store.Load()
if store != currentStore { // retry insert in case of insert during grow
continue
}
if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) {
go m.grow(0, true)
}
return true
}
}
// deleteElement deletes an element from index.
func (m *Map[Key, Value]) deleteElement(element *ListElement[Key, Value]) {
for {
store := m.store.Load()
index := element.keyHash >> store.keyShifts
ptr := (*unsafe.Pointer)(unsafe.Pointer(uintptr(store.array) + index*intSizeBytes))
next := element.Next()
if next != nil && element.keyHash>>store.keyShifts != index {
next = nil // do not set index to next item if it's not the same slice index
}
atomic.CompareAndSwapPointer(ptr, unsafe.Pointer(element), unsafe.Pointer(next))
currentStore := m.store.Load()
if store == currentStore { // check that no resize happened
break
}
}
}
func (m *Map[Key, Value]) grow(newSize uintptr, loop bool) {
defer m.resizing.CompareAndSwap(1, 0)
for {
currentStore := m.store.Load()
if newSize == 0 {
newSize = uintptr(len(currentStore.index)) << 1
} else {
newSize = roundUpPower2(newSize)
}
index := make([]*ListElement[Key, Value], newSize)
header := (*reflect.SliceHeader)(unsafe.Pointer(&index))
newStore := &store[Key, Value]{
keyShifts: strconv.IntSize - log2(newSize),
array: unsafe.Pointer(header.Data), // use address of slice data storage
index: index,
}
m.fillIndexItems(newStore) // initialize new index slice with longer keys
m.store.Store(newStore)
m.fillIndexItems(newStore) // make sure that the new index is up-to-date with the current state of the linked list
if !loop {
return
}
// check if a new resize needs to be done already
count := uintptr(m.Len())
if !m.isResizeNeeded(newStore, count) {
return
}
newSize = 0 // 0 means double the current size
}
}
func (m *Map[Key, Value]) fillIndexItems(store *store[Key, Value]) {
first := m.linkedList.First()
item := first
lastIndex := uintptr(0)
for item != nil {
index := item.keyHash >> store.keyShifts
if item == first || index != lastIndex { // store item with smallest hash key for every index
store.addItem(item)
lastIndex = index
}
item = item.Next()
}
}

View file

@ -1,127 +0,0 @@
package hashmap
import (
"sync/atomic"
)
// List is a sorted linked list.
type List[Key comparable, Value any] struct {
count atomic.Uintptr
head *ListElement[Key, Value]
}
// NewList returns an initialized list.
func NewList[Key comparable, Value any]() *List[Key, Value] {
return &List[Key, Value]{
head: &ListElement[Key, Value]{},
}
}
// Len returns the number of elements within the list.
func (l *List[Key, Value]) Len() int {
return int(l.count.Load())
}
// First returns the first item of the list.
func (l *List[Key, Value]) First() *ListElement[Key, Value] {
return l.head.Next()
}
// Add adds an item to the list and returns false if an item for the hash existed.
// searchStart = nil will start to search at the head item.
func (l *List[Key, Value]) Add(searchStart *ListElement[Key, Value], hash uintptr, key Key, value Value) (element *ListElement[Key, Value], existed bool, inserted bool) {
left, found, right := l.search(searchStart, hash, key)
if found != nil { // existing item found
return found, true, false
}
element = &ListElement[Key, Value]{
key: key,
keyHash: hash,
}
element.value.Store(&value)
return element, false, l.insertAt(element, left, right)
}
// AddOrUpdate adds or updates an item to the list.
func (l *List[Key, Value]) AddOrUpdate(searchStart *ListElement[Key, Value], hash uintptr, key Key, value Value) (*ListElement[Key, Value], bool) {
left, found, right := l.search(searchStart, hash, key)
if found != nil { // existing item found
found.value.Store(&value) // update the value
return found, true
}
element := &ListElement[Key, Value]{
key: key,
keyHash: hash,
}
element.value.Store(&value)
return element, l.insertAt(element, left, right)
}
// Delete deletes an element from the list.
func (l *List[Key, Value]) Delete(element *ListElement[Key, Value]) {
if !element.deleted.CompareAndSwap(0, 1) {
return // concurrent delete of the item is in progress
}
right := element.Next()
// point head to next element if element to delete was head
l.head.next.CompareAndSwap(element, right)
// element left from the deleted element will replace its next
// pointer to the next valid element on call of Next().
l.count.Add(^uintptr(0)) // decrease counter
}
func (l *List[Key, Value]) search(searchStart *ListElement[Key, Value], hash uintptr, key Key) (left, found, right *ListElement[Key, Value]) {
if searchStart != nil && hash < searchStart.keyHash { // key would remain left from item? {
searchStart = nil // start search at head
}
if searchStart == nil { // start search at head?
left = l.head
found = left.Next()
if found == nil { // no items beside head?
return nil, nil, nil
}
} else {
found = searchStart
}
for {
if hash == found.keyHash && key == found.key { // key hash already exists, compare keys
return nil, found, nil
}
if hash < found.keyHash { // new item needs to be inserted before the found value
if l.head == left {
return nil, nil, found
}
return left, nil, found
}
// go to next element in sorted linked list
left = found
found = left.Next()
if found == nil { // no more items on the right
return left, nil, nil
}
}
}
func (l *List[Key, Value]) insertAt(element, left, right *ListElement[Key, Value]) bool {
if left == nil {
left = l.head
}
element.next.Store(right)
if !left.next.CompareAndSwap(right, element) {
return false // item was modified concurrently
}
l.count.Add(1)
return true
}

View file

@ -1,47 +0,0 @@
package hashmap
import (
"sync/atomic"
)
// ListElement is an element of a list.
type ListElement[Key comparable, Value any] struct {
keyHash uintptr
// deleted marks the item as deleting or deleted
// this is using uintptr instead of atomic.Bool to avoid using 32 bit int on 64 bit systems
deleted atomic.Uintptr
// next points to the next element in the list.
// it is nil for the last item in the list.
next atomic.Pointer[ListElement[Key, Value]]
value atomic.Pointer[Value]
key Key
}
// Value returns the value of the list item.
func (e *ListElement[Key, Value]) Value() Value {
return *e.value.Load()
}
// Next returns the item on the right.
func (e *ListElement[Key, Value]) Next() *ListElement[Key, Value] {
for next := e.next.Load(); next != nil; {
// if the next item is not deleted, return it
if next.deleted.Load() == 0 {
return next
}
// point current elements next to the following item
// after the deleted one until a non deleted or list end is found
following := next.Next()
if e.next.CompareAndSwap(next, following) {
next = following
} else {
next = next.Next()
}
}
return nil // end of the list reached
}

View file

@ -1,45 +0,0 @@
package hashmap
import (
"sync/atomic"
"unsafe"
)
type store[Key comparable, Value any] struct {
keyShifts uintptr // Pointer size - log2 of array size, to be used as index in the data array
count atomic.Uintptr // count of filled elements in the slice
array unsafe.Pointer // pointer to slice data array
index []*ListElement[Key, Value] // storage for the slice for the garbage collector to not clean it up
}
// item returns the item for the given hashed key.
func (s *store[Key, Value]) item(hashedKey uintptr) *ListElement[Key, Value] {
index := hashedKey >> s.keyShifts
ptr := (*unsafe.Pointer)(unsafe.Pointer(uintptr(s.array) + index*intSizeBytes))
item := (*ListElement[Key, Value])(atomic.LoadPointer(ptr))
return item
}
// adds an item to the index if needed and returns the new item counter if it changed, otherwise 0.
func (s *store[Key, Value]) addItem(item *ListElement[Key, Value]) uintptr {
index := item.keyHash >> s.keyShifts
ptr := (*unsafe.Pointer)(unsafe.Pointer(uintptr(s.array) + index*intSizeBytes))
for { // loop until the smallest key hash is in the index
element := (*ListElement[Key, Value])(atomic.LoadPointer(ptr)) // get the current item in the index
if element == nil { // no item yet at this index
if atomic.CompareAndSwapPointer(ptr, nil, unsafe.Pointer(item)) {
return s.count.Add(1)
}
continue // a new item was inserted concurrently, retry
}
if item.keyHash < element.keyHash {
// the new item is the smallest for this index?
if !atomic.CompareAndSwapPointer(ptr, unsafe.Pointer(element), unsafe.Pointer(item)) {
continue // a new item was inserted concurrently, retry
}
}
return 0
}
}

View file

@ -1,32 +0,0 @@
package hashmap
import (
"strconv"
)
const (
// intSizeBytes is the size in byte of an int or uint value.
intSizeBytes = strconv.IntSize >> 3
)
// roundUpPower2 rounds a number to the next power of 2.
func roundUpPower2(i uintptr) uintptr {
i--
i |= i >> 1
i |= i >> 2
i |= i >> 4
i |= i >> 8
i |= i >> 16
i |= i >> 32
i++
return i
}
// log2 computes the binary logarithm of x, rounded up to the next integer.
func log2(i uintptr) uintptr {
var n, p uintptr
for p = 1; p < i; p += p {
n++
}
return n
}

View file

@ -1,258 +0,0 @@
package hashmap
import (
"encoding/binary"
"fmt"
"math/bits"
"reflect"
"unsafe"
)
const (
prime1 uint64 = 11400714785074694791
prime2 uint64 = 14029467366897019727
prime3 uint64 = 1609587929392839161
prime4 uint64 = 9650029242287828579
prime5 uint64 = 2870177450012600261
)
var prime1v = prime1
/*
Copyright (c) 2016 Caleb Spare
MIT License
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// setDefaultHasher sets the default hasher depending on the key type.
// Inlines hashing as anonymous functions for performance improvements, other options like
// returning an anonymous functions from another function turned out to not be as performant.
func (m *Map[Key, Value]) setDefaultHasher() {
var key Key
kind := reflect.ValueOf(&key).Elem().Type().Kind()
switch kind {
case reflect.Int, reflect.Uint, reflect.Uintptr:
switch intSizeBytes {
case 2:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashWord))
case 4:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashDword))
case 8:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashQword))
default:
panic(fmt.Errorf("unsupported integer byte size %d", intSizeBytes))
}
case reflect.Int8, reflect.Uint8:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashByte))
case reflect.Int16, reflect.Uint16:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashWord))
case reflect.Int32, reflect.Uint32:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashDword))
case reflect.Int64, reflect.Uint64:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashQword))
case reflect.Float32:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashFloat32))
case reflect.Float64:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashFloat64))
case reflect.String:
m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashString))
default:
panic(fmt.Errorf("unsupported key type %T of kind %v", key, kind))
}
}
// Specialized xxhash hash functions, optimized for the bit size of the key where available,
// for all supported types beside string.
var xxHashByte = func(key uint8) uintptr {
h := prime5 + 1
h ^= uint64(key) * prime5
h = bits.RotateLeft64(h, 11) * prime1
h ^= h >> 33
h *= prime2
h ^= h >> 29
h *= prime3
h ^= h >> 32
return uintptr(h)
}
var xxHashWord = func(key uint16) uintptr {
h := prime5 + 2
h ^= (uint64(key) & 0xff) * prime5
h = bits.RotateLeft64(h, 11) * prime1
h ^= ((uint64(key) >> 8) & 0xff) * prime5
h = bits.RotateLeft64(h, 11) * prime1
h ^= h >> 33
h *= prime2
h ^= h >> 29
h *= prime3
h ^= h >> 32
return uintptr(h)
}
var xxHashDword = func(key uint32) uintptr {
h := prime5 + 4
h ^= uint64(key) * prime1
h = bits.RotateLeft64(h, 23)*prime2 + prime3
h ^= h >> 33
h *= prime2
h ^= h >> 29
h *= prime3
h ^= h >> 32
return uintptr(h)
}
var xxHashFloat32 = func(key float32) uintptr {
h := prime5 + 4
h ^= uint64(key) * prime1
h = bits.RotateLeft64(h, 23)*prime2 + prime3
h ^= h >> 33
h *= prime2
h ^= h >> 29
h *= prime3
h ^= h >> 32
return uintptr(h)
}
var xxHashFloat64 = func(key float64) uintptr {
h := prime5 + 4
h ^= uint64(key) * prime1
h = bits.RotateLeft64(h, 23)*prime2 + prime3
h ^= h >> 33
h *= prime2
h ^= h >> 29
h *= prime3
h ^= h >> 32
return uintptr(h)
}
var xxHashQword = func(key uint64) uintptr {
k1 := key * prime2
k1 = bits.RotateLeft64(k1, 31)
k1 *= prime1
h := (prime5 + 8) ^ k1
h = bits.RotateLeft64(h, 27)*prime1 + prime4
h ^= h >> 33
h *= prime2
h ^= h >> 29
h *= prime3
h ^= h >> 32
return uintptr(h)
}
var xxHashString = func(key string) uintptr {
sh := (*reflect.StringHeader)(unsafe.Pointer(&key))
bh := reflect.SliceHeader{
Data: sh.Data,
Len: sh.Len,
Cap: sh.Len, // cap needs to be set, otherwise xxhash fails on ARM Macs
}
b := *(*[]byte)(unsafe.Pointer(&bh))
var h uint64
if sh.Len >= 32 {
v1 := prime1v + prime2
v2 := prime2
v3 := uint64(0)
v4 := -prime1v
for len(b) >= 32 {
v1 = round(v1, binary.LittleEndian.Uint64(b[0:8:len(b)]))
v2 = round(v2, binary.LittleEndian.Uint64(b[8:16:len(b)]))
v3 = round(v3, binary.LittleEndian.Uint64(b[16:24:len(b)]))
v4 = round(v4, binary.LittleEndian.Uint64(b[24:32:len(b)]))
b = b[32:len(b):len(b)]
}
h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
h = mergeRound(h, v1)
h = mergeRound(h, v2)
h = mergeRound(h, v3)
h = mergeRound(h, v4)
} else {
h = prime5
}
h += uint64(sh.Len)
i, end := 0, len(b)
for ; i+8 <= end; i += 8 {
k1 := round(0, binary.LittleEndian.Uint64(b[i:i+8:len(b)]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
if i+4 <= end {
h ^= uint64(binary.LittleEndian.Uint32(b[i:i+4:len(b)])) * prime1
h = rol23(h)*prime2 + prime3
i += 4
}
for ; i < end; i++ {
h ^= uint64(b[i]) * prime5
h = rol11(h) * prime1
}
h ^= h >> 33
h *= prime2
h ^= h >> 29
h *= prime3
h ^= h >> 32
return uintptr(h)
}
func round(acc, input uint64) uint64 {
acc += input * prime2
acc = rol31(acc)
acc *= prime1
return acc
}
func mergeRound(acc, val uint64) uint64 {
val = round(0, val)
acc ^= val
acc = acc*prime1 + prime4
return acc
}
func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }

File diff suppressed because it is too large Load diff

View file

@ -1,184 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
// - Literal insertions: Runs of one or more symbols are inserted into the data
// stream as is. This is accomplished through the writeByte method for a
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
// Any valid stream must start with a literal insertion if no preset dictionary
// is used.
//
// - Backward copies: Runs of one or more symbols are copied from previously
// emitted data. Backward copies come as the tuple (dist, length) where dist
// determines how far back in the stream to copy from and length determines how
// many bytes to copy. Note that it is valid for the length to be greater than
// the distance. Since LZ77 uses forward copies, that situation is used to
// perform a form of run-length encoding on repeated runs of symbols.
// The writeCopy and tryWriteCopy are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each
// method call must be respected.
type dictDecoder struct {
hist []byte // Sliding window history
// Invariant: 0 <= rdPos <= wrPos <= len(hist)
wrPos int // Current output position in buffer
rdPos int // Have emitted hist[:rdPos] already
full bool // Has a full window length been written yet?
}
// init initializes dictDecoder to have a sliding window dictionary of the given
// size. If a preset dict is provided, it will initialize the dictionary with
// the contents of dict.
func (dd *dictDecoder) init(size int, dict []byte) {
*dd = dictDecoder{hist: dd.hist}
if cap(dd.hist) < size {
dd.hist = make([]byte, size)
}
dd.hist = dd.hist[:size]
if len(dict) > len(dd.hist) {
dict = dict[len(dict)-len(dd.hist):]
}
dd.wrPos = copy(dd.hist, dict)
if dd.wrPos == len(dd.hist) {
dd.wrPos = 0
dd.full = true
}
dd.rdPos = dd.wrPos
}
// histSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) histSize() int {
if dd.full {
return len(dd.hist)
}
return dd.wrPos
}
// availRead reports the number of bytes that can be flushed by readFlush.
func (dd *dictDecoder) availRead() int {
return dd.wrPos - dd.rdPos
}
// availWrite reports the available amount of output buffer space.
func (dd *dictDecoder) availWrite() int {
return len(dd.hist) - dd.wrPos
}
// writeSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= availWrite()
func (dd *dictDecoder) writeSlice() []byte {
return dd.hist[dd.wrPos:]
}
// writeMark advances the writer pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= availWrite()
func (dd *dictDecoder) writeMark(cnt int) {
dd.wrPos += cnt
}
// writeByte writes a single byte to the dictionary.
//
// This invariant must be kept: 0 < availWrite()
func (dd *dictDecoder) writeByte(c byte) {
dd.hist[dd.wrPos] = c
dd.wrPos++
}
// writeCopy copies a string at a given (dist, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) writeCopy(dist, length int) int {
dstBase := dd.wrPos
dstPos := dstBase
srcPos := dstPos - dist
endPos := dstPos + length
if endPos > len(dd.hist) {
endPos = len(dd.hist)
}
// Copy non-overlapping section after destination position.
//
// This section is non-overlapping in that the copy length for this section
// is always less than or equal to the backwards distance. This can occur
// if a distance refers to data that wraps-around in the buffer.
// Thus, a backwards copy is performed here; that is, the exact bytes in
// the source prior to the copy is placed in the destination.
if srcPos < 0 {
srcPos += len(dd.hist)
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
srcPos = 0
}
// Copy possibly overlapping section before destination position.
//
// This section can overlap if the copy length for this section is larger
// than the backwards distance. This is allowed by LZ77 so that repeated
// strings can be succinctly represented using (dist, length) pairs.
// Thus, a forwards copy is performed here; that is, the bytes copied is
// possibly dependent on the resulting bytes in the destination as the copy
// progresses along. This is functionally equivalent to the following:
//
// for i := 0; i < endPos-dstPos; i++ {
// dd.hist[dstPos+i] = dd.hist[srcPos+i]
// }
// dstPos = endPos
//
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// tryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
dstPos := dd.wrPos
endPos := dstPos + length
if dstPos < dist || endPos > len(dd.hist) {
return 0
}
dstBase := dstPos
srcPos := dstPos - dist
// Copy possibly overlapping section before destination position.
loop:
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
if dstPos < endPos {
goto loop // Avoid for-loop so that this function can be inlined
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// readFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. The data returned by readFlush must be fully consumed
// before calling any other dictDecoder methods.
func (dd *dictDecoder) readFlush() []byte {
toRead := dd.hist[dd.rdPos:dd.wrPos]
dd.rdPos = dd.wrPos
if dd.wrPos == len(dd.hist) {
dd.wrPos, dd.rdPos = 0, 0
dd.full = true
}
return toRead
}

View file

@ -1,193 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Modified for deflate by Klaus Post (c) 2015.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"encoding/binary"
"fmt"
)
type fastEnc interface {
Encode(dst *tokens, src []byte)
Reset()
}
func newFastEnc(level int) fastEnc {
switch level {
case 1:
return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
case 2:
return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
case 3:
return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
case 4:
return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
case 5:
return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
case 6:
return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
default:
panic("invalid level specified")
}
}
const (
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
baseMatchOffset = 1 // The smallest match offset
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
maxMatchOffset = 1 << 15 // The largest match offset
bTableBits = 17 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)
const (
prime3bytes = 506832829
prime4bytes = 2654435761
prime5bytes = 889523592379
prime6bytes = 227718039650203
prime7bytes = 58295818150454627
prime8bytes = 0xcf1bbcdcb7a56463
)
func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:])
}
func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}
type tableEntry struct {
offset int32
}
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastGen struct {
hist []byte
cur int32
}
func (e *fastGen) addBlock(src []byte) int32 {
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < maxMatchOffset*2 {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
// copy(e.hist[0:maxMatchOffset], e.hist[offset:])
*(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
}
// hashLen returns a hash of the lowest mls bytes of with length output bits.
// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
// length should always be < 32.
// Preferably length and mls should be a constant for inlining.
func hashLen(u uint64, length, mls uint8) uint32 {
switch mls {
case 3:
return (uint32(u<<8) * prime3bytes) >> (32 - length)
case 5:
return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
case 6:
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
case 7:
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
case 8:
return uint32((u * prime8bytes) >> (64 - length))
default:
return (uint32(u) * prime4bytes) >> (32 - length)
}
}
// matchlen will return the match length between offsets and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
func (e *fastGen) Reset() {
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= bufferReset {
e.cur += maxMatchOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}

File diff suppressed because it is too large Load diff

View file

@ -1,417 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"math"
"math/bits"
)
const (
maxBitsLimit = 16
// number of valid literals
literalCount = 286
)
// hcode is a huffman code with a bit code and bit length.
type hcode uint32
func (h hcode) len() uint8 {
return uint8(h)
}
func (h hcode) code64() uint64 {
return uint64(h >> 8)
}
func (h hcode) zero() bool {
return h == 0
}
type huffmanEncoder struct {
codes []hcode
bitCount [17]int32
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
freqcache [literalCount + 1]literalNode
}
type literalNode struct {
literal uint16
freq uint16
}
// A levelInfo describes the state of the constructed tree for a given depth.
type levelInfo struct {
// Our level. for better printing
level int32
// The frequency of the last node at this level
lastFreq int32
// The frequency of the next character to add to this level
nextCharFreq int32
// The frequency of the next pair (from level below) to add to this level.
// Only valid if the "needed" value of the next lower level is 0.
nextPairFreq int32
// The number of chains remaining to generate for this level before moving
// up to the next level
needed int32
}
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint8) {
*h = hcode(length) | (hcode(code) << 8)
}
func newhcode(code uint16, length uint8) hcode {
return hcode(length) | (hcode(code) << 8)
}
func reverseBits(number uint16, bitLength byte) uint16 {
return bits.Reverse16(number << ((16 - bitLength) & 15))
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
func newHuffmanEncoder(size int) *huffmanEncoder {
// Make capacity to next power of two.
c := uint(bits.Len32(uint32(size - 1)))
return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
}
// Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(literalCount)
codes := h.codes
var ch uint16
for ch = 0; ch < literalCount; ch++ {
var bits uint16
var size uint8
switch {
case ch < 144:
// size 8, 000110000 .. 10111111
bits = ch + 48
size = 8
case ch < 256:
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144
size = 9
case ch < 280:
// size 7, 0000000 .. 0010111
bits = ch - 256
size = 7
default:
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280
size = 8
}
codes[ch] = newhcode(reverseBits(bits, size), size)
}
return h
}
func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
}
return h
}
var fixedLiteralEncoding = generateFixedLiteralEncoding()
var fixedOffsetEncoding = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
total += int(f) * int(h.codes[i].len())
}
}
return total
}
func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
var total int
for _, f := range b {
total += int(h.codes[f].len())
}
return total
}
// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
code := h.codes[i]
if code.zero() {
return math.MaxInt32
}
total += int(f) * int(code.len())
}
}
return total
}
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list An array of the literals with non-zero frequencies
//
// and their associated frequencies. The array is in order of increasing
// frequency, and has as its last element a special element with frequency
// MaxInt32
//
// maxBits The maximum number of bits that should be used to encode any literal.
//
// Must be less than 16.
//
// return An integer array in which array[i] indicates the number of literals
//
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
}
n := int32(len(list))
list = list[0 : n+1]
list[n] = maxNode()
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases
if maxBits > n-1 {
maxBits = n - 1
}
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed==0. This makes level1.nextPairFreq
// be a legitimate value that never gets chosen.
var levels [maxBitsLimit]levelInfo
// leafCounts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leafCounts[i][j] is the number of literals at the left
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
// Descending to only have 1 bounds check.
l2f := int32(list[2].freq)
l1f := int32(list[1].freq)
l0f := int32(list[0].freq) + int32(list[1].freq)
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
lastFreq: l1f,
nextCharFreq: l2f,
nextPairFreq: l0f,
}
leafCounts[level][level] = 2
if level == 1 {
levels[level].nextPairFreq = math.MaxInt32
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
level := uint32(maxBits)
for level < 16 {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leafs and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set nextPairFreq impossibly large.
l.needed = 0
levels[level+1].nextPairFreq = math.MaxInt32
level++
continue
}
prevFreq := l.lastFreq
if l.nextCharFreq < l.nextPairFreq {
// The next item on this row is a leaf node.
n := leafCounts[level][level] + 1
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same of the previous node.
leafCounts[level][level] = n
e := list[n]
if e.literal < math.MaxUint16 {
l.nextCharFreq = int32(e.freq)
} else {
l.nextCharFreq = math.MaxInt32
}
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
if true {
save := leafCounts[level][level]
leafCounts[level] = leafCounts[level-1]
leafCounts[level][level] = save
} else {
copy(leafCounts[level][:level], leafCounts[level-1][:level])
}
levels[l.level-1].needed = 2
}
if l.needed--; l.needed == 0 {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in nextPairFreq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if l.level == maxBits {
// All done!
break
}
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
level++
} else {
// If we stole from below, move down temporarily to replenish it.
for levels[level-1].needed > 0 {
level--
}
}
}
// Somethings is wrong if at the end, the top level is null or hasn't used
// all of the leaves.
if leafCounts[maxBits][maxBits] != n {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := h.bitCount[:maxBits+1]
bits := 1
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
// chain.leafCount gives the number of literals requiring at least "bits"
// bits to encode.
bitCount[bits] = counts[level] - counts[level-1]
bits++
}
return bitCount
}
// Look at the leaves and assign them a bit count and an encoding as specified
// in RFC 1951 3.2.2
func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
code := uint16(0)
for n, bits := range bitCount {
code <<= 1
if n == 0 || bits == 0 {
continue
}
// The literals list[len(list)-bits] .. list[len(list)-bits]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
sortByLiteral(chunk)
for _, node := range chunk {
h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
code++
}
list = list[0 : len(list)-int(bits)]
}
}
// Update this Huffman Code object to be the minimum code for the specified frequency count.
//
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
list := h.freqcache[:len(freq)+1]
codes := h.codes[:len(freq)]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
for i, f := range freq {
if f != 0 {
list[count] = literalNode{uint16(i), f}
count++
} else {
codes[i] = 0
}
}
list[count] = literalNode{}
list = list[:count]
if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for i, node := range list {
// "list" is in order of increasing literal value.
h.codes[node.literal].set(uint16(i), 1)
}
return
}
sortByFreq(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
// And do the assignment
h.assignEncodingAndSize(bitCount, list)
}
// atLeastOne clamps the result between 1 and 15.
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
if v > 15 {
return 15
}
return v
}
func histogram(b []byte, h []uint16) {
if true && len(b) >= 8<<10 {
// Split for bigger inputs
histogramSplit(b, h)
} else {
h = h[:256]
for _, t := range b {
h[t]++
}
}
}
func histogramSplit(b []byte, h []uint16) {
// Tested, and slightly faster than 2-way.
// Writing to separate arrays and combining is also slightly slower.
h = h[:256]
for len(b)&3 != 0 {
h[b[0]]++
b = b[1:]
}
n := len(b) / 4
x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
for i, t := range x {
v0 := &h[t]
v1 := &h[y[i]]
v3 := &h[w[i]]
v2 := &h[z[i]]
*v0++
*v1++
*v2++
*v3++
}
}

View file

@ -1,159 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByFreq(data []literalNode) {
n := len(data)
quickSortByFreq(data, 0, n, maxDepth(n))
}
func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivotByFreq(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSortByFreq(data, a, mlo, maxDepth)
a = mhi // i.e., quickSortByFreq(data, mhi, b)
} else {
quickSortByFreq(data, mhi, b, maxDepth)
b = mlo // i.e., quickSortByFreq(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSortByFreq(data, a, b)
}
}
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
medianOfThreeSortByFreq(data, m, m-s, m+s)
medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThreeSortByFreq(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
}
b := a
for {
for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
}
for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
}
for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSortByFreq(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// quickSortByFreq, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.
// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

View file

@ -1,201 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByLiteral(data []literalNode) {
n := len(data)
quickSort(data, 0, n, maxDepth(n))
}
func quickSort(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivot(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSort(data, a, mlo, maxDepth)
a = mhi // i.e., quickSort(data, mhi, b)
} else {
quickSort(data, mhi, b, maxDepth)
b = mlo // i.e., quickSort(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].literal < data[i-6].literal {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSort(data, a, b)
}
}
func heapSort(data []literalNode, a, b int) {
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDown(data, i, hi, first)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data[first], data[first+i] = data[first+i], data[first]
siftDown(data, lo, i, first)
}
}
// siftDown implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDown(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && data[first+child].literal < data[first+child+1].literal {
child++
}
if data[first+root].literal > data[first+child].literal {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThree(data, lo, lo+s, lo+2*s)
medianOfThree(data, m, m-s, m+s)
medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThree(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && data[a].literal < data[pivot].literal; a++ {
}
b := a
for {
for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot
}
for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].literal > data[pivot].literal { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot
}
for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSort(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && data[j].literal < data[j-1].literal; j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)).
func maxDepth(n int) int {
var depth int
for i := n; i > 0; i >>= 1 {
depth++
}
return depth * 2
}
// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThree(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].literal < data[m1].literal {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

View file

@ -1,829 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate
import (
"bufio"
"compress/flate"
"fmt"
"io"
"math/bits"
"sync"
)
const (
maxCodeLen = 16 // max length of Huffman code
maxCodeLenMask = 15 // mask for max length of Huffman code
// The next three numbers come from the RFC section 3.2.7, with the
// additional proviso in section 3.2.5 which implies that distance codes
// 30 and 31 should never occur in compressed data.
maxNumLit = 286
maxNumDist = 30
numCodes = 19 // number of codes in Huffman meta-code
debugDecode = false
)
// Value of length - 3 and extra bits.
type lengthExtra struct {
length, extra uint8
}
var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
var bitMask32 = [32]uint32{
0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
} // up to 32 bits
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError = flate.CorruptInputError
// An InternalError reports an error in the flate code itself.
type InternalError string
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
type ReadError = flate.ReadError
// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
type WriteError = flate.WriteError
// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
// http://www.gzip.org/algorithm.txt
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
type huffmanDecoder struct {
maxRead int // the maximum number of bits we can read and not overread
chunks *[huffmanNumChunks]uint16 // chunks as described above
links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
// Following this function, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(lengths []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
const sanity = false
if h.chunks == nil {
h.chunks = new([huffmanNumChunks]uint16)
}
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
// Count number of codes of each length,
// compute maxRead and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n&maxCodeLenMask]++
}
// Empty tree. The decompressor.huffSym function will fail later if the tree
// is used. Technically, an empty tree is only valid for the HDIST tree and
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
// is guaranteed to fail since it will attempt to use the tree to decode the
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
// guaranteed to fail later since the compressed data section must be
// composed of at least one symbol (the end-of-block marker).
if max == 0 {
return true
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
code <<= 1
nextcode[i&maxCodeLenMask] = code
code += count[i&maxCodeLenMask]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
if debugDecode {
fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
}
return false
}
h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
}
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
if cap(h.links) < huffmanNumChunks-link {
h.links = make([][]uint16, huffmanNumChunks-link)
} else {
h.links = h.links[:huffmanNumChunks-link]
}
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(bits.Reverse16(uint16(j)))
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
h.links[off] = h.links[off][:numLinks]
}
}
} else {
h.links = h.links[:0]
}
for i, n := range lengths {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint16(i<<huffmanValueShift | n)
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
// lower 4 "count" bits should be
// between 1 and 15.
if sanity && h.chunks[off] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[off] = chunk
}
} else {
j := reverse & (huffmanNumChunks - 1)
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
// Longer codes should have been
// associated with a link table above.
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
linktab[off] = chunk
}
}
}
if sanity {
// Above we've sanity checked that we never overwrote
// an existing entry. Here we additionally check that
// we filled the tables completely.
for i, chunk := range h.chunks {
if chunk == 0 {
// As an exception, in the degenerate
// single-code case, we allow odd
// chunks to be missing.
if code == 1 && i%2 == 1 {
continue
}
panic("impossible: missing chunk")
}
}
for _, linktab := range h.links {
for _, chunk := range linktab {
if chunk == 0 {
panic("impossible: missing chunk")
}
}
}
}
return true
}
// Reader is the actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
io.Reader
io.ByteReader
}
type step uint8
const (
copyData step = iota + 1
nextBlock
huffmanBytesBuffer
huffmanBytesReader
huffmanBufioReader
huffmanStringsReader
huffmanGenericReader
)
// Decompress state.
type decompressor struct {
// Input source.
r Reader
roffset int64
// Huffman decoders for literal/length, distance.
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
// Output history, buffer.
dict dictDecoder
// Next step in the decompression,
// and decompression state.
step step
stepState int
err error
toRead []byte
hl, hd *huffmanDecoder
copyLen int
copyDist int
// Temporary buffer (avoids repeated allocation).
buf [4]byte
// Input bits, in top of b.
b uint32
nb uint
final bool
}
func (f *decompressor) nextBlock() {
for f.nb < 1+2 {
if f.err = f.moreBits(); f.err != nil {
return
}
}
f.final = f.b&1 == 1
f.b >>= 1
typ := f.b & 3
f.b >>= 2
f.nb -= 1 + 2
switch typ {
case 0:
f.dataBlock()
if debugDecode {
fmt.Println("stored block")
}
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("predefinied huffman block")
}
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
break
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("dynamic huffman block")
}
default:
// 3 is reserved.
if debugDecode {
fmt.Println("reserved data block encountered")
}
f.err = CorruptInputError(f.roffset)
}
}
func (f *decompressor) Read(b []byte) (int, error) {
for {
if len(f.toRead) > 0 {
n := copy(b, f.toRead)
f.toRead = f.toRead[n:]
if len(f.toRead) == 0 {
return n, f.err
}
return n, nil
}
if f.err != nil {
return 0, f.err
}
f.doStep()
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
// WriteTo implements the io.WriteTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
flushed := false
for {
if len(f.toRead) > 0 {
n, err := w.Write(f.toRead)
total += int64(n)
if err != nil {
f.err = err
return total, err
}
if n != len(f.toRead) {
return total, io.ErrShortWrite
}
f.toRead = f.toRead[:0]
}
if f.err != nil && flushed {
if f.err == io.EOF {
return total, nil
}
return total, f.err
}
if f.err == nil {
f.doStep()
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
flushed = true
}
}
}
func (f *decompressor) Close() error {
if f.err == io.EOF {
return nil
}
return f.err
}
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
func (f *decompressor) readHuffman() error {
// HLIT[5], HDIST[5], HCLEN[4].
for f.nb < 5+5+4 {
if err := f.moreBits(); err != nil {
return err
}
}
nlit := int(f.b&0x1F) + 257
if nlit > maxNumLit {
if debugDecode {
fmt.Println("nlit > maxNumLit", nlit)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
if ndist > maxNumDist {
if debugDecode {
fmt.Println("ndist > maxNumDist", ndist)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
}
}
f.codebits[codeOrder[i]] = int(f.b & 0x7)
f.b >>= 3
f.nb -= 3
}
for i := nclen; i < len(codeOrder); i++ {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
if debugDecode {
fmt.Println("init codebits failed")
}
return CorruptInputError(f.roffset)
}
// HLIT + 257 code lengths, HDIST + 1 code lengths,
// using the code length Huffman code.
for i, n := 0, nlit+ndist; i < n; {
x, err := f.huffSym(&f.h1)
if err != nil {
return err
}
if x < 16 {
// Actual length.
f.bits[i] = x
i++
continue
}
// Repeat previous length or zero.
var rep int
var nb uint
var b int
switch x {
default:
return InternalError("unexpected length code")
case 16:
rep = 3
nb = 2
if i == 0 {
if debugDecode {
fmt.Println("i==0")
}
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
case 17:
rep = 3
nb = 3
b = 0
case 18:
rep = 11
nb = 7
b = 0
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits:", err)
}
return err
}
}
rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
f.b >>= nb & regSizeMaskUint32
f.nb -= nb
if i+rep > n {
if debugDecode {
fmt.Println("i+rep > n", i, rep, n)
}
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
f.bits[i] = b
i++
}
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
if debugDecode {
fmt.Println("init2 failed")
}
return CorruptInputError(f.roffset)
}
// As an optimization, we can initialize the maxRead bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.maxRead < f.bits[endBlockMarker] {
f.h1.maxRead = f.bits[endBlockMarker]
}
if !f.final {
// If not the final block, the smallest block possible is
// a predefined table, BTYPE=01, with a single EOB marker.
// This will take up 3 + 7 bits.
f.h1.maxRead += 10
}
return nil
}
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
left := (f.nb) & 7
f.nb -= left
f.b >>= left
offBytes := f.nb >> 3
// Unfilled values will be overwritten.
f.buf[0] = uint8(f.b)
f.buf[1] = uint8(f.b >> 8)
f.buf[2] = uint8(f.b >> 16)
f.buf[3] = uint8(f.b >> 24)
f.roffset += int64(offBytes)
f.nb, f.b = 0, 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
if nn != ^n {
if debugDecode {
ncomp := ^n
fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
}
f.err = CorruptInputError(f.roffset)
return
}
if n == 0 {
f.toRead = f.dict.readFlush()
f.finishBlock()
return
}
f.copyLen = int(n)
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
buf := f.dict.writeSlice()
if len(buf) > f.copyLen {
buf = buf[:f.copyLen]
}
cnt, err := io.ReadFull(f.r, buf)
f.roffset += int64(cnt)
f.copyLen -= cnt
f.dict.writeMark(cnt)
if err != nil {
f.err = noEOF(err)
return
}
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = copyData
return
}
f.finishBlock()
}
func (f *decompressor) finishBlock() {
if f.final {
if f.dict.availRead() > 0 {
f.toRead = f.dict.readFlush()
}
f.err = io.EOF
}
f.step = nextBlock
}
func (f *decompressor) doStep() {
switch f.step {
case copyData:
f.copyData()
case nextBlock:
f.nextBlock()
case huffmanBytesBuffer:
f.huffmanBytesBuffer()
case huffmanBytesReader:
f.huffmanBytesReader()
case huffmanBufioReader:
f.huffmanBufioReader()
case huffmanStringsReader:
f.huffmanStringsReader()
case huffmanGenericReader:
f.huffmanGenericReader()
default:
panic("BUG: unexpected step state")
}
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
func noEOF(e error) error {
if e == io.EOF {
return io.ErrUnexpectedEOF
}
return e
}
func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
f.nb += 8
return nil
}
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(h.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
return 0, noEOF(err)
}
f.roffset++
b |= uint32(c) << (nb & regSizeMaskUint32)
nb += 8
}
chunk := h.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
f.b = b >> (n & regSizeMaskUint32)
f.nb = nb - n
return int(chunk >> huffmanValueShift), nil
}
}
}
func makeReader(r io.Reader) Reader {
if rr, ok := r.(Reader); ok {
return rr
}
return bufio.NewReader(r)
}
func fixedHuffmanDecoderInit() {
fixedOnce.Do(func() {
// These come from the RFC section 3.2.6.
var bits [288]int
for i := 0; i < 144; i++ {
bits[i] = 8
}
for i := 144; i < 256; i++ {
bits[i] = 9
}
for i := 256; i < 280; i++ {
bits[i] = 7
}
for i := 280; i < 288; i++ {
bits[i] = 8
}
fixedHuffmanDecoder.init(bits[:])
})
}
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
r: makeReader(r),
bits: f.bits,
codebits: f.codebits,
h1: f.h1,
h2: f.h2,
dict: f.dict,
step: nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
}
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser
// when finished reading.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
// NewReaderDict is like NewReader but initializes the reader
// with a preset dictionary. The returned Reader behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}

File diff suppressed because it is too large Load diff

View file

@ -1,241 +0,0 @@
package flate
import (
"encoding/binary"
"fmt"
"math/bits"
)
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL1 struct {
fastGen
table [tableSize]tableEntry
}
// EncodeL1 uses a similar algorithm to level 1
func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hashLen(now, tableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
cv = now
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
var l = int32(4)
if false {
l = e.matchlenLong(s+4, t+4, src) + 4
} else {
// inlined:
a := src[s+4:]
b := src[t+4:]
for len(a) >= 8 {
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
l += int32(bits.TrailingZeros64(diff) >> 3)
break
}
l += 8
a = a[8:]
b = b[8:]
}
if len(a) < 8 {
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
break
}
l++
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
// Save the match found
if false {
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
} else {
// Inlined...
xoffset := uint32(s - t - baseMatchOffset)
xlength := l
oc := offsetCode(xoffset)
xoffset |= oc << 16
for xlength > 0 {
xl := xlength
if xl > 258 {
if xl > 258+baseMatchLength {
xl = 258
} else {
xl = 258 - baseMatchLength
}
}
xlength -= xl
xl -= baseMatchLength
dst.extraHist[lengthCodes1[uint8(xl)]]++
dst.offHist[oc]++
dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
dst.n++
}
}
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hashLen(x, tableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
cv = x >> 8
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View file

@ -1,214 +0,0 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL2 struct {
fastGen
table [bTableSize]tableEntry
}
// EncodeL2 uses a similar algorithm to level 1, but is capable
// of matching across blocks giving better compression at a small slowdown.
func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashLen(cv, bTableBits, hashBytes)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hashLen(now, bTableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
cv = now
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, i)
nextHash := hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hashLen(x, bTableBits, hashBytes)
prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
currHash := hashLen(x>>16, bTableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
cv = x >> 24
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View file

@ -1,241 +0,0 @@
package flate
import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
table [1 << 16]tableEntryPrev
}
// Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
tableBits = 16
tableSize = 1 << tableBits
hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
}
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
e.table[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// Skip if too small.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 7
nextS := s
var candidate tableEntry
for {
nextHash := hashLen(cv, tableBits, hashBytes)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidates := e.table[nextHash]
now := load6432(src, nextS)
// Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
// Check both candidates
candidate = candidates.Cur
if candidate.offset < minOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
if uint32(cv) == load3232(src, candidate.offset-e.cur) {
if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
break
}
// Both match and are valid, pick longest.
offset := s - (candidate.offset - e.cur)
o2 := s - (candidates.Prev.offset - e.cur)
l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
if l2 > l1 {
candidate = candidates.Prev
}
break
} else {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
}
cv = now
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
//
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
t += l
// Index first pair after match end.
if int(t+8) < len(src) && t > 0 {
cv = load6432(src, t)
nextHash := hashLen(cv, tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t},
}
}
goto emitRemainder
}
// Store every 5th hash in-between.
for i := s - l + 2; i < s-5; i += 6 {
nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + i}}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s.
x := load6432(src, s-2)
prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
prevHash = hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
currHash := hashLen(x, tableBits, hashBytes)
candidates := e.table[currHash]
cv = x
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur},
}
// Check both candidates
candidate = candidates.Cur
minOffset := e.cur + s - (maxMatchOffset - 4)
if candidate.offset > minOffset {
if uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Found a match...
continue
}
candidate = candidates.Prev
if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Match at prev...
continue
}
}
cv = x >> 8
s++
break
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View file

@ -1,221 +0,0 @@
package flate
import "fmt"
type fastEncL4 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntry
}
func (e *fastEncL4) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.bTable[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
e.bTable[nextHashL] = entry
t = lCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) {
// We got a long match. Use that.
break
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
lCandidate = e.bTable[hash7(next, tableBits)]
// If the next long is a candidate, check if we should use that instead...
lOff := nextS - (lCandidate.offset - e.cur)
if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) {
l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
if l2 > l1 {
s = nextS
t = lCandidate.offset - e.cur
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic("s-t")
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+8) < len(src) {
cv := load6432(src, s)
e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
// Store every 3rd hash in-between
if true {
i := nextS
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
i += 3
for ; i < s-1; i += 3 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
e.bTable[prevHashL] = tableEntry{offset: o}
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View file

@ -1,708 +0,0 @@
package flate
import "fmt"
type fastEncL5 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
t2 := eLong - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Do an long at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
// fastEncL5Window is a level 5 encoder,
// but with a custom window size.
type fastEncL5Window struct {
hist []byte
cur int32
maxOffset int32
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5Window) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
maxMatchOffset := e.maxOffset
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
t2 := eLong - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if debugDeflate {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Do an long at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}
// Reset the encoding table.
func (e *fastEncL5Window) Reset() {
// We keep the same allocs, since we are compressing the same block sizes.
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= int32(bufferReset) {
e.cur += e.maxOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}
func (e *fastEncL5Window) addBlock(src []byte) int32 {
// check if we have space already
maxMatchOffset := e.maxOffset
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < int(maxMatchOffset*2) {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
// matchlen will return the match length between offsets and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 {
if debugDeflate {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > e.maxOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}

View file

@ -1,325 +0,0 @@
package flate
import "fmt"
type fastEncL6 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
// Repeat MUST be > 1 and within range
repeat := int32(1)
for {
const skipLog = 7
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
// Calculate hashes of 'next'
nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Long candidate matches at least 4 bytes.
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check the previous long candidate as well.
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
// Current value did not match, but check if previous long value does.
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
// Look up next long candidate (at nextS)
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check repeat at s + repOff
const repOff = 1
t2 := s - repeat + repOff
if load3232(src, t2) == uint32(cv>>(8*repOff)) {
ml := e.matchlen(s+4+repOff, t2+4, src) + 4
if ml > l {
t = t2
l = ml
s += repOff
// Not worth checking more.
break
}
}
// If the next long is a candidate, use that...
t2 = lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
// This is ok, but check previous as well.
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end-of-match...
if sAt := s + l; sAt < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is 2/3 bytes depending on input.
// 3 is only a little better when it is but sometimes a lot worse.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
// Test current
t2 := eLong.Cur.offset - e.cur - l + skipBeginning
s2 := s + skipBeginning
off := s2 - t2
if off < maxMatchOffset {
if off > 0 && t2 >= 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
// Test next:
t2 = eLong.Prev.offset - e.cur - l + skipBeginning
off := s2 - t2
if off > 0 && off < maxMatchOffset && t2 >= 0 {
if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
s = s2
}
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
if false {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
repeat = s - t
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i)
e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
}
goto emitRemainder
}
// Store every long hash in-between and every second short.
if true {
for i := nextS + 1; i < s-1; i += 2 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong.Cur, eLong.Prev = t, eLong.Cur
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
cv = load6432(src, s)
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View file

@ -1,16 +0,0 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b) and len(a) > 0
//
//go:noescape
func matchLen(a []byte, b []byte) int

View file

@ -1,68 +0,0 @@
// Copied from S2 implementation.
//go:build !appengine && !noasm && gc && !noasm
#include "textflag.h"
// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
MOVQ a_len+8(FP), DX
// matchLen
XORL SI, SI
CMPL DX, $0x08
JB matchlen_match4_standalone
matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
TESTQ BX, BX
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
matchlen_loop_standalone:
LEAL -8(DX), DX
LEAL 8(SI), SI
CMPL DX, $0x08
JAE matchlen_loopback_standalone
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE matchlen_match2_standalone
LEAL -4(DX), DX
LEAL 4(SI), SI
matchlen_match2_standalone:
CMPL DX, $0x02
JB matchlen_match1_standalone
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE matchlen_match1_standalone
LEAL -2(DX), DX
LEAL 2(SI), SI
matchlen_match1_standalone:
CMPL DX, $0x01
JB gen_match_len_end
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE gen_match_len_end
INCL SI
gen_match_len_end:
MOVQ SI, ret+48(FP)
RET

View file

@ -1,33 +0,0 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package flate
import (
"encoding/binary"
"math/bits"
)
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}

View file

@ -1,37 +0,0 @@
package flate
const (
// Masks for shifts with register sizes of the shift value.
// This can be used to work around the x86 design of shifting by mod register size.
// It can be used when a variable shift is always smaller than the register size.
// reg8SizeMaskX - shift value is 8 bits, shifted is X
reg8SizeMask8 = 7
reg8SizeMask16 = 15
reg8SizeMask32 = 31
reg8SizeMask64 = 63
// reg16SizeMaskX - shift value is 16 bits, shifted is X
reg16SizeMask8 = reg8SizeMask8
reg16SizeMask16 = reg8SizeMask16
reg16SizeMask32 = reg8SizeMask32
reg16SizeMask64 = reg8SizeMask64
// reg32SizeMaskX - shift value is 32 bits, shifted is X
reg32SizeMask8 = reg8SizeMask8
reg32SizeMask16 = reg8SizeMask16
reg32SizeMask32 = reg8SizeMask32
reg32SizeMask64 = reg8SizeMask64
// reg64SizeMaskX - shift value is 64 bits, shifted is X
reg64SizeMask8 = reg8SizeMask8
reg64SizeMask16 = reg8SizeMask16
reg64SizeMask32 = reg8SizeMask32
reg64SizeMask64 = reg8SizeMask64
// regSizeMaskUintX - shift value is uint, shifted is X
regSizeMaskUint8 = reg8SizeMask8
regSizeMaskUint16 = reg8SizeMask16
regSizeMaskUint32 = reg8SizeMask32
regSizeMaskUint64 = reg8SizeMask64
)

View file

@ -1,40 +0,0 @@
//go:build !amd64
// +build !amd64
package flate
const (
// Masks for shifts with register sizes of the shift value.
// This can be used to work around the x86 design of shifting by mod register size.
// It can be used when a variable shift is always smaller than the register size.
// reg8SizeMaskX - shift value is 8 bits, shifted is X
reg8SizeMask8 = 0xff
reg8SizeMask16 = 0xff
reg8SizeMask32 = 0xff
reg8SizeMask64 = 0xff
// reg16SizeMaskX - shift value is 16 bits, shifted is X
reg16SizeMask8 = 0xffff
reg16SizeMask16 = 0xffff
reg16SizeMask32 = 0xffff
reg16SizeMask64 = 0xffff
// reg32SizeMaskX - shift value is 32 bits, shifted is X
reg32SizeMask8 = 0xffffffff
reg32SizeMask16 = 0xffffffff
reg32SizeMask32 = 0xffffffff
reg32SizeMask64 = 0xffffffff
// reg64SizeMaskX - shift value is 64 bits, shifted is X
reg64SizeMask8 = 0xffffffffffffffff
reg64SizeMask16 = 0xffffffffffffffff
reg64SizeMask32 = 0xffffffffffffffff
reg64SizeMask64 = 0xffffffffffffffff
// regSizeMaskUintX - shift value is uint, shifted is X
regSizeMaskUint8 = ^uint(0)
regSizeMaskUint16 = ^uint(0)
regSizeMaskUint32 = ^uint(0)
regSizeMaskUint64 = ^uint(0)
)

View file

@ -1,318 +0,0 @@
package flate
import (
"io"
"math"
"sync"
)
const (
maxStatelessBlock = math.MaxInt16
// dictionary will be taken from maxStatelessBlock, so limit it.
maxStatelessDict = 8 << 10
slTableBits = 13
slTableSize = 1 << slTableBits
slTableShift = 32 - slTableBits
)
type statelessWriter struct {
dst io.Writer
closed bool
}
func (s *statelessWriter) Close() error {
if s.closed {
return nil
}
s.closed = true
// Emit EOF block
return StatelessDeflate(s.dst, nil, true, nil)
}
func (s *statelessWriter) Write(p []byte) (n int, err error) {
err = StatelessDeflate(s.dst, p, false, nil)
if err != nil {
return 0, err
}
return len(p), nil
}
func (s *statelessWriter) Reset(w io.Writer) {
s.dst = w
s.closed = false
}
// NewStatelessWriter will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
return &statelessWriter{dst: dst}
}
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
New: func() interface{} {
return newHuffmanBitWriter(nil)
},
}
// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When returning everything will be flushed.
// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
var dst tokens
bw := bitWriterPool.Get().(*huffmanBitWriter)
bw.reset(out)
defer func() {
// don't keep a reference to our output
bw.reset(nil)
bitWriterPool.Put(bw)
}()
if eof && len(in) == 0 {
// Just write an EOF block.
// Could be faster...
bw.writeStoredHeader(0, true)
bw.flush()
return bw.err
}
// Truncate dict
if len(dict) > maxStatelessDict {
dict = dict[len(dict)-maxStatelessDict:]
}
// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
var inDict []byte
for len(in) > 0 {
todo := in
if len(inDict) > 0 {
if len(todo) > maxStatelessBlock-maxStatelessDict {
todo = todo[:maxStatelessBlock-maxStatelessDict]
}
} else if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
inOrg := in
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
// combine dict and source
bufLen := len(todo) + len(dict)
combined := make([]byte, bufLen)
copy(combined, dict)
copy(combined[len(dict):], todo)
todo = combined
}
// Compress
if len(inDict) == 0 {
statelessEnc(&dst, todo, int16(len(dict)))
} else {
statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
}
isEof := eof && len(in) == 0
if dst.n == 0 {
bw.writeStoredHeader(len(uncompressed), isEof)
if bw.err != nil {
return bw.err
}
bw.writeBytes(uncompressed)
} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
// If we removed less than 1/16th, huffman compress the block.
bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
} else {
bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
}
if len(in) > 0 {
// Retain a dict if we have more
inDict = inOrg[len(uncompressed)-maxStatelessDict:]
dict = nil
dst.Reset()
}
if bw.err != nil {
return bw.err
}
}
if !eof {
// Align, only a stored block can do that.
bw.writeStoredHeader(0, false)
}
bw.flush()
return bw.err
}
func hashSL(u uint32) uint32 {
return (u * 0x1e35a7bd) >> slTableShift
}
func load3216(b []byte, i int16) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load6416(b []byte, i int16) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte, startAt int16) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
type tableEntry struct {
offset int16
}
var table [slTableSize]tableEntry
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src)-int(startAt) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = 0
return
}
// Index until startAt
if startAt > 0 {
cv := load3232(src, 0)
for i := int16(0); i < startAt; i++ {
table[hashSL(cv)] = tableEntry{offset: i}
cv = (cv >> 8) | (uint32(src[i+4]) << 24)
}
}
s := startAt + 1
nextEmit := startAt
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int16(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3216(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashSL(cv)
candidate = table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit || nextS <= 0 {
goto emitRemainder
}
now := load6416(src, nextS)
table[nextHash] = tableEntry{offset: s}
nextHash = hashSL(uint32(now))
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = table[nextHash]
now >>= 8
table[nextHash] = tableEntry{offset: s}
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
cv = uint32(now)
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset
l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
if false {
emitLiteral(dst, src[nextEmit:s])
} else {
for _, v := range src[nextEmit:s] {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
}
// Save the match found
dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6416(src, s-2)
o := s - 2
prevHash := hashSL(uint32(x))
table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hashSL(uint32(x))
candidate = table[currHash]
table[currHash] = tableEntry{offset: o + 2}
if uint32(x) != load3216(src, candidate.offset) {
cv = uint32(x >> 8)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

View file

@ -1,379 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
)
const (
// bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits
// bits 16-22 offsetcode - 5 bits
// bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits
// bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
matchOffsetOnlyMask = 0xffff
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
// is lengthCodes[length - MIN_MATCH_LENGTH]
var lengthCodes = [256]uint8{
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 28,
}
// lengthCodes1 is length codes, but starting at 1.
var lengthCodes1 = [256]uint8{
1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 29,
}
var offsetCodes = [256]uint32{
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
}
// offsetCodes14 are offsetCodes, but with 14 added.
var offsetCodes14 = [256]uint32{
14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
}
type token uint32
type tokens struct {
extraHist [32]uint16 // codes 256->maxnumlit
offHist [32]uint16 // offset codes
litHist [256]uint16 // codes 0->255
nFilled int
n uint16 // Must be able to contain maxStoreBlockSize
tokens [maxStoreBlockSize + 1]token
}
func (t *tokens) Reset() {
if t.n == 0 {
return
}
t.n = 0
t.nFilled = 0
for i := range t.litHist[:] {
t.litHist[i] = 0
}
for i := range t.extraHist[:] {
t.extraHist[i] = 0
}
for i := range t.offHist[:] {
t.offHist[i] = 0
}
}
func (t *tokens) Fill() {
if t.n == 0 {
return
}
for i, v := range t.litHist[:] {
if v == 0 {
t.litHist[i] = 1
t.nFilled++
}
}
for i, v := range t.extraHist[:literalCount-256] {
if v == 0 {
t.nFilled++
t.extraHist[i] = 1
}
}
for i, v := range t.offHist[:offsetCodeCount] {
if v == 0 {
t.offHist[i] = 1
}
}
}
func indexTokens(in []token) tokens {
var t tokens
t.indexTokens(in)
return t
}
func (t *tokens) indexTokens(in []token) {
t.Reset()
for _, tok := range in {
if tok < matchType {
t.AddLiteral(tok.literal())
continue
}
t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
}
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst *tokens, lit []byte) {
for _, v := range lit {
dst.tokens[dst.n] = token(v)
dst.litHist[v]++
dst.n++
}
}
func (t *tokens) AddLiteral(lit byte) {
t.tokens[t.n] = token(lit)
t.litHist[lit]++
t.n++
}
// from https://stackoverflow.com/a/28730362
func mFastLog2(val float32) float32 {
ux := int32(math.Float32bits(val))
log2 := (float32)(((ux >> 23) & 255) - 128)
ux &= -0x7f800001
ux += 127 << 23
uval := math.Float32frombits(uint32(ux))
log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
return log2
}
// EstimatedBits will return an minimum size estimated by an *optimal*
// compression of the block.
// The size of the block
func (t *tokens) EstimatedBits() int {
shannon := float32(0)
bits := int(0)
nMatches := 0
total := int(t.n) + t.nFilled
if total > 0 {
invTotal := 1.0 / float32(total)
for _, v := range t.litHist[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
// Just add 15 for EOB
shannon += 15
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
}
}
if nMatches > 0 {
invTotal := 1.0 / float32(nMatches)
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
}
return int(shannon) + bits
}
// AddMatch adds a match to the tokens.
// This function is very sensitive to inlining and right on the border.
func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
if debugDeflate {
if xlength >= maxMatchLength+baseMatchLength {
panic(fmt.Errorf("invalid length: %v", xlength))
}
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oCode := offsetCode(xoffset)
xoffset |= oCode << 16
t.extraHist[lengthCodes1[uint8(xlength)]]++
t.offHist[oCode&31]++
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.n++
}
// AddMatchLong adds a match to the tokens, potentially longer than max match length.
// Length should NOT have the base subtracted, only offset should.
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
if debugDeflate {
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oc := offsetCode(xoffset)
xoffset |= oc << 16
for xlength > 0 {
xl := xlength
if xl > 258 {
// We need to have at least baseMatchLength left over for next loop.
if xl > 258+baseMatchLength {
xl = 258
} else {
xl = 258 - baseMatchLength
}
}
xlength -= xl
xl -= baseMatchLength
t.extraHist[lengthCodes1[uint8(xl)]]++
t.offHist[oc&31]++
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.n++
}
}
func (t *tokens) AddEOB() {
t.tokens[t.n] = token(endBlockMarker)
t.extraHist[0]++
t.n++
}
func (t *tokens) Slice() []token {
return t.tokens[:t.n]
}
// VarInt returns the tokens as varint encoded bytes.
func (t *tokens) VarInt() []byte {
var b = make([]byte, binary.MaxVarintLen32*int(t.n))
var off int
for _, v := range t.tokens[:t.n] {
off += binary.PutUvarint(b[off:], uint64(v))
}
return b[:off]
}
// FromVarInt restores t to the varint encoded tokens provided.
// Any data in t is removed.
func (t *tokens) FromVarInt(b []byte) error {
var buf = bytes.NewReader(b)
var toks []token
for {
r, err := binary.ReadUvarint(buf)
if err == io.EOF {
break
}
if err != nil {
return err
}
toks = append(toks, token(r))
}
t.indexTokens(toks)
return nil
}
// Returns the type of a token
func (t token) typ() uint32 { return uint32(t) & typeMask }
// Returns the literal of a literal token
func (t token) literal() uint8 { return uint8(t) }
// Returns the extra offset of a match token
func (t token) offset() uint32 { return uint32(t) & offsetMask }
func (t token) length() uint8 { return uint8(t >> lengthShift) }
// Convert length to code.
func lengthCode(len uint8) uint8 { return lengthCodes[len] }
// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
if false {
if off < uint32(len(offsetCodes)) {
return offsetCodes[off&255]
} else if off>>7 < uint32(len(offsetCodes)) {
return offsetCodes[(off>>7)&255] + 14
} else {
return offsetCodes[(off>>14)&255] + 28
}
}
if off < uint32(len(offsetCodes)) {
return offsetCodes[uint8(off)]
}
return offsetCodes14[uint8(off>>7)]
}

View file

@ -1,380 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gzip implements reading and writing of gzip format compressed files,
// as specified in RFC 1952.
package gzip
import (
"bufio"
"compress/gzip"
"encoding/binary"
"hash/crc32"
"io"
"time"
"github.com/klauspost/compress/flate"
)
const (
gzipID1 = 0x1f
gzipID2 = 0x8b
gzipDeflate = 8
flagText = 1 << 0
flagHdrCrc = 1 << 1
flagExtra = 1 << 2
flagName = 1 << 3
flagComment = 1 << 4
)
var (
// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
ErrChecksum = gzip.ErrChecksum
// ErrHeader is returned when reading GZIP data that has an invalid header.
ErrHeader = gzip.ErrHeader
)
var le = binary.LittleEndian
// noEOF converts io.EOF to io.ErrUnexpectedEOF.
func noEOF(err error) error {
if err == io.EOF {
return io.ErrUnexpectedEOF
}
return err
}
// The gzip file stores a header giving metadata about the compressed file.
// That header is exposed as the fields of the Writer and Reader structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
Comment string // comment
Extra []byte // "extra data"
ModTime time.Time // modification time
Name string // file name
OS byte // operating system type
}
// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return a ErrChecksum when Read
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
// returned by Read as tentative until they receive the io.EOF
// marking the end of the data.
type Reader struct {
Header // valid after NewReader or Reader.Reset
r flate.Reader
br *bufio.Reader
decompressor io.ReadCloser
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
buf [512]byte
err error
multistream bool
}
// NewReader creates a new Reader reading the given reader.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
//
// It is the caller's responsibility to call Close on the Reader when done.
//
// The Reader.Header fields will be valid in the Reader returned.
func NewReader(r io.Reader) (*Reader, error) {
z := new(Reader)
if err := z.Reset(r); err != nil {
return nil, err
}
return z, nil
}
// Reset discards the Reader z's state and makes it equivalent to the
// result of its original state from NewReader, but reading from r instead.
// This permits reusing a Reader rather than allocating a new one.
func (z *Reader) Reset(r io.Reader) error {
*z = Reader{
decompressor: z.decompressor,
multistream: true,
br: z.br,
}
if rr, ok := r.(flate.Reader); ok {
z.r = rr
} else {
// Reuse if we can.
if z.br != nil {
z.br.Reset(r)
} else {
z.br = bufio.NewReader(r)
}
z.r = z.br
}
z.Header, z.err = z.readHeader()
return z.err
}
// Multistream controls whether the reader supports multistream files.
//
// If enabled (the default), the Reader expects the input to be a sequence
// of individually gzipped data streams, each with its own header and
// trailer, ending at EOF. The effect is that the concatenation of a sequence
// of gzipped files is treated as equivalent to the gzip of the concatenation
// of the sequence. This is standard behavior for gzip readers.
//
// Calling Multistream(false) disables this behavior; disabling the behavior
// can be useful when reading file formats that distinguish individual gzip
// data streams or mix gzip data streams with other data streams.
// In this mode, when the Reader reaches the end of the data stream,
// Read returns io.EOF. If the underlying reader implements io.ByteReader,
// it will be left positioned just after the gzip stream.
// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
// If there is no next stream, z.Reset(r) will return io.EOF.
func (z *Reader) Multistream(ok bool) {
z.multistream = ok
}
// readString reads a NUL-terminated string from z.r.
// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
// will output a string encoded using UTF-8.
// This method always updates z.digest with the data read.
func (z *Reader) readString() (string, error) {
var err error
needConv := false
for i := 0; ; i++ {
if i >= len(z.buf) {
return "", ErrHeader
}
z.buf[i], err = z.r.ReadByte()
if err != nil {
return "", err
}
if z.buf[i] > 0x7f {
needConv = true
}
if z.buf[i] == 0 {
// Digest covers the NUL terminator.
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1])
// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
if needConv {
s := make([]rune, 0, i)
for _, v := range z.buf[:i] {
s = append(s, rune(v))
}
return string(s), nil
}
return string(z.buf[:i]), nil
}
}
}
// readHeader reads the GZIP header according to section 2.3.1.
// This method does not set z.err.
func (z *Reader) readHeader() (hdr Header, err error) {
if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil {
// RFC 1952, section 2.2, says the following:
// A gzip file consists of a series of "members" (compressed data sets).
//
// Other than this, the specification does not clarify whether a
// "series" is defined as "one or more" or "zero or more". To err on the
// side of caution, Go interprets this to mean "zero or more".
// Thus, it is okay to return io.EOF here.
return hdr, err
}
if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
return hdr, ErrHeader
}
flg := z.buf[3]
hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0)
// z.buf[8] is XFL and is currently ignored.
hdr.OS = z.buf[9]
z.digest = crc32.ChecksumIEEE(z.buf[:10])
if flg&flagExtra != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2])
data := make([]byte, le.Uint16(z.buf[:2]))
if _, err = io.ReadFull(z.r, data); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, data)
hdr.Extra = data
}
var s string
if flg&flagName != 0 {
if s, err = z.readString(); err != nil {
return hdr, err
}
hdr.Name = s
}
if flg&flagComment != 0 {
if s, err = z.readString(); err != nil {
return hdr, err
}
hdr.Comment = s
}
if flg&flagHdrCrc != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
digest := le.Uint16(z.buf[:2])
if digest != uint16(z.digest) {
return hdr, ErrHeader
}
}
// Reserved FLG bits must be zero.
if flg>>5 != 0 {
return hdr, ErrHeader
}
z.digest = 0
if z.decompressor == nil {
z.decompressor = flate.NewReader(z.r)
} else {
z.decompressor.(flate.Resetter).Reset(z.r, nil)
}
return hdr, nil
}
// Read implements io.Reader, reading uncompressed bytes from its underlying Reader.
func (z *Reader) Read(p []byte) (n int, err error) {
if z.err != nil {
return 0, z.err
}
for n == 0 {
n, z.err = z.decompressor.Read(p)
z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n])
z.size += uint32(n)
if z.err != io.EOF {
// In the normal case we return here.
return n, z.err
}
// Finished file; check checksum and size.
if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
z.err = noEOF(err)
return n, z.err
}
digest := le.Uint32(z.buf[:4])
size := le.Uint32(z.buf[4:8])
if digest != z.digest || size != z.size {
z.err = ErrChecksum
return n, z.err
}
z.digest, z.size = 0, 0
// File is ok; check if there is another.
if !z.multistream {
return n, io.EOF
}
z.err = nil // Remove io.EOF
if _, z.err = z.readHeader(); z.err != nil {
return n, z.err
}
}
return n, nil
}
type crcer interface {
io.Writer
Sum32() uint32
Reset()
}
type crcUpdater struct {
z *Reader
}
func (c *crcUpdater) Write(p []byte) (int, error) {
c.z.digest = crc32.Update(c.z.digest, crc32.IEEETable, p)
return len(p), nil
}
func (c *crcUpdater) Sum32() uint32 {
return c.z.digest
}
func (c *crcUpdater) Reset() {
c.z.digest = 0
}
// WriteTo support the io.WriteTo interface for io.Copy and friends.
func (z *Reader) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
crcWriter := crcer(crc32.NewIEEE())
if z.digest != 0 {
crcWriter = &crcUpdater{z: z}
}
for {
if z.err != nil {
if z.err == io.EOF {
return total, nil
}
return total, z.err
}
// We write both to output and digest.
mw := io.MultiWriter(w, crcWriter)
n, err := z.decompressor.(io.WriterTo).WriteTo(mw)
total += n
z.size += uint32(n)
if err != nil {
z.err = err
return total, z.err
}
// Finished file; check checksum + size.
if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
z.err = err
return total, err
}
z.digest = crcWriter.Sum32()
digest := le.Uint32(z.buf[:4])
size := le.Uint32(z.buf[4:8])
if digest != z.digest || size != z.size {
z.err = ErrChecksum
return total, z.err
}
z.digest, z.size = 0, 0
// File is ok; check if there is another.
if !z.multistream {
return total, nil
}
crcWriter.Reset()
z.err = nil // Remove io.EOF
if _, z.err = z.readHeader(); z.err != nil {
if z.err == io.EOF {
return total, nil
}
return total, z.err
}
}
}
// Close closes the Reader. It does not close the underlying io.Reader.
// In order for the GZIP checksum to be verified, the reader must be
// fully consumed until the io.EOF.
func (z *Reader) Close() error { return z.decompressor.Close() }

View file

@ -1,290 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"errors"
"fmt"
"hash/crc32"
"io"
"github.com/klauspost/compress/flate"
)
// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
ConstantCompression = flate.ConstantCompression
HuffmanOnly = flate.HuffmanOnly
// StatelessCompression will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
StatelessCompression = -3
)
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
type Writer struct {
Header // written at first call to Write, Flush, or Close
w io.Writer
level int
err error
compressor *flate.Writer
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
wroteHeader bool
closed bool
buf [10]byte
}
// NewWriter returns a new Writer.
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser when done.
// Writes may be buffered and not flushed until Close.
//
// Callers that wish to set the fields in Writer.Header must do so before
// the first call to Write, Flush, or Close.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevel(w, DefaultCompression)
return z
}
// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression, or any
// integer value between BestSpeed and BestCompression inclusive. The error
// returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
if level < StatelessCompression || level > BestCompression {
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
}
z := new(Writer)
z.init(w, level)
return z, nil
}
// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
const MinCustomWindowSize = flate.MinCustomWindowSize
// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow.
const MaxCustomWindowSize = flate.MaxCustomWindowSize
// NewWriterWindow returns a new Writer compressing data with a custom window size.
// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize.
func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
if windowSize < MinCustomWindowSize {
return nil, errors.New("gzip: requested window size less than MinWindowSize")
}
if windowSize > MaxCustomWindowSize {
return nil, errors.New("gzip: requested window size bigger than MaxCustomWindowSize")
}
z := new(Writer)
z.init(w, -windowSize)
return z, nil
}
func (z *Writer) init(w io.Writer, level int) {
compressor := z.compressor
if level != StatelessCompression {
if compressor != nil {
compressor.Reset(w)
}
}
*z = Writer{
Header: Header{
OS: 255, // unknown
},
w: w,
level: level,
compressor: compressor,
}
}
// Reset discards the Writer z's state and makes it equivalent to the
// result of its original state from NewWriter or NewWriterLevel, but
// writing to w instead. This permits reusing a Writer rather than
// allocating a new one.
func (z *Writer) Reset(w io.Writer) {
z.init(w, z.level)
}
// writeBytes writes a length-prefixed byte slice to z.w.
func (z *Writer) writeBytes(b []byte) error {
if len(b) > 0xffff {
return errors.New("gzip.Write: Extra data is too large")
}
le.PutUint16(z.buf[:2], uint16(len(b)))
_, err := z.w.Write(z.buf[:2])
if err != nil {
return err
}
_, err = z.w.Write(b)
return err
}
// writeString writes a UTF-8 string s in GZIP's format to z.w.
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
func (z *Writer) writeString(s string) (err error) {
// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
needconv := false
for _, v := range s {
if v == 0 || v > 0xff {
return errors.New("gzip.Write: non-Latin-1 header string")
}
if v > 0x7f {
needconv = true
}
}
if needconv {
b := make([]byte, 0, len(s))
for _, v := range s {
b = append(b, byte(v))
}
_, err = z.w.Write(b)
} else {
_, err = io.WriteString(z.w, s)
}
if err != nil {
return err
}
// GZIP strings are NUL-terminated.
z.buf[0] = 0
_, err = z.w.Write(z.buf[:1])
return err
}
// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed until the Writer is closed.
func (z *Writer) Write(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
var n int
// Write the GZIP header lazily.
if !z.wroteHeader {
z.wroteHeader = true
z.buf[0] = gzipID1
z.buf[1] = gzipID2
z.buf[2] = gzipDeflate
z.buf[3] = 0
if z.Extra != nil {
z.buf[3] |= 0x04
}
if z.Name != "" {
z.buf[3] |= 0x08
}
if z.Comment != "" {
z.buf[3] |= 0x10
}
le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
if z.level == BestCompression {
z.buf[8] = 2
} else if z.level == BestSpeed {
z.buf[8] = 4
} else {
z.buf[8] = 0
}
z.buf[9] = z.OS
n, z.err = z.w.Write(z.buf[:10])
if z.err != nil {
return n, z.err
}
if z.Extra != nil {
z.err = z.writeBytes(z.Extra)
if z.err != nil {
return n, z.err
}
}
if z.Name != "" {
z.err = z.writeString(z.Name)
if z.err != nil {
return n, z.err
}
}
if z.Comment != "" {
z.err = z.writeString(z.Comment)
if z.err != nil {
return n, z.err
}
}
if z.compressor == nil && z.level != StatelessCompression {
z.compressor, _ = flate.NewWriter(z.w, z.level)
}
}
z.size += uint32(len(p))
z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
if z.level == StatelessCompression {
return len(p), flate.StatelessDeflate(z.w, p, false, nil)
}
n, z.err = z.compressor.Write(p)
return n, z.err
}
// Flush flushes any pending compressed data to the underlying writer.
//
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet. Flush does
// not return until the data has been written. If the underlying
// writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (z *Writer) Flush() error {
if z.err != nil {
return z.err
}
if z.closed || z.level == StatelessCompression {
return nil
}
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
z.err = z.compressor.Flush()
return z.err
}
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if z.err != nil {
return z.err
}
if z.closed {
return nil
}
z.closed = true
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
if z.level == StatelessCompression {
z.err = flate.StatelessDeflate(z.w, nil, true, nil)
} else {
z.err = z.compressor.Close()
}
if z.err != nil {
return z.err
}
le.PutUint32(z.buf[:4], z.digest)
le.PutUint32(z.buf[4:8], z.size)
_, z.err = z.w.Write(z.buf[:8])
return z.err
}

View file

@ -937,7 +937,7 @@ func WriterUncompressed() WriterOption {
// WriterBlockSize allows to override the default block size.
// Blocks will be this size or smaller.
// Minimum size is 4KB and and maximum size is 4MB.
// Minimum size is 4KB and maximum size is 4MB.
//
// Bigger blocks may give bigger throughput on systems with many cores,
// and will increase compression slightly, but it will limit the possible

View file

@ -1,16 +0,0 @@
cmd/snappytool/snappytool
testdata/bench
# These explicitly listed benchmark data files are for an obsolete version of
# snappy_test.go.
testdata/alice29.txt
testdata/asyoulik.txt
testdata/fireworks.jpeg
testdata/geo.protodata
testdata/html
testdata/html_x_4
testdata/kppkn.gtb
testdata/lcet10.txt
testdata/paper-100k.pdf
testdata/plrabn12.txt
testdata/urls.10K

View file

@ -1,18 +0,0 @@
# This is the official list of Snappy-Go authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Amazon.com, Inc
Damian Gryski <dgryski@gmail.com>
Eric Buth <eric@topos.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
Klaus Post <klauspost@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>

View file

@ -1,41 +0,0 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the Snappy-Go repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Alex Legg <alexlegg@google.com>
Damian Gryski <dgryski@gmail.com>
Eric Buth <eric@topos.com>
Jan Mercl <0xjnml@gmail.com>
Jonathan Swinney <jswinney@amazon.com>
Kai Backman <kaib@golang.org>
Klaus Post <klauspost@gmail.com>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Russ Cox <rsc@golang.org>
Sebastien Binet <seb.binet@gmail.com>

View file

@ -1,27 +0,0 @@
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,17 +0,0 @@
# snappy
The Snappy compression format in the Go programming language.
This is a drop-in replacement for `github.com/golang/snappy`.
It provides a full, compatible replacement of the Snappy package by simply changing imports.
See [Snappy Compatibility](https://github.com/klauspost/compress/tree/master/s2#snappy-compatibility) in the S2 documentation.
"Better" compression mode is used. For buffered streams concurrent compression is used.
For more options use the [s2 package](https://pkg.go.dev/github.com/klauspost/compress/s2).
# usage
Replace imports `github.com/golang/snappy` with `github.com/klauspost/compress/snappy`.

View file

@ -1,60 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"io"
"github.com/klauspost/compress/s2"
)
var (
// ErrCorrupt reports that the input is invalid.
ErrCorrupt = s2.ErrCorrupt
// ErrTooLarge reports that the uncompressed length is too large.
ErrTooLarge = s2.ErrTooLarge
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = s2.ErrUnsupported
)
const (
// maxBlockSize is the maximum size of the input to encodeBlock. It is not
// part of the wire format per se, but some parts of the encoder assume
// that an offset fits into a uint16.
//
// Also, for the framing format (Writer type instead of Encode function),
// https://github.com/google/snappy/blob/master/framing_format.txt says
// that "the uncompressed data in a chunk must be no longer than 65536
// bytes".
maxBlockSize = 65536
)
// DecodedLen returns the length of the decoded block.
func DecodedLen(src []byte) (int, error) {
return s2.DecodedLen(src)
}
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Decode handles the Snappy block format, not the Snappy stream format.
func Decode(dst, src []byte) ([]byte, error) {
return s2.Decode(dst, src)
}
// NewReader returns a new Reader that decompresses from r, using the framing
// format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
func NewReader(r io.Reader) *Reader {
return s2.NewReader(r, s2.ReaderMaxBlockSize(maxBlockSize))
}
// Reader is an io.Reader that can read Snappy-compressed bytes.
//
// Reader handles the Snappy stream format, not the Snappy block format.
type Reader = s2.Reader

View file

@ -1,59 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"io"
"github.com/klauspost/compress/s2"
)
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Encode handles the Snappy block format, not the Snappy stream format.
func Encode(dst, src []byte) []byte {
return s2.EncodeSnappyBetter(dst, src)
}
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
func MaxEncodedLen(srcLen int) int {
return s2.MaxEncodedLen(srcLen)
}
// NewWriter returns a new Writer that compresses to w.
//
// The Writer returned does not buffer writes. There is no need to Flush or
// Close such a Writer.
//
// Deprecated: the Writer returned is not suitable for many small writes, only
// for few large writes. Use NewBufferedWriter instead, which is efficient
// regardless of the frequency and shape of the writes, and remember to Close
// that Writer when done.
func NewWriter(w io.Writer) *Writer {
return s2.NewWriter(w, s2.WriterSnappyCompat(), s2.WriterBetterCompression(), s2.WriterFlushOnWrite(), s2.WriterConcurrency(1))
}
// NewBufferedWriter returns a new Writer that compresses to w, using the
// framing format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
//
// The Writer returned buffers writes. Users must call Close to guarantee all
// data has been forwarded to the underlying io.Writer. They may also call
// Flush zero or more times before calling Close.
func NewBufferedWriter(w io.Writer) *Writer {
return s2.NewWriter(w, s2.WriterSnappyCompat(), s2.WriterBetterCompression())
}
// Writer is an io.Writer that can write Snappy-compressed bytes.
//
// Writer handles the Snappy stream format, not the Snappy block format.
type Writer = s2.Writer

View file

@ -1,46 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the Snappy compression format. It aims for very
// high speeds and reasonable compression.
//
// There are actually two Snappy formats: block and stream. They are related,
// but different: trying to decompress block-compressed data as a Snappy stream
// will fail, and vice versa. The block format is the Decode and Encode
// functions and the stream format is the Reader and Writer types.
//
// The block format, the more common case, is used when the complete size (the
// number of bytes) of the original data is known upfront, at the time
// compression starts. The stream format, also known as the framing format, is
// for when that isn't always true.
//
// The canonical, C++ implementation is at https://github.com/google/snappy and
// it only implements the block format.
package snappy
/*
Each encoded block begins with the varint-encoded length of the decoded data,
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
first byte of each chunk is broken into its 2 least and 6 most significant bits
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
Zero means a literal tag. All other values mean a copy tag.
For literal tags:
- If m < 60, the next 1 + m bytes are literal bytes.
- Otherwise, let n be the little-endian unsigned integer denoted by the next
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
For copy tags, length bytes are copied from offset bytes ago, in the style of
Lempel-Ziv compression algorithms. In particular:
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
of the offset. The next byte is bits 0-7 of the offset.
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, this tag is a legacy format that is no longer issued by most
encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
integer denoted by the next 4 bytes.
*/

View file

@ -1,183 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package zlib implements reading and writing of zlib format compressed data,
as specified in RFC 1950.
The implementation provides filters that uncompress during reading
and compress during writing. For example, to write compressed data
to a buffer:
var b bytes.Buffer
w := zlib.NewWriter(&b)
w.Write([]byte("hello, world\n"))
w.Close()
and to read that data back:
r, err := zlib.NewReader(&b)
io.Copy(os.Stdout, r)
r.Close()
*/
package zlib
import (
"bufio"
"compress/zlib"
"hash"
"hash/adler32"
"io"
"github.com/klauspost/compress/flate"
)
const zlibDeflate = 8
var (
// ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
ErrChecksum = zlib.ErrChecksum
// ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
ErrDictionary = zlib.ErrDictionary
// ErrHeader is returned when reading ZLIB data that has an invalid header.
ErrHeader = zlib.ErrHeader
)
type reader struct {
r flate.Reader
decompressor io.ReadCloser
digest hash.Hash32
err error
scratch [4]byte
}
// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// NewReader creates a new ReadCloser.
// Reads from the returned ReadCloser read and decompress data from r.
// If r does not implement io.ByteReader, the decompressor may read more
// data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser when done.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) (io.ReadCloser, error) {
return NewReaderDict(r, nil)
}
// NewReaderDict is like NewReader but uses a preset dictionary.
// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
// If the compressed data refers to a different dictionary, NewReaderDict returns ErrDictionary.
//
// The ReadCloser returned by NewReaderDict also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
z := new(reader)
err := z.Reset(r, dict)
if err != nil {
return nil, err
}
return z, nil
}
func (z *reader) Read(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
var n int
n, z.err = z.decompressor.Read(p)
z.digest.Write(p[0:n])
if z.err != io.EOF {
// In the normal case we return here.
return n, z.err
}
// Finished file; check checksum.
if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
z.err = err
return n, z.err
}
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
if checksum != z.digest.Sum32() {
z.err = ErrChecksum
return n, z.err
}
return n, io.EOF
}
// Calling Close does not close the wrapped io.Reader originally passed to NewReader.
// In order for the ZLIB checksum to be verified, the reader must be
// fully consumed until the io.EOF.
func (z *reader) Close() error {
if z.err != nil && z.err != io.EOF {
return z.err
}
z.err = z.decompressor.Close()
return z.err
}
func (z *reader) Reset(r io.Reader, dict []byte) error {
*z = reader{decompressor: z.decompressor, digest: z.digest}
if fr, ok := r.(flate.Reader); ok {
z.r = fr
} else {
z.r = bufio.NewReader(r)
}
// Read the header (RFC 1950 section 2.2.).
_, z.err = io.ReadFull(z.r, z.scratch[0:2])
if z.err != nil {
if z.err == io.EOF {
z.err = io.ErrUnexpectedEOF
}
return z.err
}
h := uint(z.scratch[0])<<8 | uint(z.scratch[1])
if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) {
z.err = ErrHeader
return z.err
}
haveDict := z.scratch[1]&0x20 != 0
if haveDict {
_, z.err = io.ReadFull(z.r, z.scratch[0:4])
if z.err != nil {
if z.err == io.EOF {
z.err = io.ErrUnexpectedEOF
}
return z.err
}
checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
if checksum != adler32.Checksum(dict) {
z.err = ErrDictionary
return z.err
}
}
if z.decompressor == nil {
if haveDict {
z.decompressor = flate.NewReaderDict(z.r, dict)
} else {
z.decompressor = flate.NewReader(z.r)
}
} else {
z.decompressor.(flate.Resetter).Reset(z.r, dict)
}
if z.digest != nil {
z.digest.Reset()
} else {
z.digest = adler32.New()
}
return nil
}

View file

@ -1,201 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package zlib
import (
"fmt"
"hash"
"hash/adler32"
"io"
"github.com/klauspost/compress/flate"
)
// These constants are copied from the flate package, so that code that imports
// "compress/zlib" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
ConstantCompression = flate.ConstantCompression
HuffmanOnly = flate.HuffmanOnly
)
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
w io.Writer
level int
dict []byte
compressor *flate.Writer
digest hash.Hash32
err error
scratch [4]byte
wroteHeader bool
}
// NewWriter creates a new Writer.
// Writes to the returned Writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser when done.
// Writes may be buffered and not flushed until Close.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevelDict(w, DefaultCompression, nil)
return z
}
// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression, HuffmanOnly
// or any integer value between BestSpeed and BestCompression inclusive.
// The error returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
return NewWriterLevelDict(w, level, nil)
}
// NewWriterLevelDict is like NewWriterLevel but specifies a dictionary to
// compress with.
//
// The dictionary may be nil. If not, its contents should not be modified until
// the Writer is closed.
func NewWriterLevelDict(w io.Writer, level int, dict []byte) (*Writer, error) {
if level < HuffmanOnly || level > BestCompression {
return nil, fmt.Errorf("zlib: invalid compression level: %d", level)
}
return &Writer{
w: w,
level: level,
dict: dict,
}, nil
}
// Reset clears the state of the Writer z such that it is equivalent to its
// initial state from NewWriterLevel or NewWriterLevelDict, but instead writing
// to w.
func (z *Writer) Reset(w io.Writer) {
z.w = w
// z.level and z.dict left unchanged.
if z.compressor != nil {
z.compressor.Reset(w)
}
if z.digest != nil {
z.digest.Reset()
}
z.err = nil
z.scratch = [4]byte{}
z.wroteHeader = false
}
// writeHeader writes the ZLIB header.
func (z *Writer) writeHeader() (err error) {
z.wroteHeader = true
// ZLIB has a two-byte header (as documented in RFC 1950).
// The first four bits is the CINFO (compression info), which is 7 for the default deflate window size.
// The next four bits is the CM (compression method), which is 8 for deflate.
z.scratch[0] = 0x78
// The next two bits is the FLEVEL (compression level). The four values are:
// 0=fastest, 1=fast, 2=default, 3=best.
// The next bit, FDICT, is set if a dictionary is given.
// The final five FCHECK bits form a mod-31 checksum.
switch z.level {
case -2, 0, 1:
z.scratch[1] = 0 << 6
case 2, 3, 4, 5:
z.scratch[1] = 1 << 6
case 6, -1:
z.scratch[1] = 2 << 6
case 7, 8, 9:
z.scratch[1] = 3 << 6
default:
panic("unreachable")
}
if z.dict != nil {
z.scratch[1] |= 1 << 5
}
z.scratch[1] += uint8(31 - (uint16(z.scratch[0])<<8+uint16(z.scratch[1]))%31)
if _, err = z.w.Write(z.scratch[0:2]); err != nil {
return err
}
if z.dict != nil {
// The next four bytes are the Adler-32 checksum of the dictionary.
checksum := adler32.Checksum(z.dict)
z.scratch[0] = uint8(checksum >> 24)
z.scratch[1] = uint8(checksum >> 16)
z.scratch[2] = uint8(checksum >> 8)
z.scratch[3] = uint8(checksum >> 0)
if _, err = z.w.Write(z.scratch[0:4]); err != nil {
return err
}
}
if z.compressor == nil {
// Initialize deflater unless the Writer is being reused
// after a Reset call.
z.compressor, err = flate.NewWriterDict(z.w, z.level, z.dict)
if err != nil {
return err
}
z.digest = adler32.New()
}
return nil
}
// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed until the Writer is closed or
// explicitly flushed.
func (z *Writer) Write(p []byte) (n int, err error) {
if !z.wroteHeader {
z.err = z.writeHeader()
}
if z.err != nil {
return 0, z.err
}
if len(p) == 0 {
return 0, nil
}
n, err = z.compressor.Write(p)
if err != nil {
z.err = err
return
}
z.digest.Write(p)
return
}
// Flush flushes the Writer to its underlying io.Writer.
func (z *Writer) Flush() error {
if !z.wroteHeader {
z.err = z.writeHeader()
}
if z.err != nil {
return z.err
}
z.err = z.compressor.Flush()
return z.err
}
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if !z.wroteHeader {
z.err = z.writeHeader()
}
if z.err != nil {
return z.err
}
z.err = z.compressor.Close()
if z.err != nil {
return z.err
}
checksum := z.digest.Sum32()
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
z.scratch[0] = uint8(checksum >> 24)
z.scratch[1] = uint8(checksum >> 16)
z.scratch[2] = uint8(checksum >> 8)
z.scratch[3] = uint8(checksum >> 0)
_, z.err = z.w.Write(z.scratch[0:4])
return z.err
}

20
vendor/modules.txt vendored
View file

@ -61,10 +61,13 @@ codeberg.org/gruf/go-runners
# codeberg.org/gruf/go-sched v1.2.3
## explicit; go 1.19
codeberg.org/gruf/go-sched
# codeberg.org/gruf/go-store/v2 v2.2.4
## explicit; go 1.19
codeberg.org/gruf/go-store/v2/storage
codeberg.org/gruf/go-store/v2/util
# codeberg.org/gruf/go-storage v0.1.1
## explicit; go 1.22
codeberg.org/gruf/go-storage
codeberg.org/gruf/go-storage/disk
codeberg.org/gruf/go-storage/internal
codeberg.org/gruf/go-storage/memory
codeberg.org/gruf/go-storage/s3
# codeberg.org/gruf/go-structr v0.8.4
## explicit; go 1.21
codeberg.org/gruf/go-structr
@ -168,9 +171,6 @@ github.com/coreos/go-oidc/v3/oidc
# github.com/coreos/go-systemd/v22 v22.3.2
## explicit; go 1.12
github.com/coreos/go-systemd/v22/dbus
# github.com/cornelk/hashmap v1.0.8
## explicit; go 1.19
github.com/cornelk/hashmap
# github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
## explicit
github.com/davecgh/go-spew/spew
@ -452,14 +452,10 @@ github.com/josharian/intern
# github.com/json-iterator/go v1.1.12
## explicit; go 1.12
github.com/json-iterator/go
# github.com/klauspost/compress v1.17.7
# github.com/klauspost/compress v1.17.8
## explicit; go 1.20
github.com/klauspost/compress/flate
github.com/klauspost/compress/gzip
github.com/klauspost/compress/internal/race
github.com/klauspost/compress/s2
github.com/klauspost/compress/snappy
github.com/klauspost/compress/zlib
# github.com/klauspost/cpuid/v2 v2.2.7
## explicit; go 1.15
github.com/klauspost/cpuid/v2