From 9a22102fa8b1ce47571d5bba71e8f36895d21bf0 Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Thu, 22 Jun 2023 20:46:36 +0100 Subject: [PATCH] [bugfix/chore] oauth entropy fix + media cleanup tasks rewrite (#1853) --- .../action/admin/media/prune/all.go | 61 ++ .../action/admin/media/prune/common.go | 15 +- .../action/admin/media/prune/orphaned.go | 28 +- .../action/admin/media/prune/remote.go | 39 +- cmd/gotosocial/admin.go | 19 +- internal/api/client/search/searchget_test.go | 2 +- internal/cache/cache.go | 7 + internal/cleaner/cleaner.go | 135 +++++ internal/cleaner/emoji.go | 238 ++++++++ internal/cleaner/media.go | 547 ++++++++++++++++++ internal/cleaner/media_test.go | 427 ++++++++++++++ internal/db/bundb/emoji.go | 222 +++++-- internal/db/bundb/emoji_test.go | 22 +- internal/db/bundb/media.go | 140 ++++- internal/db/bundb/session.go | 11 +- internal/db/emoji.go | 8 +- internal/db/media.go | 3 + internal/federation/dereferencing/account.go | 191 +++--- .../federation/dereferencing/account_test.go | 17 +- internal/federation/dereferencing/error.go | 18 - internal/federation/dereferencing/status.go | 42 +- internal/federation/dereferencing/thread.go | 14 +- internal/gtscontext/context.go | 15 + internal/gtserror/error.go | 24 + internal/media/manager.go | 41 +- internal/media/manager_test.go | 6 +- internal/media/processingemoji.go | 29 +- internal/media/processingmedia.go | 31 +- internal/media/prune.go | 373 ------------ internal/media/prune_test.go | 357 ------------ internal/media/refetch.go | 2 +- internal/processing/admin/admin.go | 3 + internal/processing/admin/emoji.go | 14 +- internal/processing/admin/media.go | 12 +- internal/processing/search/get.go | 39 +- internal/processing/search/lookup.go | 4 +- internal/storage/storage.go | 8 +- testrig/config.go | 2 +- 38 files changed, 2076 insertions(+), 1090 deletions(-) create mode 100644 cmd/gotosocial/action/admin/media/prune/all.go create mode 100644 internal/cleaner/cleaner.go create mode 100644 internal/cleaner/emoji.go create mode 100644 internal/cleaner/media.go create mode 100644 internal/cleaner/media_test.go delete mode 100644 internal/media/prune.go delete mode 100644 internal/media/prune_test.go diff --git a/cmd/gotosocial/action/admin/media/prune/all.go b/cmd/gotosocial/action/admin/media/prune/all.go new file mode 100644 index 000000000..7642fe928 --- /dev/null +++ b/cmd/gotosocial/action/admin/media/prune/all.go @@ -0,0 +1,61 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package prune + +import ( + "context" + + "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action" + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/log" +) + +// All performs all media clean actions +var All action.GTSAction = func(ctx context.Context) error { + // Setup pruning utilities. + prune, err := setupPrune(ctx) + if err != nil { + return err + } + + defer func() { + // Ensure pruner gets shutdown on exit. + if err := prune.shutdown(ctx); err != nil { + log.Error(ctx, err) + } + }() + + if config.GetAdminMediaPruneDryRun() { + log.Info(ctx, "prune DRY RUN") + ctx = gtscontext.SetDryRun(ctx) + } + + days := config.GetMediaRemoteCacheDays() + + // Perform the actual pruning with logging. + prune.cleaner.Media().All(ctx, days) + prune.cleaner.Emoji().All(ctx) + + // Perform a cleanup of storage (for removed local dirs). + if err := prune.storage.Storage.Clean(ctx); err != nil { + log.Error(ctx, "error cleaning storage: %v", err) + } + + return nil +} diff --git a/cmd/gotosocial/action/admin/media/prune/common.go b/cmd/gotosocial/action/admin/media/prune/common.go index 7b2f097e4..0db1a3462 100644 --- a/cmd/gotosocial/action/admin/media/prune/common.go +++ b/cmd/gotosocial/action/admin/media/prune/common.go @@ -21,8 +21,10 @@ "context" "fmt" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/db/bundb" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/state" gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage" @@ -32,6 +34,7 @@ type prune struct { dbService db.DB storage *gtsstorage.Driver manager *media.Manager + cleaner *cleaner.Cleaner state *state.State } @@ -59,25 +62,31 @@ func setupPrune(ctx context.Context) (*prune, error) { //nolint:contextcheck manager := media.NewManager(&state) + //nolint:contextcheck + cleaner := cleaner.New(&state) + return &prune{ dbService: dbService, storage: storage, manager: manager, + cleaner: cleaner, state: &state, }, nil } func (p *prune) shutdown(ctx context.Context) error { + var errs gtserror.MultiError + if err := p.storage.Close(); err != nil { - return fmt.Errorf("error closing storage backend: %w", err) + errs.Appendf("error closing storage backend: %v", err) } if err := p.dbService.Stop(ctx); err != nil { - return fmt.Errorf("error closing dbservice: %w", err) + errs.Appendf("error stopping database: %v", err) } p.state.Workers.Stop() p.state.Caches.Stop() - return nil + return errs.Combine() } diff --git a/cmd/gotosocial/action/admin/media/prune/orphaned.go b/cmd/gotosocial/action/admin/media/prune/orphaned.go index 2711007cd..a94c84422 100644 --- a/cmd/gotosocial/action/admin/media/prune/orphaned.go +++ b/cmd/gotosocial/action/admin/media/prune/orphaned.go @@ -19,32 +19,40 @@ import ( "context" - "fmt" "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/log" ) // Orphaned prunes orphaned media from storage. var Orphaned action.GTSAction = func(ctx context.Context) error { + // Setup pruning utilities. prune, err := setupPrune(ctx) if err != nil { return err } - dry := config.GetAdminMediaPruneDryRun() + defer func() { + // Ensure pruner gets shutdown on exit. + if err := prune.shutdown(ctx); err != nil { + log.Error(ctx, err) + } + }() - pruned, err := prune.manager.PruneOrphaned(ctx, dry) - if err != nil { - return fmt.Errorf("error pruning: %s", err) + if config.GetAdminMediaPruneDryRun() { + log.Info(ctx, "prune DRY RUN") + ctx = gtscontext.SetDryRun(ctx) } - if dry /* dick heyyoooooo */ { - log.Infof(ctx, "DRY RUN: %d items are orphaned and eligible to be pruned", pruned) - } else { - log.Infof(ctx, "%d orphaned items were pruned", pruned) + // Perform the actual pruning with logging. + prune.cleaner.Media().LogPruneOrphaned(ctx) + + // Perform a cleanup of storage (for removed local dirs). + if err := prune.storage.Storage.Clean(ctx); err != nil { + log.Error(ctx, "error cleaning storage: %v", err) } - return prune.shutdown(ctx) + return nil } diff --git a/cmd/gotosocial/action/admin/media/prune/remote.go b/cmd/gotosocial/action/admin/media/prune/remote.go index ef886f443..ed521cfe8 100644 --- a/cmd/gotosocial/action/admin/media/prune/remote.go +++ b/cmd/gotosocial/action/admin/media/prune/remote.go @@ -19,39 +19,44 @@ import ( "context" - "fmt" + "time" "github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/log" ) // Remote prunes old and/or unused remote media. var Remote action.GTSAction = func(ctx context.Context) error { + // Setup pruning utilities. prune, err := setupPrune(ctx) if err != nil { return err } - dry := config.GetAdminMediaPruneDryRun() + defer func() { + // Ensure pruner gets shutdown on exit. + if err := prune.shutdown(ctx); err != nil { + log.Error(ctx, err) + } + }() - pruned, err := prune.manager.PruneUnusedRemote(ctx, dry) - if err != nil { - return fmt.Errorf("error pruning: %w", err) + if config.GetAdminMediaPruneDryRun() { + log.Info(ctx, "prune DRY RUN") + ctx = gtscontext.SetDryRun(ctx) } - uncached, err := prune.manager.UncacheRemote(ctx, config.GetMediaRemoteCacheDays(), dry) - if err != nil { - return fmt.Errorf("error pruning: %w", err) + t := time.Now().Add(-24 * time.Hour * time.Duration(config.GetMediaRemoteCacheDays())) + + // Perform the actual pruning with logging. + prune.cleaner.Media().LogPruneUnused(ctx) + prune.cleaner.Media().LogUncacheRemote(ctx, t) + + // Perform a cleanup of storage (for removed local dirs). + if err := prune.storage.Storage.Clean(ctx); err != nil { + log.Error(ctx, "error cleaning storage: %v", err) } - total := pruned + uncached - - if dry /* dick heyyoooooo */ { - log.Infof(ctx, "DRY RUN: %d remote items are unused/stale and eligible to be pruned", total) - } else { - log.Infof(ctx, "%d unused/stale remote items were pruned", pruned) - } - - return prune.shutdown(ctx) + return nil } diff --git a/cmd/gotosocial/admin.go b/cmd/gotosocial/admin.go index 810d57e54..3dad3e3d6 100644 --- a/cmd/gotosocial/admin.go +++ b/cmd/gotosocial/admin.go @@ -170,7 +170,7 @@ func adminCommands() *cobra.Command { adminMediaCmd := &cobra.Command{ Use: "media", - Short: "admin commands related stored media attachments/emojis", + Short: "admin commands related to stored media / emojis", } /* @@ -178,7 +178,7 @@ func adminCommands() *cobra.Command { */ adminMediaPruneCmd := &cobra.Command{ Use: "prune", - Short: "admin commands for pruning unused/orphaned media from storage", + Short: "admin commands for pruning media from storage", } adminMediaPruneOrphanedCmd := &cobra.Command{ @@ -196,7 +196,7 @@ func adminCommands() *cobra.Command { adminMediaPruneRemoteCmd := &cobra.Command{ Use: "remote", - Short: "prune unused/stale remote media from storage, older than given number of days", + Short: "prune unused / stale media from storage, older than given number of days", PreRunE: func(cmd *cobra.Command, args []string) error { return preRun(preRunArgs{cmd: cmd}) }, @@ -207,6 +207,19 @@ func adminCommands() *cobra.Command { config.AddAdminMediaPrune(adminMediaPruneRemoteCmd) adminMediaPruneCmd.AddCommand(adminMediaPruneRemoteCmd) + adminMediaPruneAllCmd := &cobra.Command{ + Use: "all", + Short: "perform all media and emoji prune / cleaning commands", + PreRunE: func(cmd *cobra.Command, args []string) error { + return preRun(preRunArgs{cmd: cmd}) + }, + RunE: func(cmd *cobra.Command, args []string) error { + return run(cmd.Context(), prune.All) + }, + } + config.AddAdminMediaPrune(adminMediaPruneAllCmd) + adminMediaPruneCmd.AddCommand(adminMediaPruneAllCmd) + adminMediaCmd.AddCommand(adminMediaPruneCmd) adminCmd.AddCommand(adminMediaCmd) diff --git a/internal/api/client/search/searchget_test.go b/internal/api/client/search/searchget_test.go index 9e0a8eb67..f6a2db70a 100644 --- a/internal/api/client/search/searchget_test.go +++ b/internal/api/client/search/searchget_test.go @@ -120,7 +120,7 @@ func (suite *SearchGetTestSuite) getSearch( // Check expected code + body. if resultCode := recorder.Code; expectedHTTPStatus != resultCode { - errs = append(errs, fmt.Sprintf("expected %d got %d", expectedHTTPStatus, resultCode)) + errs = append(errs, fmt.Sprintf("expected %d got %d: %v", expectedHTTPStatus, resultCode, ctx.Errors.JSON())) } // If we got an expected body, return early. diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 510b6eb53..df7d9a7ae 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -92,6 +92,13 @@ func (c *Caches) setuphooks() { c.Visibility.Invalidate("RequesterID", block.TargetAccountID) }) + c.GTS.EmojiCategory().SetInvalidateCallback(func(category *gtsmodel.EmojiCategory) { + // Invalidate entire emoji cache, + // as we can't know which emojis + // specifically this will effect. + c.GTS.Emoji().Clear() + }) + c.GTS.Follow().SetInvalidateCallback(func(follow *gtsmodel.Follow) { // Invalidate follow origin account ID cached visibility. c.Visibility.Invalidate("ItemID", follow.AccountID) diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go new file mode 100644 index 000000000..ee1e4785f --- /dev/null +++ b/internal/cleaner/cleaner.go @@ -0,0 +1,135 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cleaner + +import ( + "context" + "errors" + "time" + + "codeberg.org/gruf/go-runners" + "codeberg.org/gruf/go-sched" + "codeberg.org/gruf/go-store/v2/storage" + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/state" +) + +const ( + selectLimit = 50 +) + +type Cleaner struct { + state *state.State + emoji Emoji + media Media +} + +func New(state *state.State) *Cleaner { + c := new(Cleaner) + c.state = state + c.emoji.Cleaner = c + c.media.Cleaner = c + scheduleJobs(c) + return c +} + +// Emoji returns the emoji set of cleaner utilities. +func (c *Cleaner) Emoji() *Emoji { + return &c.emoji +} + +// Media returns the media set of cleaner utilities. +func (c *Cleaner) Media() *Media { + return &c.media +} + +// checkFiles checks for each of the provided files, and calls onMissing() if any of them are missing. Returns true if missing. +func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) { + for _, file := range files { + // Check whether each file exists in storage. + have, err := c.state.Storage.Has(ctx, file) + if err != nil { + return false, gtserror.Newf("error checking storage for %s: %w", file, err) + } else if !have { + // Missing files, perform hook. + return true, onMissing() + } + } + return false, nil +} + +// removeFiles removes the provided files, returning the number of them returned. +func (c *Cleaner) removeFiles(ctx context.Context, files ...string) (int, error) { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return len(files), nil + } + + var errs gtserror.MultiError + + for _, path := range files { + // Remove each provided storage path. + log.Debugf(ctx, "removing file: %s", path) + err := c.state.Storage.Delete(ctx, path) + if err != nil && !errors.Is(err, storage.ErrNotFound) { + errs.Appendf("error removing %s: %v", path, err) + } + } + + // Calculate no. files removed. + diff := len(files) - len(errs) + + // Wrap the combined error slice. + if err := errs.Combine(); err != nil { + return diff, gtserror.Newf("error(s) removing files: %w", err) + } + + return diff, nil +} + +func scheduleJobs(c *Cleaner) { + const day = time.Hour * 24 + + // Calculate closest midnight. + now := time.Now() + midnight := now.Round(day) + + if midnight.Before(now) { + // since <= 11:59am rounds down. + midnight = midnight.Add(day) + } + + // Get ctx associated with scheduler run state. + done := c.state.Workers.Scheduler.Done() + doneCtx := runners.CancelCtx(done) + + // TODO: we'll need to do some thinking to make these + // jobs restartable if we want to implement reloads in + // the future that make call to Workers.Stop() -> Workers.Start(). + + // Schedule the cleaning tasks to execute every day at midnight. + c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) { + log.Info(nil, "starting media clean") + c.Media().All(doneCtx, config.GetMediaRemoteCacheDays()) + c.Emoji().All(doneCtx) + log.Infof(nil, "finished media clean after %s", time.Since(start)) + }).EveryAt(midnight, day)) +} diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go new file mode 100644 index 000000000..35e579171 --- /dev/null +++ b/internal/cleaner/emoji.go @@ -0,0 +1,238 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cleaner + +import ( + "context" + "errors" + + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" +) + +// Emoji encompasses a set of +// emoji cleanup / admin utils. +type Emoji struct { + *Cleaner +} + +// All will execute all cleaner.Emoji utilities synchronously, including output logging. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (e *Emoji) All(ctx context.Context) { + e.LogPruneMissing(ctx) + e.LogFixBroken(ctx) +} + +// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome. +func (e *Emoji) LogPruneMissing(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.PruneMissing(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome. +func (e *Emoji) LogFixBroken(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.FixBroken(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "fixed: %d", n) + } +} + +// PruneMissing will delete emoji with missing files from the database and storage driver. +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of emoji media up to next ID. + emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting emojis: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last as the next 'maxID' value. + maxID = emojis[len(emojis)-1].ID + + for _, emoji := range emojis { + // Check / fix missing emoji media. + fixed, err := e.pruneMissing(ctx, emoji) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixBroken will check all emojis for valid related models (e.g. category). +// Broken media will be automatically updated to remove now-missing models. +// Context will be checked for `gtscontext.DryRun()` to perform the action. +func (e *Emoji) FixBroken(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of emoji media up to next ID. + emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting emojis: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last as the next 'maxID' value. + maxID = emojis[len(emojis)-1].ID + + for _, emoji := range emojis { + // Check / fix missing broken emoji. + fixed, err := e.fixBroken(ctx, emoji) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + return e.checkFiles(ctx, func() error { + // Emoji missing files, delete it. + // NOTE: if we ever support uncaching + // of emojis, change to e.uncache(). + // In that case we should also rename + // this function to match the media + // equivalent -> fixCacheState(). + log.WithContext(ctx). + WithField("emoji", emoji.ID). + Debug("deleting due to missing emoji") + return e.delete(ctx, emoji) + }, + emoji.ImageStaticPath, + emoji.ImagePath, + ) +} + +func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Check we have the required category for emoji. + _, missing, err := e.getRelatedCategory(ctx, emoji) + if err != nil { + return false, err + } + + if missing { + if !gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return true, nil + } + + // Remove related category. + emoji.CategoryID = "" + + // Update emoji model in the database to remove category ID. + log.Debugf(ctx, "fixing missing emoji category: %s", emoji.ID) + if err := e.state.DB.UpdateEmoji(ctx, emoji, "category_id"); err != nil { + return true, gtserror.Newf("error updating emoji: %w", err) + } + + return true, nil + } + + return false, nil +} + +func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) (*gtsmodel.EmojiCategory, bool, error) { + if emoji.CategoryID == "" { + // no related category. + return nil, false, nil + } + + // Load the category related to this emoji. + category, err := e.state.DB.GetEmojiCategory( + gtscontext.SetBarebones(ctx), + emoji.CategoryID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching category by id %s: %w", emoji.CategoryID, err) + } + + if category == nil { + // Category is missing. + return nil, true, nil + } + + return category, false, nil +} + +func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove emoji and static files. + _, err := e.removeFiles(ctx, + emoji.ImageStaticPath, + emoji.ImagePath, + ) + if err != nil { + return gtserror.Newf("error removing emoji files: %w", err) + } + + // Delete emoji entirely from the database by its ID. + if err := e.state.DB.DeleteEmojiByID(ctx, emoji.ID); err != nil { + return gtserror.Newf("error deleting emoji: %w", err) + } + + return nil +} diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go new file mode 100644 index 000000000..51a0aea6d --- /dev/null +++ b/internal/cleaner/media.go @@ -0,0 +1,547 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cleaner + +import ( + "context" + "errors" + "time" + + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/media" + "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/uris" +) + +// Media encompasses a set of +// media cleanup / admin utils. +type Media struct { + *Cleaner +} + +// All will execute all cleaner.Media utilities synchronously, including output logging. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) All(ctx context.Context, maxRemoteDays int) { + t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays)) + m.LogUncacheRemote(ctx, t) + m.LogPruneOrphaned(ctx) + m.LogPruneUnused(ctx) + m.LogFixCacheStates(ctx) + _ = m.state.Storage.Storage.Clean(ctx) +} + +// LogUncacheRemote performs Media.UncacheRemote(...), logging the start and outcome. +func (m *Media) LogUncacheRemote(ctx context.Context, olderThan time.Time) { + log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp)) + if n, err := m.UncacheRemote(ctx, olderThan); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "uncached: %d", n) + } +} + +// LogPruneOrphaned performs Media.PruneOrphaned(...), logging the start and outcome. +func (m *Media) LogPruneOrphaned(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.PruneOrphaned(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogPruneUnused performs Media.PruneUnused(...), logging the start and outcome. +func (m *Media) LogPruneUnused(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.PruneUnused(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogFixCacheStates performs Media.FixCacheStates(...), logging the start and outcome. +func (m *Media) LogFixCacheStates(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.FixCacheStates(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "fixed: %d", n) + } +} + +// PruneOrphaned will delete orphaned files from storage (i.e. media missing a database entry). +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) PruneOrphaned(ctx context.Context) (int, error) { + var files []string + + // All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext} + if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error { + if !regexes.FilePath.MatchString(path) { + // This is not our expected media + // path format, skip this one. + return nil + } + + // Check whether this entry is orphaned. + orphaned, err := m.isOrphaned(ctx, path) + if err != nil { + return gtserror.Newf("error checking orphaned status: %w", err) + } + + if orphaned { + // Add this orphaned entry. + files = append(files, path) + } + + return nil + }); err != nil { + return 0, gtserror.Newf("error walking storage: %w", err) + } + + // Delete all orphaned files from storage. + return m.removeFiles(ctx, files...) +} + +// PruneUnused will delete all unused media attachments from the database and storage driver. +// Media is marked as unused if not attached to any status, account or account is suspended. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) PruneUnused(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of media attachments up to next max ID. + attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting attachments: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last ID as the next 'maxID' value. + maxID = attachments[len(attachments)-1].ID + + for _, media := range attachments { + // Check / prune unused media attachment. + fixed, err := m.pruneUnused(ctx, media) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// UncacheRemote will uncache all remote media attachments older than given input time. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) { + var total int + + // Drop time by a minute to improve search, + // (i.e. make it olderThan inclusive search). + olderThan = olderThan.Add(-time.Minute) + + // Store recent time. + mostRecent := olderThan + + for { + // Fetch the next batch of attachments older than last-set time. + attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting remote media: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last created-at as the next 'olderThan' value. + olderThan = attachments[len(attachments)-1].CreatedAt + + for _, media := range attachments { + // Check / uncache each remote media attachment. + uncached, err := m.uncacheRemote(ctx, mostRecent, media) + if err != nil { + return total, err + } + + if uncached { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixCacheStatus will check all media for up-to-date cache status (i.e. in storage driver). +// Media marked as cached, with any required files missing, will be automatically uncached. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) FixCacheStates(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of media attachments up to next max ID. + attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting avatars / headers: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last ID as the next 'maxID' value. + maxID = attachments[len(attachments)-1].ID + + for _, media := range attachments { + // Check / fix required media cache states. + fixed, err := m.fixCacheState(ctx, media) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +func (m *Media) isOrphaned(ctx context.Context, path string) (bool, error) { + pathParts := regexes.FilePath.FindStringSubmatch(path) + if len(pathParts) != 6 { + // This doesn't match our expectations so + // it wasn't created by gts; ignore it. + return false, nil + } + + var ( + // 0th -> whole match + // 1st -> account ID + mediaType = pathParts[2] + // 3rd -> media sub-type (e.g. small, static) + mediaID = pathParts[4] + // 5th -> file extension + ) + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", mediaID) + + switch media.Type(mediaType) { + case media.TypeAttachment: + // Look for media in database stored by ID. + media, err := m.state.DB.GetAttachmentByID( + gtscontext.SetBarebones(ctx), + mediaID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, gtserror.Newf("error fetching media by id %s: %w", mediaID, err) + } + + if media == nil { + l.Debug("missing db entry for media") + return true, nil + } + + case media.TypeEmoji: + // Generate static URL for this emoji to lookup. + staticURL := uris.GenerateURIForAttachment( + pathParts[1], // instance account ID + string(media.TypeEmoji), + string(media.SizeStatic), + mediaID, + "png", + ) + + // Look for emoji in database stored by static URL. + // The media ID part of the storage key for emojis can + // change for refreshed items, so search by generated URL. + emoji, err := m.state.DB.GetEmojiByStaticURL( + gtscontext.SetBarebones(ctx), + staticURL, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, gtserror.Newf("error fetching emoji by url %s: %w", staticURL, err) + } + + if emoji == nil { + l.Debug("missing db entry for emoji") + return true, nil + } + } + + return false, nil +} + +func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + account, missing, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("deleting due to missing account") + return true, m.delete(ctx, media) + } + + if account != nil { + // Related account exists for this media, check whether it is being used. + headerInUse := (*media.Header && media.ID == account.HeaderMediaAttachmentID) + avatarInUse := (*media.Avatar && media.ID == account.AvatarMediaAttachmentID) + if (headerInUse || avatarInUse) && account.SuspendedAt.IsZero() { + l.Debug("skipping as account media in use") + return false, nil + } + } + + // Check whether we have the required status for media. + status, missing, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("deleting due to missing status") + return true, m.delete(ctx, media) + } + + if status != nil { + // Check whether still attached to status. + for _, id := range status.AttachmentIDs { + if id == media.ID { + l.Debug("skippping as attached to status") + return false, nil + } + } + } + + // Media totally unused, delete it. + l.Debug("deleting unused media") + return true, m.delete(ctx, media) +} + +func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { + if !*media.Cached { + // We ignore uncached media, a + // false negative is a much better + // situation than a false positive, + // re-cache will just overwrite it. + return false, nil + } + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + _, missingAccount, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missingAccount { + l.Debug("skipping due to missing account") + return false, nil + } + + // Check whether we have the required status for media. + _, missingStatus, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missingStatus { + l.Debug("skipping due to missing status") + return false, nil + } + + // So we know this a valid cached media entry. + // Check that we have the files on disk required.... + return m.checkFiles(ctx, func() error { + l.Debug("uncaching due to missing media") + return m.uncache(ctx, media) + }, + media.Thumbnail.Path, + media.File.Path, + ) +} + +func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) { + if !*media.Cached { + // Already uncached. + return false, nil + } + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + account, missing, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("skipping due to missing account") + return false, nil + } + + if account != nil && account.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched account") + return false, nil + } + + // Check whether we have the required status for media. + status, missing, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("skipping due to missing status") + return false, nil + } + + if status != nil && status.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched status") + return false, nil + } + + // This media is too old, uncache it. + l.Debug("uncaching old remote media") + return true, m.uncache(ctx, media) +} + +func (m *Media) getRelatedAccount(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Account, bool, error) { + if media.AccountID == "" { + // no related account. + return nil, false, nil + } + + // Load the account related to this media. + account, err := m.state.DB.GetAccountByID( + gtscontext.SetBarebones(ctx), + media.AccountID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching account by id %s: %w", media.AccountID, err) + } + + if account == nil { + // account is missing. + return nil, true, nil + } + + return account, false, nil +} + +func (m *Media) getRelatedStatus(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Status, bool, error) { + if media.StatusID == "" { + // no related status. + return nil, false, nil + } + + // Load the status related to this media. + status, err := m.state.DB.GetStatusByID( + gtscontext.SetBarebones(ctx), + media.StatusID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching status by id %s: %w", media.StatusID, err) + } + + if status == nil { + // status is missing. + return nil, true, nil + } + + return status, false, nil +} + +func (m *Media) uncache(ctx context.Context, media *gtsmodel.MediaAttachment) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove media and thumbnail. + _, err := m.removeFiles(ctx, + media.File.Path, + media.Thumbnail.Path, + ) + if err != nil { + return gtserror.Newf("error removing media files: %w", err) + } + + // Update attachment to reflect that we no longer have it cached. + log.Debugf(ctx, "marking media attachment as uncached: %s", media.ID) + media.Cached = func() *bool { i := false; return &i }() + if err := m.state.DB.UpdateAttachment(ctx, media, "cached"); err != nil { + return gtserror.Newf("error updating media: %w", err) + } + + return nil +} + +func (m *Media) delete(ctx context.Context, media *gtsmodel.MediaAttachment) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove media and thumbnail. + _, err := m.removeFiles(ctx, + media.File.Path, + media.Thumbnail.Path, + ) + if err != nil { + return gtserror.Newf("error removing media files: %w", err) + } + + // Delete media attachment entirely from the database. + log.Debugf(ctx, "deleting media attachment: %s", media.ID) + if err := m.state.DB.DeleteAttachment(ctx, media.ID); err != nil { + return gtserror.Newf("error deleting media: %w", err) + } + + return nil +} diff --git a/internal/cleaner/media_test.go b/internal/cleaner/media_test.go new file mode 100644 index 000000000..824df2ca5 --- /dev/null +++ b/internal/cleaner/media_test.go @@ -0,0 +1,427 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cleaner_test + +import ( + "bytes" + "context" + "io" + "os" + "testing" + "time" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/media" + "github.com/superseriousbusiness/gotosocial/internal/state" + "github.com/superseriousbusiness/gotosocial/internal/storage" + "github.com/superseriousbusiness/gotosocial/internal/transport" + "github.com/superseriousbusiness/gotosocial/internal/visibility" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +type MediaTestSuite struct { + suite.Suite + + db db.DB + storage *storage.Driver + state state.State + manager *media.Manager + cleaner *cleaner.Cleaner + transportController transport.Controller + testAttachments map[string]*gtsmodel.MediaAttachment + testAccounts map[string]*gtsmodel.Account + testEmojis map[string]*gtsmodel.Emoji +} + +func TestMediaTestSuite(t *testing.T) { + suite.Run(t, &MediaTestSuite{}) +} + +func (suite *MediaTestSuite) SetupTest() { + testrig.InitTestConfig() + testrig.InitTestLog() + + suite.state.Caches.Init() + testrig.StartWorkers(&suite.state) + + suite.db = testrig.NewTestDB(&suite.state) + suite.storage = testrig.NewInMemoryStorage() + suite.state.DB = suite.db + suite.state.Storage = suite.storage + + testrig.StandardStorageSetup(suite.storage, "../../testrig/media") + testrig.StandardDBSetup(suite.db, nil) + + testrig.StartTimelines( + &suite.state, + visibility.NewFilter(&suite.state), + testrig.NewTestTypeConverter(suite.db), + ) + + suite.testAttachments = testrig.NewTestAttachments() + suite.testAccounts = testrig.NewTestAccounts() + suite.testEmojis = testrig.NewTestEmojis() + suite.manager = testrig.NewTestMediaManager(&suite.state) + suite.cleaner = cleaner.New(&suite.state) + suite.transportController = testrig.NewTestTransportController(&suite.state, testrig.NewMockHTTPClient(nil, "../../testrig/media")) +} + +func (suite *MediaTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) + testrig.StandardStorageTeardown(suite.storage) + testrig.StopWorkers(&suite.state) +} + +// func (suite *MediaTestSuite) TestPruneOrphanedDry() { +// // add a big orphan panda to store +// b, err := os.ReadFile("../media/test/big-panda.gif") +// if err != nil { +// suite.FailNow(err.Error()) +// } + +// pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" +// if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { +// suite.FailNow(err.Error()) +// } + +// ctx := context.Background() + +// // dry run should show up 1 orphaned panda +// totalPruned, err := suite.cleaner.Media().PruneOrphaned(gtscontext.SetDryRun(ctx)) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// // panda should still be in storage +// hasKey, err := suite.storage.Has(ctx, pandaPath) +// suite.NoError(err) +// suite.True(hasKey) +// } + +// func (suite *MediaTestSuite) TestPruneOrphanedMoist() { +// // i am not complicit in the moistness of this codebase :| + +// // add a big orphan panda to store +// b, err := os.ReadFile("../media/test/big-panda.gif") +// if err != nil { +// suite.FailNow(err.Error()) +// } + +// pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" +// if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { +// suite.FailNow(err.Error()) +// } + +// ctx := context.Background() + +// // should show up 1 orphaned panda +// totalPruned, err := suite.cleaner.Media().PruneOrphaned(ctx) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// // panda should no longer be in storage +// hasKey, err := suite.storage.Has(ctx, pandaPath) +// suite.NoError(err) +// suite.False(hasKey) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedLocal() { +// testAttachment := suite.testAttachments["local_account_1_unattached_1"] +// suite.True(*testAttachment.Cached) + +// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedLocalDry() { +// testAttachment := suite.testAttachments["local_account_1_unattached_1"] +// suite.True(*testAttachment.Cached) + +// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), true) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) +// suite.NoError(err) +// } + +// func (suite *MediaTestSuite) TestPruneRemoteTwice() { +// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// // final prune should prune nothing, since the first prune already happened +// totalPrunedAgain, err := suite.manager.PruneUnusedLocal(context.Background(), false) +// suite.NoError(err) +// suite.Equal(0, totalPrunedAgain) +// } + +// func (suite *MediaTestSuite) TestPruneOneNonExistent() { +// ctx := context.Background() +// testAttachment := suite.testAttachments["local_account_1_unattached_1"] + +// // Delete this attachment cached on disk +// media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID) +// suite.NoError(err) +// suite.True(*media.Cached) +// err = suite.storage.Delete(ctx, media.File.Path) +// suite.NoError(err) + +// // Now attempt to prune for item with db entry no file +// totalPruned, err := suite.manager.PruneUnusedLocal(ctx, false) +// suite.NoError(err) +// suite.Equal(1, totalPruned) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedRemote() { +// ctx := context.Background() + +// // start by clearing zork's avatar + header +// zorkOldAvatar := suite.testAttachments["local_account_1_avatar"] +// zorkOldHeader := suite.testAttachments["local_account_1_avatar"] +// zork := suite.testAccounts["local_account_1"] +// zork.AvatarMediaAttachmentID = "" +// zork.HeaderMediaAttachmentID = "" +// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { +// panic(err) +// } + +// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) +// suite.NoError(err) +// suite.Equal(2, totalPruned) + +// // media should no longer be stored +// _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path) +// suite.ErrorIs(err, storage.ErrNotFound) + +// // attachments should no longer be in the db +// _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedRemoteTwice() { +// ctx := context.Background() + +// // start by clearing zork's avatar + header +// zork := suite.testAccounts["local_account_1"] +// zork.AvatarMediaAttachmentID = "" +// zork.HeaderMediaAttachmentID = "" +// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { +// panic(err) +// } + +// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) +// suite.NoError(err) +// suite.Equal(2, totalPruned) + +// // final prune should prune nothing, since the first prune already happened +// totalPruned, err = suite.manager.PruneUnusedRemote(ctx, false) +// suite.NoError(err) +// suite.Equal(0, totalPruned) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedRemoteMultipleAccounts() { +// ctx := context.Background() + +// // start by clearing zork's avatar + header +// zorkOldAvatar := suite.testAttachments["local_account_1_avatar"] +// zorkOldHeader := suite.testAttachments["local_account_1_avatar"] +// zork := suite.testAccounts["local_account_1"] +// zork.AvatarMediaAttachmentID = "" +// zork.HeaderMediaAttachmentID = "" +// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { +// panic(err) +// } + +// // set zork's unused header as belonging to turtle +// turtle := suite.testAccounts["local_account_1"] +// zorkOldHeader.AccountID = turtle.ID +// if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil { +// panic(err) +// } + +// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) +// suite.NoError(err) +// suite.Equal(2, totalPruned) + +// // media should no longer be stored +// _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path) +// suite.ErrorIs(err, storage.ErrNotFound) + +// // attachments should no longer be in the db +// _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// } + +func (suite *MediaTestSuite) TestUncacheRemote() { + ctx := context.Background() + + testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + suite.True(*testStatusAttachment.Cached) + + testHeader := suite.testAttachments["remote_account_3_header"] + suite.True(*testHeader.Cached) + + after := time.Now().Add(-24 * time.Hour) + totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after) + suite.NoError(err) + suite.Equal(2, totalUncached) + + uncachedAttachment, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID) + suite.NoError(err) + suite.False(*uncachedAttachment.Cached) + + uncachedAttachment, err = suite.db.GetAttachmentByID(ctx, testHeader.ID) + suite.NoError(err) + suite.False(*uncachedAttachment.Cached) +} + +func (suite *MediaTestSuite) TestUncacheRemoteDry() { + ctx := context.Background() + + testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + suite.True(*testStatusAttachment.Cached) + + testHeader := suite.testAttachments["remote_account_3_header"] + suite.True(*testHeader.Cached) + + after := time.Now().Add(-24 * time.Hour) + totalUncached, err := suite.cleaner.Media().UncacheRemote(gtscontext.SetDryRun(ctx), after) + suite.NoError(err) + suite.Equal(2, totalUncached) + + uncachedAttachment, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID) + suite.NoError(err) + suite.True(*uncachedAttachment.Cached) + + uncachedAttachment, err = suite.db.GetAttachmentByID(ctx, testHeader.ID) + suite.NoError(err) + suite.True(*uncachedAttachment.Cached) +} + +func (suite *MediaTestSuite) TestUncacheRemoteTwice() { + ctx := context.Background() + after := time.Now().Add(-24 * time.Hour) + + totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after) + suite.NoError(err) + suite.Equal(2, totalUncached) + + // final uncache should uncache nothing, since the first uncache already happened + totalUncachedAgain, err := suite.cleaner.Media().UncacheRemote(ctx, after) + suite.NoError(err) + suite.Equal(0, totalUncachedAgain) +} + +func (suite *MediaTestSuite) TestUncacheAndRecache() { + ctx := context.Background() + testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + testHeader := suite.testAttachments["remote_account_3_header"] + + after := time.Now().Add(-24 * time.Hour) + totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after) + suite.NoError(err) + suite.Equal(2, totalUncached) + + // media should no longer be stored + _, err = suite.storage.Get(ctx, testStatusAttachment.File.Path) + suite.ErrorIs(err, storage.ErrNotFound) + _, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path) + suite.ErrorIs(err, storage.ErrNotFound) + _, err = suite.storage.Get(ctx, testHeader.File.Path) + suite.ErrorIs(err, storage.ErrNotFound) + _, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path) + suite.ErrorIs(err, storage.ErrNotFound) + + // now recache the image.... + data := func(_ context.Context) (io.ReadCloser, int64, error) { + // load bytes from a test image + b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg") + if err != nil { + panic(err) + } + return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil + } + + for _, original := range []*gtsmodel.MediaAttachment{ + testStatusAttachment, + testHeader, + } { + processingRecache, err := suite.manager.PreProcessMediaRecache(ctx, data, original.ID) + suite.NoError(err) + + // synchronously load the recached attachment + recachedAttachment, err := processingRecache.LoadAttachment(ctx) + suite.NoError(err) + suite.NotNil(recachedAttachment) + + // recachedAttachment should be basically the same as the old attachment + suite.True(*recachedAttachment.Cached) + suite.Equal(original.ID, recachedAttachment.ID) + suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place + suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail + suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same + + // recached files should be back in storage + _, err = suite.storage.Get(ctx, recachedAttachment.File.Path) + suite.NoError(err) + _, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path) + suite.NoError(err) + } +} + +func (suite *MediaTestSuite) TestUncacheOneNonExistent() { + ctx := context.Background() + testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + + // Delete this attachment cached on disk + media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID) + suite.NoError(err) + suite.True(*media.Cached) + err = suite.storage.Delete(ctx, media.File.Path) + suite.NoError(err) + + // Now attempt to uncache remote for item with db entry no file + after := time.Now().Add(-24 * time.Hour) + totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after) + suite.NoError(err) + suite.Equal(2, totalUncached) +} diff --git a/internal/db/bundb/emoji.go b/internal/db/bundb/emoji.go index 60b8fc12b..60c140264 100644 --- a/internal/db/bundb/emoji.go +++ b/internal/db/bundb/emoji.go @@ -19,6 +19,7 @@ import ( "context" + "database/sql" "errors" "strings" "time" @@ -37,19 +38,6 @@ type emojiDB struct { state *state.State } -func (e *emojiDB) newEmojiQ(emoji *gtsmodel.Emoji) *bun.SelectQuery { - return e.conn. - NewSelect(). - Model(emoji). - Relation("Category") -} - -func (e *emojiDB) newEmojiCategoryQ(emojiCategory *gtsmodel.EmojiCategory) *bun.SelectQuery { - return e.conn. - NewSelect(). - Model(emojiCategory) -} - func (e *emojiDB) PutEmoji(ctx context.Context, emoji *gtsmodel.Emoji) db.Error { return e.state.Caches.GTS.Emoji().Store(emoji, func() error { _, err := e.conn.NewInsert().Model(emoji).Exec(ctx) @@ -57,14 +45,15 @@ func (e *emojiDB) PutEmoji(ctx context.Context, emoji *gtsmodel.Emoji) db.Error }) } -func (e *emojiDB) UpdateEmoji(ctx context.Context, emoji *gtsmodel.Emoji, columns ...string) (*gtsmodel.Emoji, db.Error) { +func (e *emojiDB) UpdateEmoji(ctx context.Context, emoji *gtsmodel.Emoji, columns ...string) error { emoji.UpdatedAt = time.Now() if len(columns) > 0 { // If we're updating by column, ensure "updated_at" is included. columns = append(columns, "updated_at") } - err := e.state.Caches.GTS.Emoji().Store(emoji, func() error { + // Update the emoji model in the database. + return e.state.Caches.GTS.Emoji().Store(emoji, func() error { _, err := e.conn. NewUpdate(). Model(emoji). @@ -73,15 +62,34 @@ func (e *emojiDB) UpdateEmoji(ctx context.Context, emoji *gtsmodel.Emoji, column Exec(ctx) return e.conn.ProcessError(err) }) - if err != nil { - return nil, err - } - - return emoji, nil } func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error { - defer e.state.Caches.GTS.Emoji().Invalidate("ID", id) + var ( + accountIDs []string + statusIDs []string + ) + + defer func() { + // Invalidate cached emoji. + e.state.Caches.GTS. + Emoji(). + Invalidate("ID", id) + + for _, id := range accountIDs { + // Invalidate cached account. + e.state.Caches.GTS. + Account(). + Invalidate("ID", id) + } + + for _, id := range statusIDs { + // Invalidate cached account. + e.state.Caches.GTS. + Status(). + Invalidate("ID", id) + } + }() // Load emoji into cache before attempting a delete, // as we need it cached in order to trigger the invalidate @@ -99,6 +107,7 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error { return e.conn.RunInTx(ctx, func(tx bun.Tx) error { // delete links between this emoji and any statuses that use it + // TODO: remove when we delete this table if _, err := tx. NewDelete(). TableExpr("? AS ?", bun.Ident("status_to_emojis"), bun.Ident("status_to_emoji")). @@ -108,6 +117,7 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error { } // delete links between this emoji and any accounts that use it + // TODO: remove when we delete this table if _, err := tx. NewDelete(). TableExpr("? AS ?", bun.Ident("account_to_emojis"), bun.Ident("account_to_emoji")). @@ -116,19 +126,91 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error { return err } - if _, err := tx. - NewDelete(). - TableExpr("? AS ?", bun.Ident("emojis"), bun.Ident("emoji")). - Where("? = ?", bun.Ident("emoji.id"), id). + // Select all accounts using this emoji. + if _, err := tx.NewSelect(). + Table("accounts"). + Column("id"). + Where("? IN (emojis)", id). + Exec(ctx, &accountIDs); err != nil { + return err + } + + for _, id := range accountIDs { + var emojiIDs []string + + // Select account with ID. + if _, err := tx.NewSelect(). + Table("accounts"). + Column("emojis"). + Where("id = ?", id). + Exec(ctx); err != nil && + err != sql.ErrNoRows { + return err + } + + // Drop ID from account emojis. + emojiIDs = dropID(emojiIDs, id) + + // Update account emoji IDs. + if _, err := tx.NewUpdate(). + Table("accounts"). + Where("id = ?", id). + Set("emojis = ?", emojiIDs). + Exec(ctx); err != nil && + err != sql.ErrNoRows { + return err + } + } + + // Select all statuses using this emoji. + if _, err := tx.NewSelect(). + Table("statuses"). + Column("id"). + Where("? IN (emojis)", id). + Exec(ctx, &statusIDs); err != nil { + return err + } + + for _, id := range statusIDs { + var emojiIDs []string + + // Select statuses with ID. + if _, err := tx.NewSelect(). + Table("statuses"). + Column("emojis"). + Where("id = ?", id). + Exec(ctx); err != nil && + err != sql.ErrNoRows { + return err + } + + // Drop ID from account emojis. + emojiIDs = dropID(emojiIDs, id) + + // Update status emoji IDs. + if _, err := tx.NewUpdate(). + Table("statuses"). + Where("id = ?", id). + Set("emojis = ?", emojiIDs). + Exec(ctx); err != nil && + err != sql.ErrNoRows { + return err + } + } + + // Delete emoji from database. + if _, err := tx.NewDelete(). + Table("emojis"). + Where("id = ?", id). Exec(ctx); err != nil { - return e.conn.ProcessError(err) + return err } return nil }) } -func (e *emojiDB) GetEmojis(ctx context.Context, domain string, includeDisabled bool, includeEnabled bool, shortcode string, maxShortcodeDomain string, minShortcodeDomain string, limit int) ([]*gtsmodel.Emoji, db.Error) { +func (e *emojiDB) GetEmojisBy(ctx context.Context, domain string, includeDisabled bool, includeEnabled bool, shortcode string, maxShortcodeDomain string, minShortcodeDomain string, limit int) ([]*gtsmodel.Emoji, error) { emojiIDs := []string{} subQuery := e.conn. @@ -245,6 +327,29 @@ func (e *emojiDB) GetEmojis(ctx context.Context, domain string, includeDisabled return e.GetEmojisByIDs(ctx, emojiIDs) } +func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) { + emojiIDs := []string{} + + q := e.conn.NewSelect(). + Table("emojis"). + Column("id"). + Order("id DESC") + + if maxID != "" { + q = q.Where("? < ?", bun.Ident("id"), maxID) + } + + if limit != 0 { + q = q.Limit(limit) + } + + if err := q.Scan(ctx, &emojiIDs); err != nil { + return nil, e.conn.ProcessError(err) + } + + return e.GetEmojisByIDs(ctx, emojiIDs) +} + func (e *emojiDB) GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, db.Error) { emojiIDs := []string{} @@ -269,7 +374,10 @@ func (e *emojiDB) GetEmojiByID(ctx context.Context, id string) (*gtsmodel.Emoji, ctx, "ID", func(emoji *gtsmodel.Emoji) error { - return e.newEmojiQ(emoji).Where("? = ?", bun.Ident("emoji.id"), id).Scan(ctx) + return e.conn. + NewSelect(). + Model(emoji). + Where("? = ?", bun.Ident("emoji.id"), id).Scan(ctx) }, id, ) @@ -280,7 +388,10 @@ func (e *emojiDB) GetEmojiByURI(ctx context.Context, uri string) (*gtsmodel.Emoj ctx, "URI", func(emoji *gtsmodel.Emoji) error { - return e.newEmojiQ(emoji).Where("? = ?", bun.Ident("emoji.uri"), uri).Scan(ctx) + return e.conn. + NewSelect(). + Model(emoji). + Where("? = ?", bun.Ident("emoji.uri"), uri).Scan(ctx) }, uri, ) @@ -291,7 +402,9 @@ func (e *emojiDB) GetEmojiByShortcodeDomain(ctx context.Context, shortcode strin ctx, "Shortcode.Domain", func(emoji *gtsmodel.Emoji) error { - q := e.newEmojiQ(emoji) + q := e.conn. + NewSelect(). + Model(emoji) if domain != "" { q = q.Where("? = ?", bun.Ident("emoji.shortcode"), shortcode) @@ -313,8 +426,9 @@ func (e *emojiDB) GetEmojiByStaticURL(ctx context.Context, imageStaticURL string ctx, "ImageStaticURL", func(emoji *gtsmodel.Emoji) error { - return e. - newEmojiQ(emoji). + return e.conn. + NewSelect(). + Model(emoji). Where("? = ?", bun.Ident("emoji.image_static_url"), imageStaticURL). Scan(ctx) }, @@ -350,7 +464,10 @@ func (e *emojiDB) GetEmojiCategory(ctx context.Context, id string) (*gtsmodel.Em ctx, "ID", func(emojiCategory *gtsmodel.EmojiCategory) error { - return e.newEmojiCategoryQ(emojiCategory).Where("? = ?", bun.Ident("emoji_category.id"), id).Scan(ctx) + return e.conn. + NewSelect(). + Model(emojiCategory). + Where("? = ?", bun.Ident("emoji_category.id"), id).Scan(ctx) }, id, ) @@ -361,14 +478,18 @@ func (e *emojiDB) GetEmojiCategoryByName(ctx context.Context, name string) (*gts ctx, "Name", func(emojiCategory *gtsmodel.EmojiCategory) error { - return e.newEmojiCategoryQ(emojiCategory).Where("LOWER(?) = ?", bun.Ident("emoji_category.name"), strings.ToLower(name)).Scan(ctx) + return e.conn. + NewSelect(). + Model(emojiCategory). + Where("LOWER(?) = ?", bun.Ident("emoji_category.name"), strings.ToLower(name)).Scan(ctx) }, name, ) } func (e *emojiDB) getEmoji(ctx context.Context, lookup string, dbQuery func(*gtsmodel.Emoji) error, keyParts ...any) (*gtsmodel.Emoji, db.Error) { - return e.state.Caches.GTS.Emoji().Load(lookup, func() (*gtsmodel.Emoji, error) { + // Fetch emoji from database cache with loader callback + emoji, err := e.state.Caches.GTS.Emoji().Load(lookup, func() (*gtsmodel.Emoji, error) { var emoji gtsmodel.Emoji // Not cached! Perform database query @@ -378,6 +499,23 @@ func (e *emojiDB) getEmoji(ctx context.Context, lookup string, dbQuery func(*gts return &emoji, nil }, keyParts...) + if err != nil { + return nil, err + } + + if gtscontext.Barebones(ctx) { + // no need to fully populate. + return emoji, nil + } + + if emoji.CategoryID != "" { + emoji.Category, err = e.GetEmojiCategory(ctx, emoji.CategoryID) + if err != nil { + log.Errorf(ctx, "error getting emoji category %s: %v", emoji.CategoryID, err) + } + } + + return emoji, nil } func (e *emojiDB) GetEmojisByIDs(ctx context.Context, emojiIDs []string) ([]*gtsmodel.Emoji, db.Error) { @@ -432,3 +570,17 @@ func (e *emojiDB) GetEmojiCategoriesByIDs(ctx context.Context, emojiCategoryIDs return emojiCategories, nil } + +// dropIDs drops given ID string from IDs slice. +func dropID(ids []string, id string) []string { + for i := 0; i < len(ids); { + if ids[i] == id { + // Remove this reference. + copy(ids[i:], ids[i+1:]) + ids = ids[:len(ids)-1] + continue + } + i++ + } + return ids +} diff --git a/internal/db/bundb/emoji_test.go b/internal/db/bundb/emoji_test.go index c71c63efb..f75334d90 100644 --- a/internal/db/bundb/emoji_test.go +++ b/internal/db/bundb/emoji_test.go @@ -59,7 +59,7 @@ func (suite *EmojiTestSuite) TestGetEmojiByStaticURL() { } func (suite *EmojiTestSuite) TestGetAllEmojis() { - emojis, err := suite.db.GetEmojis(context.Background(), db.EmojiAllDomains, true, true, "", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), db.EmojiAllDomains, true, true, "", "", "", 0) suite.NoError(err) suite.Equal(2, len(emojis)) @@ -68,7 +68,7 @@ func (suite *EmojiTestSuite) TestGetAllEmojis() { } func (suite *EmojiTestSuite) TestGetAllEmojisLimit1() { - emojis, err := suite.db.GetEmojis(context.Background(), db.EmojiAllDomains, true, true, "", "", "", 1) + emojis, err := suite.db.GetEmojisBy(context.Background(), db.EmojiAllDomains, true, true, "", "", "", 1) suite.NoError(err) suite.Equal(1, len(emojis)) @@ -76,7 +76,7 @@ func (suite *EmojiTestSuite) TestGetAllEmojisLimit1() { } func (suite *EmojiTestSuite) TestGetAllEmojisMaxID() { - emojis, err := suite.db.GetEmojis(context.Background(), db.EmojiAllDomains, true, true, "", "rainbow@", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), db.EmojiAllDomains, true, true, "", "rainbow@", "", 0) suite.NoError(err) suite.Equal(1, len(emojis)) @@ -84,7 +84,7 @@ func (suite *EmojiTestSuite) TestGetAllEmojisMaxID() { } func (suite *EmojiTestSuite) TestGetAllEmojisMinID() { - emojis, err := suite.db.GetEmojis(context.Background(), db.EmojiAllDomains, true, true, "", "", "yell@fossbros-anonymous.io", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), db.EmojiAllDomains, true, true, "", "", "yell@fossbros-anonymous.io", 0) suite.NoError(err) suite.Equal(1, len(emojis)) @@ -92,14 +92,14 @@ func (suite *EmojiTestSuite) TestGetAllEmojisMinID() { } func (suite *EmojiTestSuite) TestGetAllDisabledEmojis() { - emojis, err := suite.db.GetEmojis(context.Background(), db.EmojiAllDomains, true, false, "", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), db.EmojiAllDomains, true, false, "", "", "", 0) suite.ErrorIs(err, db.ErrNoEntries) suite.Equal(0, len(emojis)) } func (suite *EmojiTestSuite) TestGetAllEnabledEmojis() { - emojis, err := suite.db.GetEmojis(context.Background(), db.EmojiAllDomains, false, true, "", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), db.EmojiAllDomains, false, true, "", "", "", 0) suite.NoError(err) suite.Equal(2, len(emojis)) @@ -108,7 +108,7 @@ func (suite *EmojiTestSuite) TestGetAllEnabledEmojis() { } func (suite *EmojiTestSuite) TestGetLocalEnabledEmojis() { - emojis, err := suite.db.GetEmojis(context.Background(), "", false, true, "", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), "", false, true, "", "", "", 0) suite.NoError(err) suite.Equal(1, len(emojis)) @@ -116,21 +116,21 @@ func (suite *EmojiTestSuite) TestGetLocalEnabledEmojis() { } func (suite *EmojiTestSuite) TestGetLocalDisabledEmojis() { - emojis, err := suite.db.GetEmojis(context.Background(), "", true, false, "", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), "", true, false, "", "", "", 0) suite.ErrorIs(err, db.ErrNoEntries) suite.Equal(0, len(emojis)) } func (suite *EmojiTestSuite) TestGetAllEmojisFromDomain() { - emojis, err := suite.db.GetEmojis(context.Background(), "peepee.poopoo", true, true, "", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), "peepee.poopoo", true, true, "", "", "", 0) suite.ErrorIs(err, db.ErrNoEntries) suite.Equal(0, len(emojis)) } func (suite *EmojiTestSuite) TestGetAllEmojisFromDomain2() { - emojis, err := suite.db.GetEmojis(context.Background(), "fossbros-anonymous.io", true, true, "", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), "fossbros-anonymous.io", true, true, "", "", "", 0) suite.NoError(err) suite.Equal(1, len(emojis)) @@ -138,7 +138,7 @@ func (suite *EmojiTestSuite) TestGetAllEmojisFromDomain2() { } func (suite *EmojiTestSuite) TestGetSpecificEmojisFromDomain2() { - emojis, err := suite.db.GetEmojis(context.Background(), "fossbros-anonymous.io", true, true, "yell", "", "", 0) + emojis, err := suite.db.GetEmojisBy(context.Background(), "fossbros-anonymous.io", true, true, "yell", "", "", 0) suite.NoError(err) suite.Equal(1, len(emojis)) diff --git a/internal/db/bundb/media.go b/internal/db/bundb/media.go index b64447beb..a9b60e3ae 100644 --- a/internal/db/bundb/media.go +++ b/internal/db/bundb/media.go @@ -24,6 +24,7 @@ "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/state" @@ -110,7 +111,7 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error { // Load media into cache before attempting a delete, // as we need it cached in order to trigger the invalidate // callback. This in turn invalidates others. - _, err := m.GetAttachmentByID(gtscontext.SetBarebones(ctx), id) + media, err := m.GetAttachmentByID(gtscontext.SetBarebones(ctx), id) if err != nil { if errors.Is(err, db.ErrNoEntries) { // not an issue. @@ -119,11 +120,115 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error { return err } - // Finally delete media from DB. - _, err = m.conn.NewDelete(). - TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). - Where("? = ?", bun.Ident("media_attachment.id"), id). - Exec(ctx) + var ( + invalidateAccount bool + invalidateStatus bool + ) + + // Delete media attachment in new transaction. + err = m.conn.RunInTx(ctx, func(tx bun.Tx) error { + if media.AccountID != "" { + var account gtsmodel.Account + + // Get related account model. + if _, err := tx.NewSelect(). + Model(&account). + Where("? = ?", bun.Ident("id"), media.AccountID). + Exec(ctx); err != nil && !errors.Is(err, db.ErrNoEntries) { + return gtserror.Newf("error selecting account: %w", err) + } + + var set func(*bun.UpdateQuery) *bun.UpdateQuery + + switch { + case *media.Avatar && account.AvatarMediaAttachmentID == id: + set = func(q *bun.UpdateQuery) *bun.UpdateQuery { + return q.Set("? = NULL", bun.Ident("avatar_media_attachment_id")) + } + case *media.Header && account.HeaderMediaAttachmentID == id: + set = func(q *bun.UpdateQuery) *bun.UpdateQuery { + return q.Set("? = NULL", bun.Ident("header_media_attachment_id")) + } + } + + if set != nil { + // Note: this handles not found. + // + // Update the account model. + q := tx.NewUpdate(). + Table("accounts"). + Where("? = ?", bun.Ident("id"), account.ID) + if _, err := set(q).Exec(ctx); err != nil { + return gtserror.Newf("error updating account: %w", err) + } + + // Mark as needing invalidate. + invalidateAccount = true + } + } + + if media.StatusID != "" { + var status gtsmodel.Status + + // Get related status model. + if _, err := tx.NewSelect(). + Model(&status). + Where("? = ?", bun.Ident("id"), media.StatusID). + Exec(ctx); err != nil && !errors.Is(err, db.ErrNoEntries) { + return gtserror.Newf("error selecting status: %w", err) + } + + // Get length of attachments beforehand. + before := len(status.AttachmentIDs) + + for i := 0; i < len(status.AttachmentIDs); { + if status.AttachmentIDs[i] == id { + // Remove this reference to deleted attachment ID. + copy(status.AttachmentIDs[i:], status.AttachmentIDs[i+1:]) + status.AttachmentIDs = status.AttachmentIDs[:len(status.AttachmentIDs)-1] + continue + } + i++ + } + + if before != len(status.AttachmentIDs) { + // Note: this accounts for status not found. + // + // Attachments changed, update the status. + if _, err := tx.NewUpdate(). + Table("statuses"). + Where("? = ?", bun.Ident("id"), status.ID). + Set("? = ?", bun.Ident("attachment_ids"), status.AttachmentIDs). + Exec(ctx); err != nil { + return gtserror.Newf("error updating status: %w", err) + } + + // Mark as needing invalidate. + invalidateStatus = true + } + } + + // Finally delete this media. + if _, err := tx.NewDelete(). + Table("media_attachments"). + Where("? = ?", bun.Ident("id"), id). + Exec(ctx); err != nil { + return gtserror.Newf("error deleting media: %w", err) + } + + return nil + }) + + if invalidateAccount { + // The account for given ID will have been updated in transaction. + m.state.Caches.GTS.Account().Invalidate("ID", media.AccountID) + } + + if invalidateStatus { + // The status for given ID will have been updated in transaction. + m.state.Caches.GTS.Status().Invalidate("ID", media.StatusID) + } + return m.conn.ProcessError(err) } @@ -167,6 +272,29 @@ func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) return count, nil } +func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { + attachmentIDs := []string{} + + q := m.conn.NewSelect(). + Table("media_attachments"). + Column("id"). + Order("id DESC") + + if maxID != "" { + q = q.Where("? < ?", bun.Ident("id"), maxID) + } + + if limit != 0 { + q = q.Limit(limit) + } + + if err := q.Scan(ctx, &attachmentIDs); err != nil { + return nil, m.conn.ProcessError(err) + } + + return m.GetAttachmentsByIDs(ctx, attachmentIDs) +} + func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { attachmentIDs := []string{} diff --git a/internal/db/bundb/session.go b/internal/db/bundb/session.go index 6d900d75a..9a4256de5 100644 --- a/internal/db/bundb/session.go +++ b/internal/db/bundb/session.go @@ -20,6 +20,7 @@ import ( "context" "crypto/rand" + "io" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -52,13 +53,11 @@ func (s *sessionDB) GetSession(ctx context.Context) (*gtsmodel.RouterSession, db } func (s *sessionDB) createSession(ctx context.Context) (*gtsmodel.RouterSession, db.Error) { - auth := make([]byte, 32) - crypt := make([]byte, 32) + buf := make([]byte, 64) + auth := buf[:32] + crypt := buf[32:64] - if _, err := rand.Read(auth); err != nil { - return nil, err - } - if _, err := rand.Read(crypt); err != nil { + if _, err := io.ReadFull(rand.Reader, buf); err != nil { return nil, err } diff --git a/internal/db/emoji.go b/internal/db/emoji.go index 0c6ff4d1d..5dcad9ece 100644 --- a/internal/db/emoji.go +++ b/internal/db/emoji.go @@ -33,15 +33,17 @@ type Emoji interface { PutEmoji(ctx context.Context, emoji *gtsmodel.Emoji) Error // UpdateEmoji updates the given columns of one emoji. // If no columns are specified, every column is updated. - UpdateEmoji(ctx context.Context, emoji *gtsmodel.Emoji, columns ...string) (*gtsmodel.Emoji, Error) + UpdateEmoji(ctx context.Context, emoji *gtsmodel.Emoji, columns ...string) error // DeleteEmojiByID deletes one emoji by its database ID. DeleteEmojiByID(ctx context.Context, id string) Error // GetEmojisByIDs gets emojis for the given IDs. GetEmojisByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Emoji, Error) // GetUseableEmojis gets all emojis which are useable by accounts on this instance. GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, Error) - // GetEmojis gets emojis based on given parameters. Useful for admin actions. - GetEmojis(ctx context.Context, domain string, includeDisabled bool, includeEnabled bool, shortcode string, maxShortcodeDomain string, minShortcodeDomain string, limit int) ([]*gtsmodel.Emoji, Error) + // GetEmojis ... + GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) + // GetEmojisBy gets emojis based on given parameters. Useful for admin actions. + GetEmojisBy(ctx context.Context, domain string, includeDisabled bool, includeEnabled bool, shortcode string, maxShortcodeDomain string, minShortcodeDomain string, limit int) ([]*gtsmodel.Emoji, error) // GetEmojiByID gets a specific emoji by its database ID. GetEmojiByID(ctx context.Context, id string) (*gtsmodel.Emoji, Error) // GetEmojiByShortcodeDomain gets an emoji based on its shortcode and domain. diff --git a/internal/db/media.go b/internal/db/media.go index 05609ba52..01bca1748 100644 --- a/internal/db/media.go +++ b/internal/db/media.go @@ -41,6 +41,9 @@ type Media interface { // DeleteAttachment deletes the attachment with given ID from the database. DeleteAttachment(ctx context.Context, id string) error + // GetAttachments ... + GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) + // GetRemoteOlderThan gets limit n remote media attachments (including avatars and headers) older than the given // olderThan time. These will be returned in order of attachment.created_at descending (newest to oldest in other words). // diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index 5b0de99bc..f7e740d4b 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -116,7 +116,7 @@ func (d *deref) getAccountByURI(ctx context.Context, requestUser string, uri *ur if account == nil { // Ensure that this is isn't a search for a local account. if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() { - return nil, nil, NewErrNotRetrievable(err) // this will be db.ErrNoEntries + return nil, nil, gtserror.SetUnretrievable(err) // this will be db.ErrNoEntries } // Create and pass-through a new bare-bones model for dereferencing. @@ -179,7 +179,7 @@ func (d *deref) GetAccountByUsernameDomain(ctx context.Context, requestUser stri if account == nil { if domain == "" { // failed local lookup, will be db.ErrNoEntries. - return nil, nil, NewErrNotRetrievable(err) + return nil, nil, gtserror.SetUnretrievable(err) } // Create and pass-through a new bare-bones model for dereferencing. @@ -306,8 +306,10 @@ func (d *deref) enrichAccount(ctx context.Context, requestUser string, uri *url. accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain) if err != nil { if account.URI == "" { - // this is a new account (to us) with username@domain but failed webfinger, nothing more we can do. - return nil, nil, &ErrNotRetrievable{gtserror.Newf("error webfingering account: %w", err)} + // this is a new account (to us) with username@domain + // but failed webfinger, nothing more we can do. + err := gtserror.Newf("error webfingering account: %w", err) + return nil, nil, gtserror.SetUnretrievable(err) } // Simply log this error and move on, we already have an account URI. @@ -316,10 +318,6 @@ func (d *deref) enrichAccount(ctx context.Context, requestUser string, uri *url. if err == nil { if account.Domain != accDomain { - // Domain has changed, assume the activitypub - // account data provided may not be the latest. - apubAcc = nil - // After webfinger, we now have correct account domain from which we can do a final DB check. alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) if err != nil && !errors.Is(err, db.ErrNoEntries) { @@ -358,31 +356,28 @@ func (d *deref) enrichAccount(ctx context.Context, requestUser string, uri *url. d.startHandshake(requestUser, uri) defer d.stopHandshake(requestUser, uri) - // By default we assume that apubAcc has been passed, - // indicating that the given account is already latest. - latestAcc := account - if apubAcc == nil { // Dereference latest version of the account. b, err := tsport.Dereference(ctx, uri) if err != nil { - return nil, nil, &ErrNotRetrievable{gtserror.Newf("error deferencing %s: %w", uri, err)} + err := gtserror.Newf("error deferencing %s: %w", uri, err) + return nil, nil, gtserror.SetUnretrievable(err) } - // Attempt to resolve ActivityPub account from data. + // Attempt to resolve ActivityPub acc from data. apubAcc, err = ap.ResolveAccountable(ctx, b) if err != nil { return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) } + } - // Convert the dereferenced AP account object to our GTS model. - latestAcc, err = d.typeConverter.ASRepresentationToAccount(ctx, - apubAcc, - account.Domain, - ) - if err != nil { - return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) - } + // Convert the dereferenced AP account object to our GTS model. + latestAcc, err := d.typeConverter.ASRepresentationToAccount(ctx, + apubAcc, + account.Domain, + ) + if err != nil { + return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) } if account.Username == "" { @@ -425,52 +420,14 @@ func (d *deref) enrichAccount(ctx context.Context, requestUser string, uri *url. latestAcc.ID = account.ID latestAcc.FetchedAt = time.Now() - // Reuse the existing account media attachments by default. - latestAcc.AvatarMediaAttachmentID = account.AvatarMediaAttachmentID - latestAcc.HeaderMediaAttachmentID = account.HeaderMediaAttachmentID - - if (latestAcc.AvatarMediaAttachmentID == "") || - (latestAcc.AvatarRemoteURL != account.AvatarRemoteURL) { - // Reset the avatar media ID (handles removed). - latestAcc.AvatarMediaAttachmentID = "" - - if latestAcc.AvatarRemoteURL != "" { - // Avatar has changed to a new one, fetch up-to-date copy and use new ID. - latestAcc.AvatarMediaAttachmentID, err = d.fetchRemoteAccountAvatar(ctx, - tsport, - latestAcc.AvatarRemoteURL, - latestAcc.ID, - ) - if err != nil { - log.Errorf(ctx, "error fetching remote avatar for account %s: %v", uri, err) - - // Keep old avatar for now, we'll try again in $interval. - latestAcc.AvatarMediaAttachmentID = account.AvatarMediaAttachmentID - latestAcc.AvatarRemoteURL = account.AvatarRemoteURL - } - } + // Ensure the account's avatar media is populated, passing in existing to check for chages. + if err := d.fetchRemoteAccountAvatar(ctx, tsport, account, latestAcc); err != nil { + log.Errorf(ctx, "error fetching remote avatar for account %s: %v", uri, err) } - if (latestAcc.HeaderMediaAttachmentID == "") || - (latestAcc.HeaderRemoteURL != account.HeaderRemoteURL) { - // Reset the header media ID (handles removed). - latestAcc.HeaderMediaAttachmentID = "" - - if latestAcc.HeaderRemoteURL != "" { - // Header has changed to a new one, fetch up-to-date copy and use new ID. - latestAcc.HeaderMediaAttachmentID, err = d.fetchRemoteAccountHeader(ctx, - tsport, - latestAcc.HeaderRemoteURL, - latestAcc.ID, - ) - if err != nil { - log.Errorf(ctx, "error fetching remote header for account %s: %v", uri, err) - - // Keep old header for now, we'll try again in $interval. - latestAcc.HeaderMediaAttachmentID = account.HeaderMediaAttachmentID - latestAcc.HeaderRemoteURL = account.HeaderRemoteURL - } - } + // Ensure the account's avatar media is populated, passing in existing to check for chages. + if err := d.fetchRemoteAccountHeader(ctx, tsport, account, latestAcc); err != nil { + log.Errorf(ctx, "error fetching remote header for account %s: %v", uri, err) } // Fetch the latest remote account emoji IDs used in account display name/bio. @@ -515,11 +472,34 @@ func (d *deref) enrichAccount(ctx context.Context, requestUser string, uri *url. return latestAcc, apubAcc, nil } -func (d *deref) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.Transport, avatarURL string, accountID string) (string, error) { - // Parse and validate provided media URL. - avatarURI, err := url.Parse(avatarURL) +func (d *deref) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.Transport, existing, account *gtsmodel.Account) error { + if account.AvatarRemoteURL == "" { + // No fetching to do. + return nil + } + + // By default we set the original media attachment ID. + account.AvatarMediaAttachmentID = existing.AvatarMediaAttachmentID + + if account.AvatarMediaAttachmentID != "" && + existing.AvatarRemoteURL == account.AvatarRemoteURL { + // Look for an existing media attachment by the known ID. + media, err := d.state.DB.GetAttachmentByID(ctx, existing.AvatarMediaAttachmentID) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return gtserror.Newf("error getting attachment %s: %w", existing.AvatarMediaAttachmentID, err) + } + + if media != nil && *media.Cached { + // Media already cached, + // use this existing. + return nil + } + } + + // Parse and validate the newly provided media URL. + avatarURI, err := url.Parse(account.AvatarRemoteURL) if err != nil { - return "", err + return gtserror.Newf("error parsing url %s: %w", account.AvatarRemoteURL, err) } // Acquire lock for derefs map. @@ -527,7 +507,7 @@ func (d *deref) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.T defer unlock() // Look for an existing dereference in progress. - processing, ok := d.derefAvatars[avatarURL] + processing, ok := d.derefAvatars[account.AvatarRemoteURL] if !ok { var err error @@ -538,21 +518,21 @@ func (d *deref) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.T } // Create new media processing request from the media manager instance. - processing, err = d.mediaManager.PreProcessMedia(ctx, data, accountID, &media.AdditionalMediaInfo{ + processing, err = d.mediaManager.PreProcessMedia(ctx, data, account.ID, &media.AdditionalMediaInfo{ Avatar: func() *bool { v := true; return &v }(), - RemoteURL: &avatarURL, + RemoteURL: &account.AvatarRemoteURL, }) if err != nil { - return "", err + return gtserror.Newf("error preprocessing media for attachment %s: %w", account.AvatarRemoteURL, err) } // Store media in map to mark as processing. - d.derefAvatars[avatarURL] = processing + d.derefAvatars[account.AvatarRemoteURL] = processing defer func() { // On exit safely remove media from map. unlock := d.derefAvatarsMu.Lock() - delete(d.derefAvatars, avatarURL) + delete(d.derefAvatars, account.AvatarRemoteURL) unlock() }() } @@ -562,17 +542,43 @@ func (d *deref) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.T // Start media attachment loading (blocking call). if _, err := processing.LoadAttachment(ctx); err != nil { - return "", err + return gtserror.Newf("error loading attachment %s: %w", account.AvatarRemoteURL, err) } - return processing.AttachmentID(), nil + // Set the newly loaded avatar media attachment ID. + account.AvatarMediaAttachmentID = processing.AttachmentID() + + return nil } -func (d *deref) fetchRemoteAccountHeader(ctx context.Context, tsport transport.Transport, headerURL string, accountID string) (string, error) { - // Parse and validate provided media URL. - headerURI, err := url.Parse(headerURL) +func (d *deref) fetchRemoteAccountHeader(ctx context.Context, tsport transport.Transport, existing, account *gtsmodel.Account) error { + if account.HeaderRemoteURL == "" { + // No fetching to do. + return nil + } + + // By default we set the original media attachment ID. + account.HeaderMediaAttachmentID = existing.HeaderMediaAttachmentID + + if account.HeaderMediaAttachmentID != "" && + existing.HeaderRemoteURL == account.HeaderRemoteURL { + // Look for an existing media attachment by the known ID. + media, err := d.state.DB.GetAttachmentByID(ctx, existing.HeaderMediaAttachmentID) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return gtserror.Newf("error getting attachment %s: %w", existing.HeaderMediaAttachmentID, err) + } + + if media != nil && *media.Cached { + // Media already cached, + // use this existing. + return nil + } + } + + // Parse and validate the newly provided media URL. + headerURI, err := url.Parse(account.HeaderRemoteURL) if err != nil { - return "", err + return gtserror.Newf("error parsing url %s: %w", account.HeaderRemoteURL, err) } // Acquire lock for derefs map. @@ -580,32 +586,32 @@ func (d *deref) fetchRemoteAccountHeader(ctx context.Context, tsport transport.T defer unlock() // Look for an existing dereference in progress. - processing, ok := d.derefHeaders[headerURL] + processing, ok := d.derefHeaders[account.HeaderRemoteURL] if !ok { var err error - // Set the media data function to dereference header from URI. + // Set the media data function to dereference avatar from URI. data := func(ctx context.Context) (io.ReadCloser, int64, error) { return tsport.DereferenceMedia(ctx, headerURI) } // Create new media processing request from the media manager instance. - processing, err = d.mediaManager.PreProcessMedia(ctx, data, accountID, &media.AdditionalMediaInfo{ + processing, err = d.mediaManager.PreProcessMedia(ctx, data, account.ID, &media.AdditionalMediaInfo{ Header: func() *bool { v := true; return &v }(), - RemoteURL: &headerURL, + RemoteURL: &account.HeaderRemoteURL, }) if err != nil { - return "", err + return gtserror.Newf("error preprocessing media for attachment %s: %w", account.HeaderRemoteURL, err) } // Store media in map to mark as processing. - d.derefHeaders[headerURL] = processing + d.derefHeaders[account.HeaderRemoteURL] = processing defer func() { // On exit safely remove media from map. unlock := d.derefHeadersMu.Lock() - delete(d.derefHeaders, headerURL) + delete(d.derefHeaders, account.HeaderRemoteURL) unlock() }() } @@ -615,10 +621,13 @@ func (d *deref) fetchRemoteAccountHeader(ctx context.Context, tsport transport.T // Start media attachment loading (blocking call). if _, err := processing.LoadAttachment(ctx); err != nil { - return "", err + return gtserror.Newf("error loading attachment %s: %w", account.HeaderRemoteURL, err) } - return processing.AttachmentID(), nil + // Set the newly loaded avatar media attachment ID. + account.HeaderMediaAttachmentID = processing.AttachmentID() + + return nil } func (d *deref) fetchRemoteAccountEmojis(ctx context.Context, targetAccount *gtsmodel.Account, requestingUsername string) (bool, error) { diff --git a/internal/federation/dereferencing/account_test.go b/internal/federation/dereferencing/account_test.go index 9cff0a171..71028e342 100644 --- a/internal/federation/dereferencing/account_test.go +++ b/internal/federation/dereferencing/account_test.go @@ -25,7 +25,7 @@ "github.com/stretchr/testify/suite" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" - "github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/testrig" ) @@ -174,9 +174,8 @@ func (suite *AccountTestSuite) TestDereferenceLocalAccountWithUnknownUsername() "thisaccountdoesnotexist", config.GetHost(), ) - var errNotRetrievable *dereferencing.ErrNotRetrievable - suite.ErrorAs(err, &errNotRetrievable) - suite.EqualError(err, "item could not be retrieved: no entries") + suite.True(gtserror.Unretrievable(err)) + suite.EqualError(err, "no entries") suite.Nil(fetchedAccount) } @@ -189,9 +188,8 @@ func (suite *AccountTestSuite) TestDereferenceLocalAccountWithUnknownUsernameDom "thisaccountdoesnotexist", "localhost:8080", ) - var errNotRetrievable *dereferencing.ErrNotRetrievable - suite.ErrorAs(err, &errNotRetrievable) - suite.EqualError(err, "item could not be retrieved: no entries") + suite.True(gtserror.Unretrievable(err)) + suite.EqualError(err, "no entries") suite.Nil(fetchedAccount) } @@ -203,9 +201,8 @@ func (suite *AccountTestSuite) TestDereferenceLocalAccountWithUnknownUserURI() { fetchingAccount.Username, testrig.URLMustParse("http://localhost:8080/users/thisaccountdoesnotexist"), ) - var errNotRetrievable *dereferencing.ErrNotRetrievable - suite.ErrorAs(err, &errNotRetrievable) - suite.EqualError(err, "item could not be retrieved: no entries") + suite.True(gtserror.Unretrievable(err)) + suite.EqualError(err, "no entries") suite.Nil(fetchedAccount) } diff --git a/internal/federation/dereferencing/error.go b/internal/federation/dereferencing/error.go index 1b8d90653..6a1ce0a6e 100644 --- a/internal/federation/dereferencing/error.go +++ b/internal/federation/dereferencing/error.go @@ -16,21 +16,3 @@ // along with this program. If not, see . package dereferencing - -import ( - "fmt" -) - -// ErrNotRetrievable denotes that an item could not be dereferenced -// with the given parameters. -type ErrNotRetrievable struct { - wrapped error -} - -func (err *ErrNotRetrievable) Error() string { - return fmt.Sprintf("item could not be retrieved: %v", err.wrapped) -} - -func NewErrNotRetrievable(err error) error { - return &ErrNotRetrievable{wrapped: err} -} diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 11d6d7147..75adfdd6f 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -106,7 +106,7 @@ func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url if status == nil { // Ensure that this is isn't a search for a local status. if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() { - return nil, nil, NewErrNotRetrievable(err) // this will be db.ErrNoEntries + return nil, nil, gtserror.SetUnretrievable(err) // this will be db.ErrNoEntries } // Create and pass-through a new bare-bones model for deref. @@ -220,13 +220,12 @@ func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.U return nil, nil, gtserror.Newf("%s is blocked", uri.Host) } - var derefd bool - if apubStatus == nil { // Dereference latest version of the status. b, err := tsport.Dereference(ctx, uri) if err != nil { - return nil, nil, &ErrNotRetrievable{gtserror.Newf("error deferencing %s: %w", uri, err)} + err := gtserror.Newf("error deferencing %s: %w", uri, err) + return nil, nil, gtserror.SetUnretrievable(err) } // Attempt to resolve ActivityPub status from data. @@ -234,9 +233,6 @@ func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.U if err != nil { return nil, nil, gtserror.Newf("error resolving statusable from data for account %s: %w", uri, err) } - - // Mark as deref'd. - derefd = true } // Get the attributed-to account in order to fetch profile. @@ -256,17 +252,11 @@ func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.U log.Warnf(ctx, "status author account ID changed: old=%s new=%s", status.AccountID, author.ID) } - // By default we assume that apubStatus has been passed, - // indicating that the given status is already latest. - latestStatus := status - - if derefd { - // ActivityPub model was recently dereferenced, so assume that passed status - // may contain out-of-date information, convert AP model to our GTS model. - latestStatus, err = d.typeConverter.ASStatusToStatus(ctx, apubStatus) - if err != nil { - return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err) - } + // ActivityPub model was recently dereferenced, so assume that passed status + // may contain out-of-date information, convert AP model to our GTS model. + latestStatus, err := d.typeConverter.ASStatusToStatus(ctx, apubStatus) + if err != nil { + return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err) } // Use existing status ID. @@ -327,7 +317,7 @@ func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.U return latestStatus, apubStatus, nil } -func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error { +func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, existing, status *gtsmodel.Status) error { // Allocate new slice to take the yet-to-be created mention IDs. status.MentionIDs = make([]string, len(status.Mentions)) @@ -385,7 +375,7 @@ func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, exi status.MentionIDs[i] = mention.ID } - for i := 0; i < len(status.MentionIDs); i++ { + for i := 0; i < len(status.MentionIDs); { if status.MentionIDs[i] == "" { // This is a failed mention population, likely due // to invalid incoming data / now-deleted accounts. @@ -393,13 +383,15 @@ func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, exi copy(status.MentionIDs[i:], status.MentionIDs[i+1:]) status.Mentions = status.Mentions[:len(status.Mentions)-1] status.MentionIDs = status.MentionIDs[:len(status.MentionIDs)-1] + continue } + i++ } return nil } -func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Transport, existing *gtsmodel.Status, status *gtsmodel.Status) error { +func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Transport, existing, status *gtsmodel.Status) error { // Allocate new slice to take the yet-to-be fetched attachment IDs. status.AttachmentIDs = make([]string, len(status.Attachments)) @@ -408,7 +400,7 @@ func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Tra // Look for existing media attachment with remoet URL first. existing, ok := existing.GetAttachmentByRemoteURL(placeholder.RemoteURL) - if ok && existing.ID != "" { + if ok && existing.ID != "" && *existing.Cached { status.Attachments[i] = existing status.AttachmentIDs[i] = existing.ID continue @@ -447,7 +439,7 @@ func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Tra status.AttachmentIDs[i] = media.ID } - for i := 0; i < len(status.AttachmentIDs); i++ { + for i := 0; i < len(status.AttachmentIDs); { if status.AttachmentIDs[i] == "" { // This is a failed attachment population, this may // be due to us not currently supporting a media type. @@ -455,13 +447,15 @@ func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Tra copy(status.AttachmentIDs[i:], status.AttachmentIDs[i+1:]) status.Attachments = status.Attachments[:len(status.Attachments)-1] status.AttachmentIDs = status.AttachmentIDs[:len(status.AttachmentIDs)-1] + continue } + i++ } return nil } -func (d *deref) fetchStatusEmojis(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error { +func (d *deref) fetchStatusEmojis(ctx context.Context, requestUser string, existing, status *gtsmodel.Status) error { // Fetch the full-fleshed-out emoji objects for our status. emojis, err := d.populateEmojis(ctx, status.Emojis, requestUser) if err != nil { diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index ec22c66a8..a12e537bc 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -279,13 +279,21 @@ type frame struct { // Get the current page's "next" property pageNext := current.page.GetActivityStreamsNext() - if pageNext == nil { + if pageNext == nil || !pageNext.IsIRI() { continue stackLoop } - // Get the "next" page property IRI + // Get the IRI of the "next" property. pageNextIRI := pageNext.GetIRI() - if pageNextIRI == nil { + + // Ensure this isn't a self-referencing page... + // We don't need to store / check against a map of IRIs + // as our getStatusByIRI() function above prevents iter'ing + // over statuses that have been dereferenced recently, due to + // the `fetched_at` field preventing frequent refetches. + if id := current.page.GetJSONLDId(); id != nil && + pageNextIRI.String() == id.Get().String() { + log.Warnf(ctx, "self referencing collection page: %s", pageNextIRI) continue stackLoop } diff --git a/internal/gtscontext/context.go b/internal/gtscontext/context.go index c8cd42208..46f2899fa 100644 --- a/internal/gtscontext/context.go +++ b/internal/gtscontext/context.go @@ -41,8 +41,23 @@ httpSigVerifierKey httpSigKey httpSigPubKeyIDKey + dryRunKey ) +// DryRun returns whether the "dryrun" context key has been set. This can be +// used to indicate to functions, (that support it), that only a dry-run of +// the operation should be performed. As opposed to making any permanent changes. +func DryRun(ctx context.Context) bool { + _, ok := ctx.Value(dryRunKey).(struct{}) + return ok +} + +// SetDryRun sets the "dryrun" context flag and returns this wrapped context. +// See DryRun() for further information on the "dryrun" context flag. +func SetDryRun(ctx context.Context) context.Context { + return context.WithValue(ctx, dryRunKey, struct{}{}) +} + // RequestID returns the request ID associated with context. This value will usually // be set by the request ID middleware handler, either pulling an existing supplied // value from request headers, or generating a unique new entry. This is useful for diff --git a/internal/gtserror/error.go b/internal/gtserror/error.go index e68ed7d3b..6eaa3db63 100644 --- a/internal/gtserror/error.go +++ b/internal/gtserror/error.go @@ -33,11 +33,35 @@ statusCodeKey notFoundKey errorTypeKey + unrtrvableKey + wrongTypeKey // Types returnable from Type(...). TypeSMTP ErrorType = "smtp" // smtp (mail) ) +// Unretrievable ... +func Unretrievable(err error) bool { + _, ok := errors.Value(err, unrtrvableKey).(struct{}) + return ok +} + +// SetUnretrievable ... +func SetUnretrievable(err error) error { + return errors.WithValue(err, unrtrvableKey, struct{}{}) +} + +// WrongType ... +func WrongType(err error) bool { + _, ok := errors.Value(err, wrongTypeKey).(struct{}) + return ok +} + +// SetWrongType ... +func SetWrongType(err error) error { + return errors.WithValue(err, unrtrvableKey, struct{}{}) +} + // StatusCode checks error for a stored status code value. For example // an error from an outgoing HTTP request may be stored, or an API handler // expected response status code may be stored. diff --git a/internal/media/manager.go b/internal/media/manager.go index ec95b67e9..1d673128a 100644 --- a/internal/media/manager.go +++ b/internal/media/manager.go @@ -25,10 +25,8 @@ "time" "codeberg.org/gruf/go-iotools" - "codeberg.org/gruf/go-runners" - "codeberg.org/gruf/go-sched" "codeberg.org/gruf/go-store/v2/storage" - "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/id" "github.com/superseriousbusiness/gotosocial/internal/log" @@ -61,7 +59,6 @@ type Manager struct { // See internal/concurrency.NewWorkerPool() documentation for further information. func NewManager(state *state.State) *Manager { m := &Manager{state: state} - scheduleCleanupJobs(m) return m } @@ -214,7 +211,7 @@ func (m *Manager) ProcessMedia(ctx context.Context, data DataFunc, accountID str func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode string, emojiID string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) { instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") if err != nil { - return nil, fmt.Errorf("preProcessEmoji: error fetching this instance account from the db: %s", err) + return nil, gtserror.Newf("error fetching this instance account from the db: %s", err) } var ( @@ -227,7 +224,7 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode // Look for existing emoji by given ID. emoji, err = m.state.DB.GetEmojiByID(ctx, emojiID) if err != nil { - return nil, fmt.Errorf("preProcessEmoji: error fetching emoji to refresh from the db: %s", err) + return nil, gtserror.Newf("error fetching emoji to refresh from the db: %s", err) } // if this is a refresh, we will end up with new images @@ -260,7 +257,7 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode newPathID, err = id.NewRandomULID() if err != nil { - return nil, fmt.Errorf("preProcessEmoji: error generating alternateID for emoji refresh: %s", err) + return nil, gtserror.Newf("error generating alternateID for emoji refresh: %s", err) } // store + serve static image at new path ID @@ -356,33 +353,3 @@ func (m *Manager) ProcessEmoji(ctx context.Context, data DataFunc, shortcode str return emoji, nil } - -func scheduleCleanupJobs(m *Manager) { - const day = time.Hour * 24 - - // Calculate closest midnight. - now := time.Now() - midnight := now.Round(day) - - if midnight.Before(now) { - // since <= 11:59am rounds down. - midnight = midnight.Add(day) - } - - // Get ctx associated with scheduler run state. - done := m.state.Workers.Scheduler.Done() - doneCtx := runners.CancelCtx(done) - - // TODO: we'll need to do some thinking to make these - // jobs restartable if we want to implement reloads in - // the future that make call to Workers.Stop() -> Workers.Start(). - - // Schedule the PruneAll task to execute every day at midnight. - m.state.Workers.Scheduler.Schedule(sched.NewJob(func(now time.Time) { - err := m.PruneAll(doneCtx, config.GetMediaRemoteCacheDays(), true) - if err != nil { - log.Errorf(nil, "error during prune: %v", err) - } - log.Infof(nil, "finished pruning all in %s", time.Since(now)) - }).EveryAt(midnight, day)) -} diff --git a/internal/media/manager_test.go b/internal/media/manager_test.go index 2bee1091d..7b9b66147 100644 --- a/internal/media/manager_test.go +++ b/internal/media/manager_test.go @@ -214,7 +214,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLarge() { // do a blocking call to fetch the emoji emoji, err := processingEmoji.LoadEmoji(ctx) - suite.EqualError(err, "given emoji size 630kiB greater than max allowed 50.0kiB") + suite.EqualError(err, "store: given emoji size 630kiB greater than max allowed 50.0kiB") suite.Nil(emoji) } @@ -238,7 +238,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLargeNoSizeGiven() { // do a blocking call to fetch the emoji emoji, err := processingEmoji.LoadEmoji(ctx) - suite.EqualError(err, "calculated emoji size 630kiB greater than max allowed 50.0kiB") + suite.EqualError(err, "store: calculated emoji size 630kiB greater than max allowed 50.0kiB") suite.Nil(emoji) } @@ -626,7 +626,7 @@ func (suite *ManagerTestSuite) TestNotAnMp4ProcessBlocking() { // we should get an error while loading attachment, err := processingMedia.LoadAttachment(ctx) - suite.EqualError(err, "error decoding video: error determining video metadata: [width height framerate]") + suite.EqualError(err, "finish: error decoding video: error determining video metadata: [width height framerate]") suite.Nil(attachment) } diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go index 7c3db8196..d3a1edbf8 100644 --- a/internal/media/processingemoji.go +++ b/internal/media/processingemoji.go @@ -28,6 +28,7 @@ "codeberg.org/gruf/go-runners" "github.com/h2non/filetype" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/uris" @@ -137,7 +138,7 @@ func (p *ProcessingEmoji) load(ctx context.Context) (*gtsmodel.Emoji, bool, erro } // Existing emoji we're refreshing, so only need to update. - _, err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...) + err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...) return err } @@ -160,7 +161,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Load media from provided data fn. rc, sz, err := p.dataFn(ctx) if err != nil { - return fmt.Errorf("error executing data function: %w", err) + return gtserror.Newf("error executing data function: %w", err) } defer func() { @@ -177,13 +178,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Read the first 261 header bytes into buffer. if _, err := io.ReadFull(rc, hdrBuf); err != nil { - return fmt.Errorf("error reading incoming media: %w", err) + return gtserror.Newf("error reading incoming media: %w", err) } // Parse file type info from header buffer. info, err := filetype.Match(hdrBuf) if err != nil { - return fmt.Errorf("error parsing file type: %w", err) + return gtserror.Newf("error parsing file type: %w", err) } switch info.Extension { @@ -192,7 +193,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // unhandled default: - return fmt.Errorf("unsupported emoji filetype: %s", info.Extension) + return gtserror.Newf("unsupported emoji filetype: %s", info.Extension) } // Recombine header bytes with remaining stream @@ -211,7 +212,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Check that provided size isn't beyond max. We check beforehand // so that we don't attempt to stream the emoji into storage if not needed. if size := bytesize.Size(sz); sz > 0 && size > maxSize { - return fmt.Errorf("given emoji size %s greater than max allowed %s", size, maxSize) + return gtserror.Newf("given emoji size %s greater than max allowed %s", size, maxSize) } var pathID string @@ -241,14 +242,14 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Attempt to remove existing emoji at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil { - return fmt.Errorf("error removing emoji from storage: %v", err) + return gtserror.Newf("error removing emoji from storage: %v", err) } } // Write the final image reader stream to our storage. sz, err = p.mgr.state.Storage.PutStream(ctx, p.emoji.ImagePath, r) if err != nil { - return fmt.Errorf("error writing emoji to storage: %w", err) + return gtserror.Newf("error writing emoji to storage: %w", err) } // Once again check size in case none was provided previously. @@ -257,7 +258,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil { log.Errorf(ctx, "error removing too-large-emoji from storage: %v", err) } - return fmt.Errorf("calculated emoji size %s greater than max allowed %s", size, maxSize) + return gtserror.Newf("calculated emoji size %s greater than max allowed %s", size, maxSize) } // Fill in remaining attachment data now it's stored. @@ -278,19 +279,19 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error { // Fetch a stream to the original file in storage. rc, err := p.mgr.state.Storage.GetStream(ctx, p.emoji.ImagePath) if err != nil { - return fmt.Errorf("error loading file from storage: %w", err) + return gtserror.Newf("error loading file from storage: %w", err) } defer rc.Close() // Decode the image from storage. staticImg, err := decodeImage(rc) if err != nil { - return fmt.Errorf("error decoding image: %w", err) + return gtserror.Newf("error decoding image: %w", err) } // The image should be in-memory by now. if err := rc.Close(); err != nil { - return fmt.Errorf("error closing file: %w", err) + return gtserror.Newf("error closing file: %w", err) } // This shouldn't already exist, but we do a check as it's worth logging. @@ -298,7 +299,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error { log.Warnf(ctx, "static emoji already exists at storage path: %s", p.emoji.ImagePath) // Attempt to remove static existing emoji at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil { - return fmt.Errorf("error removing static emoji from storage: %v", err) + return gtserror.Newf("error removing static emoji from storage: %v", err) } } @@ -308,7 +309,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error { // Stream-encode the PNG static image into storage. sz, err := p.mgr.state.Storage.PutStream(ctx, p.emoji.ImageStaticPath, enc) if err != nil { - return fmt.Errorf("error stream-encoding static emoji to storage: %w", err) + return gtserror.Newf("error stream-encoding static emoji to storage: %w", err) } // Set written image size. diff --git a/internal/media/processingmedia.go b/internal/media/processingmedia.go index 5c66f561d..acfee48f3 100644 --- a/internal/media/processingmedia.go +++ b/internal/media/processingmedia.go @@ -30,6 +30,7 @@ "github.com/disintegration/imaging" "github.com/h2non/filetype" terminator "github.com/superseriousbusiness/exif-terminator" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/uris" @@ -145,7 +146,7 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // Load media from provided data fun rc, sz, err := p.dataFn(ctx) if err != nil { - return fmt.Errorf("error executing data function: %w", err) + return gtserror.Newf("error executing data function: %w", err) } defer func() { @@ -162,13 +163,13 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // Read the first 261 header bytes into buffer. if _, err := io.ReadFull(rc, hdrBuf); err != nil { - return fmt.Errorf("error reading incoming media: %w", err) + return gtserror.Newf("error reading incoming media: %w", err) } // Parse file type info from header buffer. info, err := filetype.Match(hdrBuf) if err != nil { - return fmt.Errorf("error parsing file type: %w", err) + return gtserror.Newf("error parsing file type: %w", err) } // Recombine header bytes with remaining stream @@ -187,12 +188,12 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // A file size was provided so we can clean exif data from image. r, err = terminator.Terminate(r, int(sz), info.Extension) if err != nil { - return fmt.Errorf("error cleaning exif data: %w", err) + return gtserror.Newf("error cleaning exif data: %w", err) } } default: - return fmt.Errorf("unsupported file type: %s", info.Extension) + return gtserror.Newf("unsupported file type: %s", info.Extension) } // Calculate attachment file path. @@ -211,14 +212,14 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // Attempt to remove existing media at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.media.File.Path); err != nil { - return fmt.Errorf("error removing media from storage: %v", err) + return gtserror.Newf("error removing media from storage: %v", err) } } // Write the final image reader stream to our storage. sz, err = p.mgr.state.Storage.PutStream(ctx, p.media.File.Path, r) if err != nil { - return fmt.Errorf("error writing media to storage: %w", err) + return gtserror.Newf("error writing media to storage: %w", err) } // Set written image size. @@ -245,7 +246,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Fetch a stream to the original file in storage. rc, err := p.mgr.state.Storage.GetStream(ctx, p.media.File.Path) if err != nil { - return fmt.Errorf("error loading file from storage: %w", err) + return gtserror.Newf("error loading file from storage: %w", err) } defer rc.Close() @@ -256,7 +257,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { case mimeImageJpeg, mimeImageGif, mimeImageWebp: fullImg, err = decodeImage(rc, imaging.AutoOrientation(true)) if err != nil { - return fmt.Errorf("error decoding image: %w", err) + return gtserror.Newf("error decoding image: %w", err) } // .png image (requires ancillary chunk stripping) @@ -265,14 +266,14 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { Reader: rc, }, imaging.AutoOrientation(true)) if err != nil { - return fmt.Errorf("error decoding image: %w", err) + return gtserror.Newf("error decoding image: %w", err) } // .mp4 video type case mimeVideoMp4: video, err := decodeVideoFrame(rc) if err != nil { - return fmt.Errorf("error decoding video: %w", err) + return gtserror.Newf("error decoding video: %w", err) } // Set video frame as image. @@ -286,7 +287,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // The image should be in-memory by now. if err := rc.Close(); err != nil { - return fmt.Errorf("error closing file: %w", err) + return gtserror.Newf("error closing file: %w", err) } // Set full-size dimensions in attachment info. @@ -314,7 +315,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Blurhash needs generating from thumb. hash, err := thumbImg.Blurhash() if err != nil { - return fmt.Errorf("error generating blurhash: %w", err) + return gtserror.Newf("error generating blurhash: %w", err) } // Set the attachment blurhash. @@ -326,7 +327,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Attempt to remove existing thumbnail at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.media.Thumbnail.Path); err != nil { - return fmt.Errorf("error removing thumbnail from storage: %v", err) + return gtserror.Newf("error removing thumbnail from storage: %v", err) } } @@ -338,7 +339,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Stream-encode the JPEG thumbnail image into storage. sz, err := p.mgr.state.Storage.PutStream(ctx, p.media.Thumbnail.Path, enc) if err != nil { - return fmt.Errorf("error stream-encoding thumbnail to storage: %w", err) + return gtserror.Newf("error stream-encoding thumbnail to storage: %w", err) } // Fill in remaining thumbnail now it's stored diff --git a/internal/media/prune.go b/internal/media/prune.go deleted file mode 100644 index 71c8e00ce..000000000 --- a/internal/media/prune.go +++ /dev/null @@ -1,373 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package media - -import ( - "context" - "errors" - "fmt" - "time" - - "codeberg.org/gruf/go-store/v2/storage" - "github.com/superseriousbusiness/gotosocial/internal/db" - "github.com/superseriousbusiness/gotosocial/internal/gtserror" - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/regexes" - "github.com/superseriousbusiness/gotosocial/internal/uris" -) - -const ( - selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning. - unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status. -) - -// PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting -// empty directories from the storage driver. It can be called as a shortcut for calling the below -// pruning functions one by one. -// -// If blocking is true, then any errors encountered during the prune will be combined + returned to -// the caller. If blocking is false, the prune is run in the background and errors are just logged -// instead. -func (m *Manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error { - const dry = false - - f := func(innerCtx context.Context) error { - errs := gtserror.MultiError{} - - pruned, err := m.PruneUnusedLocal(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err)) - } else { - log.Infof(ctx, "pruned %d unused local media", pruned) - } - - pruned, err = m.PruneUnusedRemote(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err)) - } else { - log.Infof(ctx, "pruned %d unused remote media", pruned) - } - - pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err)) - } else { - log.Infof(ctx, "uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays) - } - - pruned, err = m.PruneOrphaned(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err)) - } else { - log.Infof(ctx, "pruned %d orphaned media", pruned) - } - - if err := m.state.Storage.Storage.Clean(innerCtx); err != nil { - errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err)) - } else { - log.Info(ctx, "cleaned storage") - } - - return errs.Combine() - } - - if blocking { - return f(ctx) - } - - go func() { - if err := f(context.Background()); err != nil { - log.Error(ctx, err) - } - }() - - return nil -} - -// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance. -// -// The returned int is the amount of media that was pruned by this function. -func (m *Manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) { - var ( - totalPruned int - maxID string - attachments []*gtsmodel.MediaAttachment - err error - ) - - // We don't know in advance how many remote attachments will meet - // our criteria for being 'unused'. So a dry run in this case just - // means we iterate through as normal, but do nothing with each entry - // instead of removing it. Define this here so we don't do the 'if dry' - // check inside the loop a million times. - var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error - if !dry { - f = m.deleteAttachment - } else { - f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error { - return nil // noop - } - } - - for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) { - maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value - - for _, attachment := range attachments { - // Retrieve owning account if possible. - var account *gtsmodel.Account - if accountID := attachment.AccountID; accountID != "" { - account, err = m.state.DB.GetAccountByID(ctx, attachment.AccountID) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - // Only return on a real error. - return 0, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, err) - } - } - - // Prune each attachment that meets one of the following criteria: - // - Has no owning account in the database. - // - Is a header but isn't the owning account's current header. - // - Is an avatar but isn't the owning account's current avatar. - if account == nil || - (*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) || - (*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) { - if err := f(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -// PruneOrphaned prunes files that exist in storage but which do not have a corresponding -// entry in the database. -// -// If dry is true, then nothing will be changed, only the amount that *would* be removed -// is returned to the caller. -func (m *Manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) { - // Emojis are stored under the instance account, so we - // need the ID of the instance account for the next part. - instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") - if err != nil { - return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err) - } - - instanceAccountID := instanceAccount.ID - - var orphanedKeys []string - - // Keys in storage will look like the following format: - // `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]` - // We can filter out keys we're not interested in by matching through a regex. - if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, key string) error { - if !regexes.FilePath.MatchString(key) { - // This is not our expected key format. - return nil - } - - // Check whether this storage entry is orphaned. - orphaned, err := m.orphaned(ctx, key, instanceAccountID) - if err != nil { - return fmt.Errorf("error checking orphaned status: %w", err) - } - - if orphaned { - // Add this orphaned entry to list of keys. - orphanedKeys = append(orphanedKeys, key) - } - - return nil - }); err != nil { - return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err) - } - - totalPruned := len(orphanedKeys) - - if dry { - // Dry run: don't remove anything. - return totalPruned, nil - } - - // This is not a drill! We have to delete stuff! - return m.removeFiles(ctx, orphanedKeys...) -} - -func (m *Manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) { - pathParts := regexes.FilePath.FindStringSubmatch(key) - if len(pathParts) != 6 { - // This doesn't match our expectations so - // it wasn't created by gts; ignore it. - return false, nil - } - - var ( - mediaType = pathParts[2] - mediaID = pathParts[4] - orphaned = false - ) - - // Look for keys in storage that we don't have an attachment for. - switch Type(mediaType) { - case TypeAttachment, TypeHeader, TypeAvatar: - if _, err := m.state.DB.GetAttachmentByID(ctx, mediaID); err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return false, fmt.Errorf("error calling GetAttachmentByID: %w", err) - } - orphaned = true - } - case TypeEmoji: - // Look using the static URL for the emoji. Emoji images can change, so - // the MEDIA_ID part of the key for emojis will not necessarily correspond - // to the file that's currently being used as the emoji image. - staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng) - if _, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL); err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err) - } - orphaned = true - } - } - - return orphaned, nil -} - -// UncacheRemote uncaches all remote media attachments older than the given amount of days. -// -// In this context, uncacheing means deleting media files from storage and marking the attachment -// as cached=false in the database. -// -// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed. -// -// The returned int is the amount of media that was/would be uncached by this function. -func (m *Manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) { - if olderThanDays < 0 { - return 0, nil - } - - olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays)) - - if dry { - // Dry run, just count eligible entries without removing them. - return m.state.DB.CountRemoteOlderThan(ctx, olderThan) - } - - var ( - totalPruned int - attachments []*gtsmodel.MediaAttachment - err error - ) - - for attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) { - olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value - - for _, attachment := range attachments { - if err := m.uncacheAttachment(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -// PruneUnusedLocal prunes unused media attachments that were uploaded by -// a user on this instance, but never actually attached to a status, or attached but -// later detached. -// -// The returned int is the amount of media that was pruned by this function. -func (m *Manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) { - olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays)) - - if dry { - // Dry run, just count eligible entries without removing them. - return m.state.DB.CountLocalUnattachedOlderThan(ctx, olderThan) - } - - var ( - totalPruned int - attachments []*gtsmodel.MediaAttachment - err error - ) - - for attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) { - olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value - - for _, attachment := range attachments { - if err := m.deleteAttachment(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -/* - Handy little helpers -*/ - -func (m *Manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { - if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { - return err - } - - // Delete attachment completely. - return m.state.DB.DeleteAttachment(ctx, attachment.ID) -} - -func (m *Manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { - if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { - return err - } - - // Update attachment to reflect that we no longer have it cached. - attachment.Cached = func() *bool { i := false; return &i }() - return m.state.DB.UpdateAttachment(ctx, attachment, "cached") -} - -func (m *Manager) removeFiles(ctx context.Context, keys ...string) (int, error) { - errs := make(gtserror.MultiError, 0, len(keys)) - - for _, key := range keys { - if err := m.state.Storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) { - errs = append(errs, "storage error removing "+key+": "+err.Error()) - } - } - - return len(keys) - len(errs), errs.Combine() -} diff --git a/internal/media/prune_test.go b/internal/media/prune_test.go deleted file mode 100644 index 375ce0c06..000000000 --- a/internal/media/prune_test.go +++ /dev/null @@ -1,357 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package media_test - -import ( - "bytes" - "context" - "io" - "os" - "testing" - - "codeberg.org/gruf/go-store/v2/storage" - "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/db" - gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -) - -type PruneTestSuite struct { - MediaStandardTestSuite -} - -func (suite *PruneTestSuite) TestPruneOrphanedDry() { - // add a big orphan panda to store - b, err := os.ReadFile("./test/big-panda.gif") - if err != nil { - suite.FailNow(err.Error()) - } - - pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" - if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { - suite.FailNow(err.Error()) - } - - // dry run should show up 1 orphaned panda - totalPruned, err := suite.manager.PruneOrphaned(context.Background(), true) - suite.NoError(err) - suite.Equal(1, totalPruned) - - // panda should still be in storage - hasKey, err := suite.storage.Has(context.Background(), pandaPath) - suite.NoError(err) - suite.True(hasKey) -} - -func (suite *PruneTestSuite) TestPruneOrphanedMoist() { - // add a big orphan panda to store - b, err := os.ReadFile("./test/big-panda.gif") - if err != nil { - suite.FailNow(err.Error()) - } - - pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" - if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { - suite.FailNow(err.Error()) - } - - // should show up 1 orphaned panda - totalPruned, err := suite.manager.PruneOrphaned(context.Background(), false) - suite.NoError(err) - suite.Equal(1, totalPruned) - - // panda should no longer be in storage - hasKey, err := suite.storage.Has(context.Background(), pandaPath) - suite.NoError(err) - suite.False(hasKey) -} - -func (suite *PruneTestSuite) TestPruneUnusedLocal() { - testAttachment := suite.testAttachments["local_account_1_unattached_1"] - suite.True(*testAttachment.Cached) - - totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) - suite.NoError(err) - suite.Equal(1, totalPruned) - - _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) - suite.ErrorIs(err, db.ErrNoEntries) -} - -func (suite *PruneTestSuite) TestPruneUnusedLocalDry() { - testAttachment := suite.testAttachments["local_account_1_unattached_1"] - suite.True(*testAttachment.Cached) - - totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), true) - suite.NoError(err) - suite.Equal(1, totalPruned) - - _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) - suite.NoError(err) -} - -func (suite *PruneTestSuite) TestPruneRemoteTwice() { - totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) - suite.NoError(err) - suite.Equal(1, totalPruned) - - // final prune should prune nothing, since the first prune already happened - totalPrunedAgain, err := suite.manager.PruneUnusedLocal(context.Background(), false) - suite.NoError(err) - suite.Equal(0, totalPrunedAgain) -} - -func (suite *PruneTestSuite) TestPruneOneNonExistent() { - ctx := context.Background() - testAttachment := suite.testAttachments["local_account_1_unattached_1"] - - // Delete this attachment cached on disk - media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID) - suite.NoError(err) - suite.True(*media.Cached) - err = suite.storage.Delete(ctx, media.File.Path) - suite.NoError(err) - - // Now attempt to prune for item with db entry no file - totalPruned, err := suite.manager.PruneUnusedLocal(ctx, false) - suite.NoError(err) - suite.Equal(1, totalPruned) -} - -func (suite *PruneTestSuite) TestPruneUnusedRemote() { - ctx := context.Background() - - // start by clearing zork's avatar + header - zorkOldAvatar := suite.testAttachments["local_account_1_avatar"] - zorkOldHeader := suite.testAttachments["local_account_1_avatar"] - zork := suite.testAccounts["local_account_1"] - zork.AvatarMediaAttachmentID = "" - zork.HeaderMediaAttachmentID = "" - if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { - panic(err) - } - - totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(2, totalPruned) - - // media should no longer be stored - _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - - // attachments should no longer be in the db - _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID) - suite.ErrorIs(err, db.ErrNoEntries) - _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID) - suite.ErrorIs(err, db.ErrNoEntries) -} - -func (suite *PruneTestSuite) TestPruneUnusedRemoteTwice() { - ctx := context.Background() - - // start by clearing zork's avatar + header - zork := suite.testAccounts["local_account_1"] - zork.AvatarMediaAttachmentID = "" - zork.HeaderMediaAttachmentID = "" - if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { - panic(err) - } - - totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(2, totalPruned) - - // final prune should prune nothing, since the first prune already happened - totalPruned, err = suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(0, totalPruned) -} - -func (suite *PruneTestSuite) TestPruneUnusedRemoteMultipleAccounts() { - ctx := context.Background() - - // start by clearing zork's avatar + header - zorkOldAvatar := suite.testAttachments["local_account_1_avatar"] - zorkOldHeader := suite.testAttachments["local_account_1_avatar"] - zork := suite.testAccounts["local_account_1"] - zork.AvatarMediaAttachmentID = "" - zork.HeaderMediaAttachmentID = "" - if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { - panic(err) - } - - // set zork's unused header as belonging to turtle - turtle := suite.testAccounts["local_account_1"] - zorkOldHeader.AccountID = turtle.ID - if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil { - panic(err) - } - - totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(2, totalPruned) - - // media should no longer be stored - _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - - // attachments should no longer be in the db - _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID) - suite.ErrorIs(err, db.ErrNoEntries) - _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID) - suite.ErrorIs(err, db.ErrNoEntries) -} - -func (suite *PruneTestSuite) TestUncacheRemote() { - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - suite.True(*testStatusAttachment.Cached) - - testHeader := suite.testAttachments["remote_account_3_header"] - suite.True(*testHeader.Cached) - - totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) - - uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID) - suite.NoError(err) - suite.False(*uncachedAttachment.Cached) - - uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID) - suite.NoError(err) - suite.False(*uncachedAttachment.Cached) -} - -func (suite *PruneTestSuite) TestUncacheRemoteDry() { - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - suite.True(*testStatusAttachment.Cached) - - testHeader := suite.testAttachments["remote_account_3_header"] - suite.True(*testHeader.Cached) - - totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, true) - suite.NoError(err) - suite.Equal(2, totalUncached) - - uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID) - suite.NoError(err) - suite.True(*uncachedAttachment.Cached) - - uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID) - suite.NoError(err) - suite.True(*uncachedAttachment.Cached) -} - -func (suite *PruneTestSuite) TestUncacheRemoteTwice() { - totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) - - // final uncache should uncache nothing, since the first uncache already happened - totalUncachedAgain, err := suite.manager.UncacheRemote(context.Background(), 1, false) - suite.NoError(err) - suite.Equal(0, totalUncachedAgain) -} - -func (suite *PruneTestSuite) TestUncacheAndRecache() { - ctx := context.Background() - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - testHeader := suite.testAttachments["remote_account_3_header"] - - totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) - - // media should no longer be stored - _, err = suite.storage.Get(ctx, testStatusAttachment.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, testHeader.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - - // now recache the image.... - data := func(_ context.Context) (io.ReadCloser, int64, error) { - // load bytes from a test image - b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg") - if err != nil { - panic(err) - } - return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil - } - - for _, original := range []*gtsmodel.MediaAttachment{ - testStatusAttachment, - testHeader, - } { - processingRecache, err := suite.manager.PreProcessMediaRecache(ctx, data, original.ID) - suite.NoError(err) - - // synchronously load the recached attachment - recachedAttachment, err := processingRecache.LoadAttachment(ctx) - suite.NoError(err) - suite.NotNil(recachedAttachment) - - // recachedAttachment should be basically the same as the old attachment - suite.True(*recachedAttachment.Cached) - suite.Equal(original.ID, recachedAttachment.ID) - suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place - suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail - suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same - - // recached files should be back in storage - _, err = suite.storage.Get(ctx, recachedAttachment.File.Path) - suite.NoError(err) - _, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path) - suite.NoError(err) - } -} - -func (suite *PruneTestSuite) TestUncacheOneNonExistent() { - ctx := context.Background() - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - - // Delete this attachment cached on disk - media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID) - suite.NoError(err) - suite.True(*media.Cached) - err = suite.storage.Delete(ctx, media.File.Path) - suite.NoError(err) - - // Now attempt to uncache remote for item with db entry no file - totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) -} - -func TestPruneOrphanedTestSuite(t *testing.T) { - suite.Run(t, &PruneTestSuite{}) -} diff --git a/internal/media/refetch.go b/internal/media/refetch.go index 80dfe4f60..03f0fbf34 100644 --- a/internal/media/refetch.go +++ b/internal/media/refetch.go @@ -52,7 +52,7 @@ func (m *Manager) RefetchEmojis(ctx context.Context, domain string, dereferenceM // page through emojis 20 at a time, looking for those with missing images for { // Fetch next block of emojis from database - emojis, err := m.state.DB.GetEmojis(ctx, domain, false, true, "", maxShortcodeDomain, "", 20) + emojis, err := m.state.DB.GetEmojisBy(ctx, domain, false, true, "", maxShortcodeDomain, "", 20) if err != nil { if !errors.Is(err, db.ErrNoEntries) { // an actual error has occurred diff --git a/internal/processing/admin/admin.go b/internal/processing/admin/admin.go index 9b243e06d..0fa24452b 100644 --- a/internal/processing/admin/admin.go +++ b/internal/processing/admin/admin.go @@ -18,6 +18,7 @@ package admin import ( + "github.com/superseriousbusiness/gotosocial/internal/cleaner" "github.com/superseriousbusiness/gotosocial/internal/email" "github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/state" @@ -27,6 +28,7 @@ type Processor struct { state *state.State + cleaner *cleaner.Cleaner tc typeutils.TypeConverter mediaManager *media.Manager transportController transport.Controller @@ -37,6 +39,7 @@ type Processor struct { func New(state *state.State, tc typeutils.TypeConverter, mediaManager *media.Manager, transportController transport.Controller, emailSender email.Sender) Processor { return Processor{ state: state, + cleaner: cleaner.New(state), tc: tc, mediaManager: mediaManager, transportController: transportController, diff --git a/internal/processing/admin/emoji.go b/internal/processing/admin/emoji.go index 3a7868eb1..96b0bef07 100644 --- a/internal/processing/admin/emoji.go +++ b/internal/processing/admin/emoji.go @@ -109,7 +109,7 @@ func (p *Processor) EmojisGet( return nil, gtserror.NewErrorUnauthorized(fmt.Errorf("user %s not an admin", user.ID), "user is not an admin") } - emojis, err := p.state.DB.GetEmojis(ctx, domain, includeDisabled, includeEnabled, shortcode, maxShortcodeDomain, minShortcodeDomain, limit) + emojis, err := p.state.DB.GetEmojisBy(ctx, domain, includeDisabled, includeEnabled, shortcode, maxShortcodeDomain, minShortcodeDomain, limit) if err != nil && !errors.Is(err, db.ErrNoEntries) { err := fmt.Errorf("EmojisGet: db error: %s", err) return nil, gtserror.NewErrorInternalError(err) @@ -385,13 +385,13 @@ func (p *Processor) emojiUpdateDisable(ctx context.Context, emoji *gtsmodel.Emoj emojiDisabled := true emoji.Disabled = &emojiDisabled - updatedEmoji, err := p.state.DB.UpdateEmoji(ctx, emoji, "disabled") + err := p.state.DB.UpdateEmoji(ctx, emoji, "disabled") if err != nil { err = fmt.Errorf("emojiUpdateDisable: error updating emoji %s: %s", emoji.ID, err) return nil, gtserror.NewErrorInternalError(err) } - adminEmoji, err := p.tc.EmojiToAdminAPIEmoji(ctx, updatedEmoji) + adminEmoji, err := p.tc.EmojiToAdminAPIEmoji(ctx, emoji) if err != nil { err = fmt.Errorf("emojiUpdateDisable: error converting updated emoji %s to admin emoji: %s", emoji.ID, err) return nil, gtserror.NewErrorInternalError(err) @@ -407,8 +407,6 @@ func (p *Processor) emojiUpdateModify(ctx context.Context, emoji *gtsmodel.Emoji return nil, gtserror.NewErrorBadRequest(err, err.Error()) } - var updatedEmoji *gtsmodel.Emoji - // keep existing categoryID unless a new one is defined var ( updatedCategoryID = emoji.CategoryID @@ -442,7 +440,7 @@ func (p *Processor) emojiUpdateModify(ctx context.Context, emoji *gtsmodel.Emoji } var err error - updatedEmoji, err = p.state.DB.UpdateEmoji(ctx, emoji, columns...) + err = p.state.DB.UpdateEmoji(ctx, emoji, columns...) if err != nil { err = fmt.Errorf("emojiUpdateModify: error updating emoji %s: %s", emoji.ID, err) return nil, gtserror.NewErrorInternalError(err) @@ -467,14 +465,14 @@ func (p *Processor) emojiUpdateModify(ctx context.Context, emoji *gtsmodel.Emoji return nil, gtserror.NewErrorInternalError(err) } - updatedEmoji, err = processingEmoji.LoadEmoji(ctx) + emoji, err = processingEmoji.LoadEmoji(ctx) if err != nil { err = fmt.Errorf("emojiUpdateModify: error loading processed emoji %s: %s", emoji.ID, err) return nil, gtserror.NewErrorInternalError(err) } } - adminEmoji, err := p.tc.EmojiToAdminAPIEmoji(ctx, updatedEmoji) + adminEmoji, err := p.tc.EmojiToAdminAPIEmoji(ctx, emoji) if err != nil { err = fmt.Errorf("emojiUpdateModify: error converting updated emoji %s to admin emoji: %s", emoji.ID, err) return nil, gtserror.NewErrorInternalError(err) diff --git a/internal/processing/admin/media.go b/internal/processing/admin/media.go index b8af183da..a457487b8 100644 --- a/internal/processing/admin/media.go +++ b/internal/processing/admin/media.go @@ -47,17 +47,19 @@ func (p *Processor) MediaRefetch(ctx context.Context, requestingAccount *gtsmode return nil } -// MediaPrune triggers a non-blocking prune of remote media, local unused media, etc. +// MediaPrune triggers a non-blocking prune of unused media, orphaned, uncaching remote and fixing cache states. func (p *Processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gtserror.WithCode { if mediaRemoteCacheDays < 0 { err := fmt.Errorf("MediaPrune: invalid value for mediaRemoteCacheDays prune: value was %d, cannot be less than 0", mediaRemoteCacheDays) return gtserror.NewErrorBadRequest(err, err.Error()) } - if err := p.mediaManager.PruneAll(ctx, mediaRemoteCacheDays, false); err != nil { - err = fmt.Errorf("MediaPrune: %w", err) - return gtserror.NewErrorInternalError(err) - } + // Start background task performing all media cleanup tasks. + go func() { + ctx := context.Background() + p.cleaner.Media().All(ctx, mediaRemoteCacheDays) + p.cleaner.Emoji().All(ctx) + }() return nil } diff --git a/internal/processing/search/get.go b/internal/processing/search/get.go index 936e8acfa..aaade8908 100644 --- a/internal/processing/search/get.go +++ b/internal/processing/search/get.go @@ -26,11 +26,9 @@ "strings" "codeberg.org/gruf/go-kv" - "github.com/superseriousbusiness/gotosocial/internal/ap" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/db" - "github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing" "github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -62,7 +60,6 @@ func (p *Processor) Get( account *gtsmodel.Account, req *apimodel.SearchRequest, ) (*apimodel.SearchResult, gtserror.WithCode) { - var ( maxID = req.MaxID minID = req.MinID @@ -127,7 +124,7 @@ func (p *Processor) Get( // accounts, since this is all namestring search can return. if includeAccounts(queryType) { // Copy query to avoid altering original. - var queryC = query + queryC := query // If query looks vaguely like an email address, ie. it doesn't // start with '@' but it has '@' in it somewhere, it's probably @@ -284,12 +281,7 @@ func (p *Processor) accountsByNamestring( if err != nil { // Check for semi-expected error types. // On one of these, we can continue. - var ( - errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. - errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't an account. - ) - - if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { + if !gtserror.Unretrievable(err) && !gtserror.WrongType(err) { err = gtserror.Newf("error looking up %s as account: %w", query, err) return false, gtserror.NewErrorInternalError(err) } @@ -331,11 +323,10 @@ func (p *Processor) accountByUsernameDomain( if err != nil { err = gtserror.Newf("error checking domain block: %w", err) return nil, gtserror.NewErrorInternalError(err) - } - - if blocked { + } else if blocked { // Don't search on blocked domain. - return nil, dereferencing.NewErrNotRetrievable(err) + err = gtserror.New("domain blocked") + return nil, gtserror.SetUnretrievable(err) } } @@ -365,7 +356,7 @@ func (p *Processor) accountByUsernameDomain( } err = fmt.Errorf("account %s could not be retrieved locally and we cannot resolve", usernameDomain) - return nil, dereferencing.NewErrNotRetrievable(err) + return nil, gtserror.SetUnretrievable(err) } // byURI looks for account(s) or a status with the given URI @@ -419,12 +410,7 @@ func (p *Processor) byURI( if err != nil { // Check for semi-expected error types. // On one of these, we can continue. - var ( - errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. - errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't an account. - ) - - if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { + if !gtserror.Unretrievable(err) && !gtserror.WrongType(err) { err = gtserror.Newf("error looking up %s as account: %w", uri, err) return false, gtserror.NewErrorInternalError(err) } @@ -443,12 +429,7 @@ func (p *Processor) byURI( if err != nil { // Check for semi-expected error types. // On one of these, we can continue. - var ( - errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. - errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't a status. - ) - - if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { + if !gtserror.Unretrievable(err) && !gtserror.WrongType(err) { err = gtserror.Newf("error looking up %s as status: %w", uri, err) return false, gtserror.NewErrorInternalError(err) } @@ -519,7 +500,7 @@ func (p *Processor) accountByURI( } err = fmt.Errorf("account %s could not be retrieved locally and we cannot resolve", uriStr) - return nil, dereferencing.NewErrNotRetrievable(err) + return nil, gtserror.SetUnretrievable(err) } // statusByURI looks for one status with the given URI. @@ -575,7 +556,7 @@ func (p *Processor) statusByURI( } err = fmt.Errorf("status %s could not be retrieved locally and we cannot resolve", uriStr) - return nil, dereferencing.NewErrNotRetrievable(err) + return nil, gtserror.SetUnretrievable(err) } // byText searches in the database for accounts and/or diff --git a/internal/processing/search/lookup.go b/internal/processing/search/lookup.go index 0f2a4191b..d50183221 100644 --- a/internal/processing/search/lookup.go +++ b/internal/processing/search/lookup.go @@ -23,10 +23,8 @@ "fmt" "strings" - errorsv2 "codeberg.org/gruf/go-errors/v2" "codeberg.org/gruf/go-kv" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" - "github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" @@ -82,7 +80,7 @@ func (p *Processor) Lookup( false, // never resolve! ) if err != nil { - if errorsv2.Assignable(err, (*dereferencing.ErrNotRetrievable)(nil)) { + if gtserror.Unretrievable(err) { // ErrNotRetrievable is fine, just wrap it in // a 404 to indicate we couldn't find anything. err := fmt.Errorf("%s not found", query) diff --git a/internal/storage/storage.go b/internal/storage/storage.go index f131b4292..ea8184881 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -46,9 +46,11 @@ type PresignedURL struct { Expiry time.Time // link expires at this time } -// ErrAlreadyExists is a ptr to underlying storage.ErrAlreadyExists, -// to put the related errors in the same package as our storage wrapper. -var ErrAlreadyExists = storage.ErrAlreadyExists +var ( + // Ptrs to underlying storage library errors. + ErrAlreadyExists = storage.ErrAlreadyExists + ErrNotFound = storage.ErrNotFound +) // Driver wraps a kv.KVStore to also provide S3 presigned GET URLs. type Driver struct { diff --git a/testrig/config.go b/testrig/config.go index 36de99338..126c5b3d4 100644 --- a/testrig/config.go +++ b/testrig/config.go @@ -33,7 +33,7 @@ func InitTestConfig() { } var testDefaults = config.Configuration{ - LogLevel: "trace", + LogLevel: "info", LogDbQueries: true, ApplicationName: "gotosocial", LandingPageUser: "",