[feature] Start implementing refetch of lost media files via /api/v1/admin/media_refetch (#1221)

* [chore] Move ShortcodeDomain to its own little util func

* [feature] Add RefetchEmojis function to media manager

* [feature] Expose admin media refresh via admin API

* update following review feedback

- change/fix log levels
- make sure not to try to refetch local emojis
- small style refactoring + comments

* log on emoji refetch start

Signed-off-by: kim <grufwub@gmail.com>
Co-authored-by: kim <grufwub@gmail.com>
This commit is contained in:
tobi 2022-12-10 22:43:11 +01:00 committed by GitHub
parent 24b4f9b5d7
commit 5e060d0bcb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 499 additions and 28 deletions

View file

@ -3209,6 +3209,40 @@ paths:
summary: Clean up remote media older than the specified number of days.
tags:
- admin
/api/v1/admin/media_refetch:
post:
description: |-
Currently, this only includes remote emojis.
This endpoint is useful when data loss has occurred, and you want to try to recover to a working state.
operationId: mediaRefetch
parameters:
- description: Domain to refetch media from. If empty, all domains will be refetched.
in: query
name: domain
type: string
produces:
- application/json
responses:
"202":
description: Request accepted and will be processed. Check the logs for progress / errors.
"400":
description: bad request
"401":
description: unauthorized
"403":
description: forbidden
"404":
description: not found
"406":
description: not acceptable
"500":
description: internal server error
security:
- OAuth2 Bearer:
- admin
summary: Refetch media specified in the database but missing from storage.
tags:
- admin
/api/v1/apps:
post:
consumes:

View file

@ -46,6 +46,7 @@
// AccountsActionPath is used for taking action on a single account.
AccountsActionPath = AccountsPathWithID + "/action"
MediaCleanupPath = BasePath + "/media_cleanup"
MediaRefetchPath = BasePath + "/media_refetch"
// ExportQueryKey is for requesting a public export of some data.
ExportQueryKey = "export"
@ -63,6 +64,8 @@
MinShortcodeDomainKey = "min_shortcode_domain"
// LimitKey is for specifying maximum number of results to return.
LimitKey = "limit"
// DomainQueryKey is for specifying a domain during admin actions.
DomainQueryKey = "domain"
)
// Module implements the ClientAPIModule interface for admin-related actions (reports, emojis, etc)
@ -90,6 +93,7 @@ func (m *Module) Route(r router.Router) error {
r.AttachHandler(http.MethodDelete, DomainBlocksPathWithID, m.DomainBlockDELETEHandler)
r.AttachHandler(http.MethodPost, AccountsActionPath, m.AccountActionPOSTHandler)
r.AttachHandler(http.MethodPost, MediaCleanupPath, m.MediaCleanupPOSTHandler)
r.AttachHandler(http.MethodPost, MediaRefetchPath, m.MediaRefetchPOSTHandler)
r.AttachHandler(http.MethodGet, EmojiCategoriesPath, m.EmojiCategoriesGETHandler)
return nil
}

View file

@ -0,0 +1,93 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package admin
import (
"fmt"
"net/http"
"github.com/gin-gonic/gin"
"github.com/superseriousbusiness/gotosocial/internal/api"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/oauth"
)
// MediaRefetchPOSTHandler swagger:operation POST /api/v1/admin/media_refetch mediaRefetch
//
// Refetch media specified in the database but missing from storage.
// Currently, this only includes remote emojis.
// This endpoint is useful when data loss has occurred, and you want to try to recover to a working state.
//
// ---
// tags:
// - admin
//
// produces:
// - application/json
//
// security:
// - OAuth2 Bearer:
// - admin
//
// parameters:
// -
// name: domain
// in: query
// description: >-
// Domain to refetch media from.
// If empty, all domains will be refetched.
// type: string
//
// responses:
// '202':
// description: >-
// Request accepted and will be processed.
// Check the logs for progress / errors.
// '400':
// description: bad request
// '401':
// description: unauthorized
// '403':
// description: forbidden
// '404':
// description: not found
// '406':
// description: not acceptable
// '500':
// description: internal server error
func (m *Module) MediaRefetchPOSTHandler(c *gin.Context) {
authed, err := oauth.Authed(c, true, true, true, true)
if err != nil {
api.ErrorHandler(c, gtserror.NewErrorUnauthorized(err, err.Error()), m.processor.InstanceGet)
return
}
if !*authed.User.Admin {
err := fmt.Errorf("user %s not an admin", authed.User.ID)
api.ErrorHandler(c, gtserror.NewErrorForbidden(err, err.Error()), m.processor.InstanceGet)
return
}
if errWithCode := m.processor.AdminMediaRefetch(c.Request.Context(), authed, c.Query(DomainQueryKey)); errWithCode != nil {
api.ErrorHandler(c, errWithCode, m.processor.InstanceGet)
return
}
c.Status(http.StatusAccepted)
}

View file

@ -40,6 +40,15 @@
// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
// It will block until workers are finished processing.
Stop() error
/*
PROCESSING FUNCTIONS
*/
// ProcessMedia begins the process of decoding and storing the given data as an attachment.
// It will return a pointer to a ProcessingMedia struct upon which further actions can be performed, such as getting
// the finished media, thumbnail, attachment, etc.
@ -75,6 +84,10 @@ type Manager interface {
// RecacheMedia refetches, reprocesses, and recaches an existing attachment that has been uncached via pruneRemote.
RecacheMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error)
/*
PRUNING FUNCTIONS
*/
// PruneAllRemote prunes all remote media attachments cached on this instance which are older than the given amount of days.
// 'Pruning' in this context means removing the locally stored data of the attachment (both thumbnail and full size),
// and setting 'cached' to false on the associated attachment.
@ -98,10 +111,18 @@ type Manager interface {
// is returned to the caller.
PruneOrphaned(ctx context.Context, dry bool) (int, error)
// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
// It will block until workers are finished processing.
Stop() error
/*
REFETCHING FUNCTIONS
Useful when data loss has occurred.
*/
// RefetchEmojis iterates through remote emojis (for the given domain, or all if domain is empty string).
//
// For each emoji, the manager will check whether both the full size and static images are present in storage.
// If not, the manager will refetch and reprocess full size and static images for the emoji.
//
// The provided DereferenceMedia function will be used when it's necessary to refetch something this way.
RefetchEmojis(ctx context.Context, domain string, dereferenceMedia DereferenceMedia) (int, error)
}
type manager struct {

View file

@ -20,10 +20,13 @@
import (
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/concurrency"
"github.com/superseriousbusiness/gotosocial/internal/db"
gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/messages"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/testrig"
)
@ -33,6 +36,7 @@ type MediaStandardTestSuite struct {
db db.DB
storage *storage.Driver
manager media.Manager
transportController transport.Controller
testAttachments map[string]*gtsmodel.MediaAttachment
testAccounts map[string]*gtsmodel.Account
testEmojis map[string]*gtsmodel.Emoji
@ -53,6 +57,7 @@ func (suite *MediaStandardTestSuite) SetupTest() {
suite.testAccounts = testrig.NewTestAccounts()
suite.testEmojis = testrig.NewTestEmojis()
suite.manager = testrig.NewTestMediaManager(suite.db, suite.storage)
suite.transportController = testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, concurrency.NewWorkerPool[messages.FromFederator](0, 0))
}
func (suite *MediaStandardTestSuite) TearDownTest() {

149
internal/media/refetch.go Normal file
View file

@ -0,0 +1,149 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"errors"
"fmt"
"io"
"net/url"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
type DereferenceMedia func(ctx context.Context, iri *url.URL) (io.ReadCloser, int64, error)
func (m *manager) RefetchEmojis(ctx context.Context, domain string, dereferenceMedia DereferenceMedia) (int, error) {
// normalize domain
if domain == "" {
domain = db.EmojiAllDomains
}
var (
maxShortcodeDomain string
refetchIDs []string
)
// page through emojis 20 at a time, looking for those with missing images
for {
// Fetch next block of emojis from database
emojis, err := m.db.GetEmojis(ctx, domain, false, true, "", maxShortcodeDomain, "", 20)
if err != nil {
if !errors.Is(err, db.ErrNoEntries) {
// an actual error has occurred
log.Errorf("error fetching emojis from database: %s", err)
}
break
}
for _, emoji := range emojis {
if emoji.Domain == "" {
// never try to refetch local emojis
continue
}
if refetch, err := m.emojiRequiresRefetch(ctx, emoji); err != nil {
// an error here indicates something is wrong with storage, so we should stop
return 0, fmt.Errorf("error checking refetch requirement for emoji %s: %w", util.ShortcodeDomain(emoji), err)
} else if !refetch {
continue
}
refetchIDs = append(refetchIDs, emoji.ID)
}
// Update next maxShortcodeDomain from last emoji
maxShortcodeDomain = util.ShortcodeDomain(emojis[len(emojis)-1])
}
// bail early if we've got nothing to do
toRefetchCount := len(refetchIDs)
if toRefetchCount == 0 {
log.Debug("no remote emojis require a refetch")
return 0, nil
}
log.Debugf("%d remote emoji(s) require a refetch, doing that now...", toRefetchCount)
var totalRefetched int
for _, emojiID := range refetchIDs {
emoji, err := m.db.GetEmojiByID(ctx, emojiID)
if err != nil {
// this shouldn't happen--since we know we have the emoji--so return if it does
return 0, fmt.Errorf("error getting emoji %s: %w", emojiID, err)
}
shortcodeDomain := util.ShortcodeDomain(emoji)
if emoji.ImageRemoteURL == "" {
log.Errorf("remote emoji %s could not be refreshed because it has no ImageRemoteURL set", shortcodeDomain)
continue
}
emojiImageIRI, err := url.Parse(emoji.ImageRemoteURL)
if err != nil {
log.Errorf("remote emoji %s could not be refreshed because its ImageRemoteURL (%s) is not a valid uri: %s", shortcodeDomain, emoji.ImageRemoteURL, err)
continue
}
dataFunc := func(ctx context.Context) (reader io.ReadCloser, fileSize int64, err error) {
return dereferenceMedia(ctx, emojiImageIRI)
}
processingEmoji, err := m.ProcessEmoji(ctx, dataFunc, nil, emoji.Shortcode, emoji.ID, emoji.URI, &AdditionalEmojiInfo{
Domain: &emoji.Domain,
ImageRemoteURL: &emoji.ImageRemoteURL,
ImageStaticRemoteURL: &emoji.ImageStaticRemoteURL,
Disabled: emoji.Disabled,
VisibleInPicker: emoji.VisibleInPicker,
}, true)
if err != nil {
log.Errorf("emoji %s could not be refreshed because of an error during processing: %s", shortcodeDomain, err)
continue
}
if _, err := processingEmoji.LoadEmoji(ctx); err != nil {
log.Errorf("emoji %s could not be refreshed because of an error during loading: %s", shortcodeDomain, err)
continue
}
log.Tracef("refetched emoji %s successfully from remote", shortcodeDomain)
totalRefetched++
}
return totalRefetched, nil
}
func (m *manager) emojiRequiresRefetch(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
if has, err := m.storage.Has(ctx, emoji.ImagePath); err != nil {
return false, err
} else if !has {
return true, nil
}
if has, err := m.storage.Has(ctx, emoji.ImageStaticPath); err != nil {
return false, err
} else if !has {
return true, nil
}
return false, nil
}

View file

@ -0,0 +1,85 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
)
type RefetchTestSuite struct {
MediaStandardTestSuite
}
func (suite *RefetchTestSuite) TestRefetchEmojisNothingToDo() {
ctx := context.Background()
adminAccount := suite.testAccounts["admin_account"]
transport, err := suite.transportController.NewTransportForUsername(ctx, adminAccount.Username)
if err != nil {
suite.FailNow(err.Error())
}
refetched, err := suite.manager.RefetchEmojis(ctx, "", transport.DereferenceMedia)
suite.NoError(err)
suite.Equal(0, refetched)
}
func (suite *RefetchTestSuite) TestRefetchEmojis() {
ctx := context.Background()
if err := suite.storage.Delete(ctx, suite.testEmojis["yell"].ImagePath); err != nil {
suite.FailNow(err.Error())
}
adminAccount := suite.testAccounts["admin_account"]
transport, err := suite.transportController.NewTransportForUsername(ctx, adminAccount.Username)
if err != nil {
suite.FailNow(err.Error())
}
refetched, err := suite.manager.RefetchEmojis(ctx, "", transport.DereferenceMedia)
suite.NoError(err)
suite.Equal(1, refetched)
}
func (suite *RefetchTestSuite) TestRefetchEmojisLocal() {
ctx := context.Background()
// delete the image for a LOCAL emoji
if err := suite.storage.Delete(ctx, suite.testEmojis["rainbow"].ImagePath); err != nil {
suite.FailNow(err.Error())
}
adminAccount := suite.testAccounts["admin_account"]
transport, err := suite.transportController.NewTransportForUsername(ctx, adminAccount.Username)
if err != nil {
suite.FailNow(err.Error())
}
refetched, err := suite.manager.RefetchEmojis(ctx, "", transport.DereferenceMedia)
suite.NoError(err)
suite.Equal(0, refetched) // shouldn't refetch anything because local
}
func TestRefetchTestSuite(t *testing.T) {
suite.Run(t, &RefetchTestSuite{})
}

View file

@ -77,3 +77,7 @@ func (p *processor) AdminDomainBlockDelete(ctx context.Context, authed *oauth.Au
func (p *processor) AdminMediaPrune(ctx context.Context, mediaRemoteCacheDays int) gtserror.WithCode {
return p.adminProcessor.MediaPrune(ctx, mediaRemoteCacheDays)
}
func (p *processor) AdminMediaRefetch(ctx context.Context, authed *oauth.Auth, domain string) gtserror.WithCode {
return p.adminProcessor.MediaRefetch(ctx, authed.Account, domain)
}

View file

@ -30,6 +30,7 @@
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/messages"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/typeutils"
)
@ -48,21 +49,24 @@ type Processor interface {
EmojiUpdate(ctx context.Context, id string, form *apimodel.EmojiUpdateRequest) (*apimodel.AdminEmoji, gtserror.WithCode)
EmojiCategoriesGet(ctx context.Context) ([]*apimodel.EmojiCategory, gtserror.WithCode)
MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gtserror.WithCode
MediaRefetch(ctx context.Context, requestingAccount *gtsmodel.Account, domain string) gtserror.WithCode
}
type processor struct {
tc typeutils.TypeConverter
mediaManager media.Manager
transportController transport.Controller
storage *storage.Driver
clientWorker *concurrency.WorkerPool[messages.FromClientAPI]
db db.DB
}
// New returns a new admin processor.
func New(db db.DB, tc typeutils.TypeConverter, mediaManager media.Manager, storage *storage.Driver, clientWorker *concurrency.WorkerPool[messages.FromClientAPI]) Processor {
func New(db db.DB, tc typeutils.TypeConverter, mediaManager media.Manager, transportController transport.Controller, storage *storage.Driver, clientWorker *concurrency.WorkerPool[messages.FromClientAPI]) Processor {
return &processor{
tc: tc,
mediaManager: mediaManager,
transportController: transportController,
storage: storage,
clientWorker: clientWorker,
db: db,

View file

@ -88,14 +88,10 @@ func (p *processor) EmojisGet(ctx context.Context, account *gtsmodel.Account, us
Items: items,
Path: "api/v1/admin/custom_emojis",
NextMaxIDKey: "max_shortcode_domain",
NextMaxIDValue: shortcodeDomain(emojis[count-1]),
NextMaxIDValue: util.ShortcodeDomain(emojis[count-1]),
PrevMinIDKey: "min_shortcode_domain",
PrevMinIDValue: shortcodeDomain(emojis[0]),
PrevMinIDValue: util.ShortcodeDomain(emojis[0]),
Limit: limit,
ExtraQueryParams: []string{filterBuilder.String()},
})
}
func shortcodeDomain(emoji *gtsmodel.Emoji) string {
return emoji.Shortcode + "@" + emoji.Domain
}

View file

@ -0,0 +1,48 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package admin
import (
"context"
"fmt"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
func (p *processor) MediaRefetch(ctx context.Context, requestingAccount *gtsmodel.Account, domain string) gtserror.WithCode {
transport, err := p.transportController.NewTransportForUsername(ctx, requestingAccount.Username)
if err != nil {
err = fmt.Errorf("error getting transport for user %s during media refetch request: %w", requestingAccount.Username, err)
return gtserror.NewErrorInternalError(err)
}
go func() {
log.Info("starting emoji refetch")
refetched, err := p.mediaManager.RefetchEmojis(context.Background(), domain, transport.DereferenceMedia)
if err != nil {
log.Errorf("error refetching emojis: %s", err)
} else {
log.Infof("refetched %d emojis from remote", refetched)
}
}()
return nil
}

View file

@ -136,6 +136,8 @@ type Processor interface {
AdminDomainBlockDelete(ctx context.Context, authed *oauth.Auth, id string) (*apimodel.DomainBlock, gtserror.WithCode)
// AdminMediaRemotePrune triggers a prune of remote media according to the given number of mediaRemoteCacheDays
AdminMediaPrune(ctx context.Context, mediaRemoteCacheDays int) gtserror.WithCode
// AdminMediaRefetch triggers a refetch of remote media for the given domain (or all if domain is empty).
AdminMediaRefetch(ctx context.Context, authed *oauth.Auth, domain string) gtserror.WithCode
// AppCreate processes the creation of a new API application
AppCreate(ctx context.Context, authed *oauth.Auth, form *apimodel.ApplicationCreateRequest) (*apimodel.Application, gtserror.WithCode)
@ -318,7 +320,7 @@ func NewProcessor(
statusProcessor := status.New(db, tc, clientWorker, parseMentionFunc)
streamingProcessor := streaming.New(db, oauthServer)
accountProcessor := account.New(db, tc, mediaManager, oauthServer, clientWorker, federator, parseMentionFunc)
adminProcessor := admin.New(db, tc, mediaManager, storage, clientWorker)
adminProcessor := admin.New(db, tc, mediaManager, federator.TransportController(), storage, clientWorker)
mediaProcessor := mediaProcessor.New(db, tc, mediaManager, federator.TransportController(), storage)
userProcessor := user.New(db, emailSender)
federationProcessor := federationProcessor.New(db, tc, federator)

26
internal/util/emoji.go Normal file
View file

@ -0,0 +1,26 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package util
import "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
// ShortcodeDomain returns the [shortcode]@[domain] for the given emoji.
func ShortcodeDomain(emoji *gtsmodel.Emoji) string {
return emoji.Shortcode + "@" + emoji.Domain
}