[bugfix] media.Processor{}.GetFile() returning 404s on first call, correctly loading on 2nd (#3129)

* refactor file handling a tiny bit

* whoops

* make processing media / emoji defers a bit clear to see that it's the "on finished processing" path

* some wording

* add some debug logging

* add mutex locks for processing remote media

* try removing freshness check

* fix derefMedia not being allocated

* fix log format string

* handle case of empty file paths (i.e. not stored)

* remove media / emoji once finished processing from dereferencer maps

* whoops, fix the cached / force checks

* move url parsing outside of 'process___Safely()' funcs to prevalidate url

* use emoji.ShortcodeDomain()

* update RefreshEmoji() to also match RefreshMedia() changes

---------

Co-authored-by: tobi <tobi.smethurst@protonmail.com>
This commit is contained in:
kim 2024-07-22 18:45:48 +01:00 committed by GitHub
parent 5338825d2b
commit 31294f7c78
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 385 additions and 293 deletions

View file

@ -59,12 +59,19 @@ func (c *Cleaner) Media() *Media {
// haveFiles returns whether all of the provided files exist within current storage. // haveFiles returns whether all of the provided files exist within current storage.
func (c *Cleaner) haveFiles(ctx context.Context, files ...string) (bool, error) { func (c *Cleaner) haveFiles(ctx context.Context, files ...string) (bool, error) {
for _, file := range files { for _, path := range files {
if path == "" {
// File not stored.
return false, nil
}
// Check whether each file exists in storage. // Check whether each file exists in storage.
have, err := c.state.Storage.Has(ctx, file) have, err := c.state.Storage.Has(ctx, path)
if err != nil { if err != nil {
return false, gtserror.Newf("error checking storage for %s: %w", file, err) return false, gtserror.Newf("error checking storage for %s: %w", path, err)
} else if !have { }
if !have {
// Missing file(s). // Missing file(s).
return false, nil return false, nil
} }
@ -80,29 +87,34 @@ func (c *Cleaner) removeFiles(ctx context.Context, files ...string) (int, error)
} }
var ( var (
errs gtserror.MultiError errs gtserror.MultiError
errCount int count int
) )
for _, path := range files { for _, path := range files {
if path == "" {
// not stored.
continue
}
// Remove each provided storage path. // Remove each provided storage path.
log.Debugf(ctx, "removing file: %s", path) log.Debugf(ctx, "removing file: %s", path)
err := c.state.Storage.Delete(ctx, path) err := c.state.Storage.Delete(ctx, path)
if err != nil && !storage.IsNotFound(err) { if err != nil && !storage.IsNotFound(err) {
errs.Appendf("error removing %s: %w", path, err) errs.Appendf("error removing %s: %w", path, err)
errCount++ continue
} }
}
// Calculate no. files removed. // Incr.
diff := len(files) - errCount count++
}
// Wrap the combined error slice. // Wrap the combined error slice.
if err := errs.Combine(); err != nil { if err := errs.Combine(); err != nil {
return diff, gtserror.Newf("error(s) removing files: %w", err) return count, gtserror.Newf("error(s) removing files: %w", err)
} }
return diff, nil return count, nil
} }
// ScheduleJobs schedules cleaning // ScheduleJobs schedules cleaning

View file

@ -97,7 +97,7 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {
if err := m.state.Storage.WalkKeys(ctx, func(path string) error { if err := m.state.Storage.WalkKeys(ctx, func(path string) error {
// Check for our expected fileserver path format. // Check for our expected fileserver path format.
if !regexes.FilePath.MatchString(path) { if !regexes.FilePath.MatchString(path) {
log.Warn(ctx, "unexpected storage item: %s", path) log.Warnf(ctx, "unexpected storage item: %s", path)
return nil return nil
} }

View file

@ -85,12 +85,9 @@ type Dereferencer struct {
mediaManager *media.Manager mediaManager *media.Manager
visibility *visibility.Filter visibility *visibility.Filter
// in-progress dereferencing emoji. we already perform // in-progress dereferencing media / emoji
// locks per-status and per-account so we don't need derefMedia map[string]*media.ProcessingMedia
// processing maps for other media which won't often derefMediaMu sync.Mutex
// end up being repeated. worst case we run into an
// db.ErrAlreadyExists error which then gets handled
// appropriately by enrich{Account,Status}Safely().
derefEmojis map[string]*media.ProcessingEmoji derefEmojis map[string]*media.ProcessingEmoji
derefEmojisMu sync.Mutex derefEmojisMu sync.Mutex
@ -119,6 +116,7 @@ func NewDereferencer(
transportController: transportController, transportController: transportController,
mediaManager: mediaManager, mediaManager: mediaManager,
visibility: visFilter, visibility: visFilter,
derefMedia: make(map[string]*media.ProcessingMedia),
derefEmojis: make(map[string]*media.ProcessingEmoji), derefEmojis: make(map[string]*media.ProcessingEmoji),
handshakes: make(map[string][]*url.URL), handshakes: make(map[string][]*url.URL),
} }

View file

@ -77,32 +77,34 @@ func (d *Dereferencer) GetEmoji(
// Generate shortcode domain for locks + logging. // Generate shortcode domain for locks + logging.
shortcodeDomain := shortcode + "@" + domain shortcodeDomain := shortcode + "@" + domain
// Ensure we have a valid remote URL.
url, err := url.Parse(remoteURL)
if err != nil {
err := gtserror.Newf("invalid image remote url %s for emoji %s: %w", remoteURL, shortcodeDomain, err)
return nil, err
}
// Acquire new instance account transport for emoji dereferencing.
tsport, err := d.transportController.NewTransportForUsername(ctx, "")
if err != nil {
err := gtserror.Newf("error getting instance transport: %w", err)
return nil, err
}
// Get maximum supported remote emoji size.
maxsz := config.GetMediaEmojiRemoteMaxSize()
// Prepare data function to dereference remote emoji media.
data := func(context.Context) (io.ReadCloser, error) {
return tsport.DereferenceMedia(ctx, url, int64(maxsz))
}
// Pass along for safe processing. // Pass along for safe processing.
return d.processEmojiSafely(ctx, return d.processEmojiSafely(ctx,
shortcodeDomain, shortcodeDomain,
func() (*media.ProcessingEmoji, error) { func() (*media.ProcessingEmoji, error) {
// Ensure we have a valid remote URL.
url, err := url.Parse(remoteURL)
if err != nil {
err := gtserror.Newf("invalid image remote url %s for emoji %s: %w", remoteURL, shortcodeDomain, err)
return nil, err
}
// Acquire new instance account transport for emoji dereferencing.
tsport, err := d.transportController.NewTransportForUsername(ctx, "")
if err != nil {
err := gtserror.Newf("error getting instance transport: %w", err)
return nil, err
}
// Get maximum supported remote emoji size.
maxsz := config.GetMediaEmojiRemoteMaxSize()
// Prepare data function to dereference remote emoji media.
data := func(context.Context) (io.ReadCloser, error) {
return tsport.DereferenceMedia(ctx, url, int64(maxsz))
}
// Create new emoji with prepared info.
return d.mediaManager.CreateEmoji(ctx, return d.mediaManager.CreateEmoji(ctx,
shortcode, shortcode,
domain, domain,
@ -142,24 +144,25 @@ func (d *Dereferencer) RefreshEmoji(
switch { switch {
case info.URI != nil && case info.URI != nil &&
*info.URI != emoji.URI: *info.URI != emoji.URI:
emoji.URI = *info.URI
force = true force = true
case info.ImageRemoteURL != nil && case info.ImageRemoteURL != nil &&
*info.ImageRemoteURL != emoji.ImageRemoteURL: *info.ImageRemoteURL != emoji.ImageRemoteURL:
emoji.ImageRemoteURL = *info.ImageRemoteURL
force = true force = true
case info.ImageStaticRemoteURL != nil && case info.ImageStaticRemoteURL != nil &&
*info.ImageStaticRemoteURL != emoji.ImageStaticRemoteURL: *info.ImageStaticRemoteURL != emoji.ImageStaticRemoteURL:
emoji.ImageStaticRemoteURL = *info.ImageStaticRemoteURL
force = true force = true
} }
// Check if needs updating. // Check if needs updating.
if !force && *emoji.Cached { if *emoji.Cached && !force {
return emoji, nil return emoji, nil
} }
// TODO: more finegrained freshness checks. // Get shortcode domain for locks + logging.
shortcodeDomain := emoji.ShortcodeDomain()
// Generate shortcode domain for locks + logging.
shortcodeDomain := emoji.Shortcode + "@" + emoji.Domain
// Ensure we have a valid image remote URL. // Ensure we have a valid image remote URL.
url, err := url.Parse(emoji.ImageRemoteURL) url, err := url.Parse(emoji.ImageRemoteURL)
@ -168,25 +171,27 @@ func (d *Dereferencer) RefreshEmoji(
return nil, err return nil, err
} }
// Acquire new instance account transport for emoji dereferencing.
tsport, err := d.transportController.NewTransportForUsername(ctx, "")
if err != nil {
err := gtserror.Newf("error getting instance transport: %w", err)
return nil, err
}
// Get maximum supported remote emoji size.
maxsz := config.GetMediaEmojiRemoteMaxSize()
// Prepare data function to dereference remote emoji media.
data := func(context.Context) (io.ReadCloser, error) {
return tsport.DereferenceMedia(ctx, url, int64(maxsz))
}
// Pass along for safe processing. // Pass along for safe processing.
return d.processEmojiSafely(ctx, return d.processEmojiSafely(ctx,
shortcodeDomain, shortcodeDomain,
func() (*media.ProcessingEmoji, error) { func() (*media.ProcessingEmoji, error) {
// Acquire new instance account transport for emoji dereferencing.
tsport, err := d.transportController.NewTransportForUsername(ctx, "")
if err != nil {
err := gtserror.Newf("error getting instance transport: %w", err)
return nil, err
}
// Get maximum supported remote emoji size.
maxsz := config.GetMediaEmojiRemoteMaxSize()
// Prepare data function to dereference remote emoji media.
data := func(context.Context) (io.ReadCloser, error) {
return tsport.DereferenceMedia(ctx, url, int64(maxsz))
}
// Refresh emoji with prepared info.
return d.mediaManager.RefreshEmoji(ctx, return d.mediaManager.RefreshEmoji(ctx,
emoji, emoji,
data, data,
@ -226,6 +231,13 @@ func (d *Dereferencer) processEmojiSafely(
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer func() {
// Remove on finish.
d.derefEmojisMu.Lock()
delete(d.derefEmojis, shortcodeDomain)
d.derefEmojisMu.Unlock()
}()
} }
// Unlock map. // Unlock map.
@ -240,10 +252,7 @@ func (d *Dereferencer) processEmojiSafely(
// which can determine if loading error should allow remaining placeholder. // which can determine if loading error should allow remaining placeholder.
} }
// Return a COPY of emoji. return
emoji2 := new(gtsmodel.Emoji)
*emoji2 = *emoji
return emoji2, err
} }
func (d *Dereferencer) fetchEmojis( func (d *Dereferencer) fetchEmojis(

View file

@ -26,6 +26,7 @@
"github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/util"
) )
// GetMedia fetches the media at given remote URL by // GetMedia fetches the media at given remote URL by
@ -56,46 +57,39 @@ func (d *Dereferencer) GetMedia(
*gtsmodel.MediaAttachment, *gtsmodel.MediaAttachment,
error, error,
) { ) {
// Parse str as valid URL object. // Ensure we have a valid remote URL.
url, err := url.Parse(remoteURL) url, err := url.Parse(remoteURL)
if err != nil { if err != nil {
return nil, gtserror.Newf("invalid remote media url %q: %v", remoteURL, err) err := gtserror.Newf("invalid media remote url %s: %w", remoteURL, err)
}
// Fetch transport for the provided request user from controller.
tsport, err := d.transportController.NewTransportForUsername(ctx,
requestUser,
)
if err != nil {
return nil, gtserror.Newf("failed getting transport for %s: %w", requestUser, err)
}
// Get maximum supported remote media size.
maxsz := config.GetMediaRemoteMaxSize()
// Start processing remote attachment at URL.
processing, err := d.mediaManager.CreateMedia(
ctx,
accountID,
func(ctx context.Context) (io.ReadCloser, error) {
return tsport.DereferenceMedia(ctx, url, int64(maxsz))
},
info,
)
if err != nil {
return nil, err return nil, err
} }
// Perform media load operation. return d.processMediaSafeley(ctx,
media, err := processing.Load(ctx) remoteURL,
if err != nil { func() (*media.ProcessingMedia, error) {
err = gtserror.Newf("error loading media %s: %w", media.RemoteURL, err)
// TODO: in time we should return checkable flags by gtserror.Is___() // Fetch transport for the provided request user from controller.
// which can determine if loading error should allow remaining placeholder. tsport, err := d.transportController.NewTransportForUsername(ctx,
} requestUser,
)
if err != nil {
return nil, gtserror.Newf("failed getting transport for %s: %w", requestUser, err)
}
return media, err // Get maximum supported remote media size.
maxsz := config.GetMediaRemoteMaxSize()
// Create media with prepared info.
return d.mediaManager.CreateMedia(
ctx,
accountID,
func(ctx context.Context) (io.ReadCloser, error) {
return tsport.DereferenceMedia(ctx, url, int64(maxsz))
},
info,
)
},
)
} }
// RefreshMedia ensures that given media is up-to-date, // RefreshMedia ensures that given media is up-to-date,
@ -119,7 +113,7 @@ func(ctx context.Context) (io.ReadCloser, error) {
func (d *Dereferencer) RefreshMedia( func (d *Dereferencer) RefreshMedia(
ctx context.Context, ctx context.Context,
requestUser string, requestUser string,
media *gtsmodel.MediaAttachment, attach *gtsmodel.MediaAttachment,
info media.AdditionalMediaInfo, info media.AdditionalMediaInfo,
force bool, force bool,
) ( ) (
@ -127,67 +121,65 @@ func (d *Dereferencer) RefreshMedia(
error, error,
) { ) {
// Can't refresh local. // Can't refresh local.
if media.IsLocal() { if attach.IsLocal() {
return media, nil return attach, nil
} }
// Check emoji is up-to-date // Check emoji is up-to-date
// with provided extra info. // with provided extra info.
switch { switch {
case info.Blurhash != nil && case info.Blurhash != nil &&
*info.Blurhash != media.Blurhash: *info.Blurhash != attach.Blurhash:
attach.Blurhash = *info.Blurhash
force = true force = true
case info.Description != nil && case info.Description != nil &&
*info.Description != media.Description: *info.Description != attach.Description:
attach.Description = *info.Description
force = true force = true
case info.RemoteURL != nil && case info.RemoteURL != nil &&
*info.RemoteURL != media.RemoteURL: *info.RemoteURL != attach.RemoteURL:
attach.RemoteURL = *info.RemoteURL
force = true force = true
} }
// Check if needs updating. // Check if needs updating.
if !force && *media.Cached { if *attach.Cached && !force {
return media, nil return attach, nil
} }
// TODO: more finegrained freshness checks.
// Ensure we have a valid remote URL. // Ensure we have a valid remote URL.
url, err := url.Parse(media.RemoteURL) url, err := url.Parse(attach.RemoteURL)
if err != nil { if err != nil {
err := gtserror.Newf("invalid media remote url %s: %w", media.RemoteURL, err) err := gtserror.Newf("invalid media remote url %s: %w", attach.RemoteURL, err)
return nil, err return nil, err
} }
// Fetch transport for the provided request user from controller. // Pass along for safe processing.
tsport, err := d.transportController.NewTransportForUsername(ctx, return d.processMediaSafeley(ctx,
requestUser, attach.RemoteURL,
) func() (*media.ProcessingMedia, error) {
if err != nil {
return nil, gtserror.Newf("failed getting transport for %s: %w", requestUser, err)
}
// Get maximum supported remote media size. // Fetch transport for the provided request user from controller.
maxsz := config.GetMediaRemoteMaxSize() tsport, err := d.transportController.NewTransportForUsername(ctx,
requestUser,
)
if err != nil {
return nil, gtserror.Newf("failed getting transport for %s: %w", requestUser, err)
}
// Start processing remote attachment recache. // Get maximum supported remote media size.
processing := d.mediaManager.RecacheMedia( maxsz := config.GetMediaRemoteMaxSize()
media,
func(ctx context.Context) (io.ReadCloser, error) { // Recache media with prepared info,
return tsport.DereferenceMedia(ctx, url, int64(maxsz)) // this will also update media in db.
return d.mediaManager.RecacheMedia(
attach,
func(ctx context.Context) (io.ReadCloser, error) {
return tsport.DereferenceMedia(ctx, url, int64(maxsz))
},
), nil
}, },
) )
// Perform media load operation.
media, err = processing.Load(ctx)
if err != nil {
err = gtserror.Newf("error loading media %s: %w", media.RemoteURL, err)
// TODO: in time we should return checkable flags by gtserror.Is___()
// which can determine if loading error should allow remaining placeholder.
}
return media, err
} }
// updateAttachment handles the case of an existing media attachment // updateAttachment handles the case of an existing media attachment
@ -220,3 +212,57 @@ func (d *Dereferencer) updateAttachment(
false, false,
) )
} }
// processingEmojiSafely provides concurrency-safe processing of
// an emoji with given shortcode+domain. if a copy of the emoji is
// not already being processed, the given 'process' callback will
// be used to generate new *media.ProcessingEmoji{} instance.
func (d *Dereferencer) processMediaSafeley(
ctx context.Context,
remoteURL string,
process func() (*media.ProcessingMedia, error),
) (
media *gtsmodel.MediaAttachment,
err error,
) {
// Acquire map lock.
d.derefMediaMu.Lock()
// Ensure unlock only done once.
unlock := d.derefMediaMu.Unlock
unlock = util.DoOnce(unlock)
defer unlock()
// Look for an existing deref in progress.
processing, ok := d.derefMedia[remoteURL]
if !ok {
// Start new processing emoji.
processing, err = process()
if err != nil {
return nil, err
}
defer func() {
// Remove on finish.
d.derefMediaMu.Lock()
delete(d.derefMedia, remoteURL)
d.derefMediaMu.Unlock()
}()
}
// Unlock map.
unlock()
// Perform media load operation.
media, err = processing.Load(ctx)
if err != nil {
err = gtserror.Newf("error loading media %s: %w", remoteURL, err)
// TODO: in time we should return checkable flags by gtserror.Is___()
// which can determine if loading error should allow remaining placeholder.
}
return
}

View file

@ -22,7 +22,6 @@
errorsv2 "codeberg.org/gruf/go-errors/v2" errorsv2 "codeberg.org/gruf/go-errors/v2"
"codeberg.org/gruf/go-runners" "codeberg.org/gruf/go-runners"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/log"
@ -77,42 +76,34 @@ func (p *ProcessingEmoji) load(ctx context.Context) (
defer func() { defer func() {
// This is only done when ctx NOT cancelled. // This is only done when ctx NOT cancelled.
done = (err == nil || !errorsv2.IsV2(err, if done = (err == nil || !errorsv2.IsV2(err,
context.Canceled, context.Canceled,
context.DeadlineExceeded, context.DeadlineExceeded,
)) )); done {
// Processing finished,
// whether error or not!
if !done { // Anything from here, we
return // need to ensure happens
// (i.e. no ctx canceled).
ctx = context.WithoutCancel(ctx)
// On error, clean
// downloaded files.
if err != nil {
p.cleanup(ctx)
}
// Update with latest details, whatever happened.
e := p.mgr.state.DB.UpdateEmoji(ctx, p.emoji)
if e != nil {
log.Errorf(ctx, "error updating emoji in db: %v", e)
}
// Store values.
p.done = true
p.err = err
} }
// Anything from here, we
// need to ensure happens
// (i.e. no ctx canceled).
ctx = gtscontext.WithValues(
context.Background(),
ctx, // values
)
// On error, clean
// downloaded files.
if err != nil {
p.cleanup(ctx)
}
if !done {
return
}
// Update with latest details, whatever happened.
e := p.mgr.state.DB.UpdateEmoji(ctx, p.emoji)
if e != nil {
log.Errorf(ctx, "error updating emoji in db: %v", e)
}
// Store final values.
p.done = true
p.err = err
}() }()
// Attempt to store media and calculate // Attempt to store media and calculate
@ -122,7 +113,10 @@ func (p *ProcessingEmoji) load(ctx context.Context) (
err = p.store(ctx) err = p.store(ctx)
return err return err
}) })
emoji = p.emoji
// Return a copy of emoji.
emoji = new(gtsmodel.Emoji)
*emoji = *p.emoji
return return
} }
@ -265,11 +259,11 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
// cleanup will remove any traces of processing emoji from storage, // cleanup will remove any traces of processing emoji from storage,
// and perform any other necessary cleanup steps after failure. // and perform any other necessary cleanup steps after failure.
func (p *ProcessingEmoji) cleanup(ctx context.Context) { func (p *ProcessingEmoji) cleanup(ctx context.Context) {
var err error log.Debugf(ctx, "running cleanup of emoji %s", p.emoji.ID)
if p.emoji.ImagePath != "" { if p.emoji.ImagePath != "" {
// Ensure emoji file at path is deleted from storage. // Ensure emoji file at path is deleted from storage.
err = p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath) err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath)
if err != nil && !storage.IsNotFound(err) { if err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error deleting %s: %v", p.emoji.ImagePath, err) log.Errorf(ctx, "error deleting %s: %v", p.emoji.ImagePath, err)
} }
@ -277,7 +271,7 @@ func (p *ProcessingEmoji) cleanup(ctx context.Context) {
if p.emoji.ImageStaticPath != "" { if p.emoji.ImageStaticPath != "" {
// Ensure emoji static file at path is deleted from storage. // Ensure emoji static file at path is deleted from storage.
err = p.mgr.state.Storage.Delete(ctx, p.emoji.ImageStaticPath) err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImageStaticPath)
if err != nil && !storage.IsNotFound(err) { if err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error deleting %s: %v", p.emoji.ImageStaticPath, err) log.Errorf(ctx, "error deleting %s: %v", p.emoji.ImageStaticPath, err)
} }

View file

@ -19,12 +19,10 @@
import ( import (
"context" "context"
"time"
errorsv2 "codeberg.org/gruf/go-errors/v2" errorsv2 "codeberg.org/gruf/go-errors/v2"
"codeberg.org/gruf/go-runners" "codeberg.org/gruf/go-runners"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/log"
@ -63,6 +61,7 @@ func (p *ProcessingMedia) Load(ctx context.Context) (*gtsmodel.MediaAttachment,
media, done, err := p.load(ctx) media, done, err := p.load(ctx)
if !done { if !done {
// On a context-canceled error (marked as !done), requeue for loading. // On a context-canceled error (marked as !done), requeue for loading.
log.Warnf(ctx, "reprocessing media %s after canceled ctx", p.media.ID)
p.mgr.state.Workers.Dereference.Queue.Push(func(ctx context.Context) { p.mgr.state.Workers.Dereference.Queue.Push(func(ctx context.Context) {
if _, _, err := p.load(ctx); err != nil { if _, _, err := p.load(ctx); err != nil {
log.Errorf(ctx, "error loading media: %v", err) log.Errorf(ctx, "error loading media: %v", err)
@ -86,55 +85,35 @@ func (p *ProcessingMedia) load(ctx context.Context) (
defer func() { defer func() {
// This is only done when ctx NOT cancelled. // This is only done when ctx NOT cancelled.
done = (err == nil || !errorsv2.IsV2(err, if done = (err == nil || !errorsv2.IsV2(err,
context.Canceled, context.Canceled,
context.DeadlineExceeded, context.DeadlineExceeded,
)) )); done {
// Processing finished,
// whether error or not!
if !done { // Anything from here, we
return // need to ensure happens
// (i.e. no ctx canceled).
ctx = context.WithoutCancel(ctx)
// On error or unknown media types, perform error cleanup.
if err != nil || p.media.Type == gtsmodel.FileTypeUnknown {
p.cleanup(ctx)
}
// Update with latest details, whatever happened.
e := p.mgr.state.DB.UpdateAttachment(ctx, p.media)
if e != nil {
log.Errorf(ctx, "error updating media in db: %v", e)
}
// Store values.
p.done = true
p.err = err
} }
// Anything from here, we
// need to ensure happens
// (i.e. no ctx canceled).
ctx = gtscontext.WithValues(
context.Background(),
ctx, // values
)
// On error or unknown media types, perform error cleanup.
if err != nil || p.media.Type == gtsmodel.FileTypeUnknown {
p.cleanup(ctx)
}
// Update with latest details, whatever happened.
e := p.mgr.state.DB.UpdateAttachment(ctx, p.media)
if e != nil {
log.Errorf(ctx, "error updating media in db: %v", e)
}
// Store final values.
p.done = true
p.err = err
}() }()
// TODO: in time update this
// to perhaps follow a similar
// freshness window to statuses
// / accounts? But that's a big
// maybe, media don't change in
// the same way so this is largely
// just to slow down fail retries.
const maxfreq = 6 * time.Hour
// Check whether media is uncached but repeatedly failing,
// specifically limit the frequency at which we allow this.
if !p.media.UpdatedAt.Equal(p.media.CreatedAt) && // i.e. not new
p.media.UpdatedAt.Add(maxfreq).Before(time.Now()) {
return nil
}
// Attempt to store media and calculate // Attempt to store media and calculate
// full-size media attachment details. // full-size media attachment details.
// //
@ -142,7 +121,10 @@ func (p *ProcessingMedia) load(ctx context.Context) (
err = p.store(ctx) err = p.store(ctx)
return err return err
}) })
media = p.media
// Return a copy of media attachment.
media = new(gtsmodel.MediaAttachment)
*media = *p.media
return return
} }
@ -331,11 +313,9 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
// cleanup will remove any traces of processing media from storage. // cleanup will remove any traces of processing media from storage.
// and perform any other necessary cleanup steps after failure. // and perform any other necessary cleanup steps after failure.
func (p *ProcessingMedia) cleanup(ctx context.Context) { func (p *ProcessingMedia) cleanup(ctx context.Context) {
var err error
if p.media.File.Path != "" { if p.media.File.Path != "" {
// Ensure media file at path is deleted from storage. // Ensure media file at path is deleted from storage.
err = p.mgr.state.Storage.Delete(ctx, p.media.File.Path) err := p.mgr.state.Storage.Delete(ctx, p.media.File.Path)
if err != nil && !storage.IsNotFound(err) { if err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error deleting %s: %v", p.media.File.Path, err) log.Errorf(ctx, "error deleting %s: %v", p.media.File.Path, err)
} }
@ -343,7 +323,7 @@ func (p *ProcessingMedia) cleanup(ctx context.Context) {
if p.media.Thumbnail.Path != "" { if p.media.Thumbnail.Path != "" {
// Ensure media thumbnail at path is deleted from storage. // Ensure media thumbnail at path is deleted from storage.
err = p.mgr.state.Storage.Delete(ctx, p.media.Thumbnail.Path) err := p.mgr.state.Storage.Delete(ctx, p.media.Thumbnail.Path)
if err != nil && !storage.IsNotFound(err) { if err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error deleting %s: %v", p.media.Thumbnail.Path, err) log.Errorf(ctx, "error deleting %s: %v", p.media.Thumbnail.Path, err)
} }

View file

@ -30,6 +30,7 @@
"github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/storage" "github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/uris" "github.com/superseriousbusiness/gotosocial/internal/uris"
) )
@ -41,79 +42,97 @@ func (p *Processor) GetFile(
requester *gtsmodel.Account, requester *gtsmodel.Account,
form *apimodel.GetContentRequestForm, form *apimodel.GetContentRequestForm,
) (*apimodel.Content, gtserror.WithCode) { ) (*apimodel.Content, gtserror.WithCode) {
// parse the form fields // Parse media size (small, static, original).
mediaSize, err := parseSize(form.MediaSize) mediaSize, err := parseSize(form.MediaSize)
if err != nil { if err != nil {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("media size %s not valid", form.MediaSize)) err := gtserror.Newf("media size %s not valid", form.MediaSize)
return nil, gtserror.NewErrorNotFound(err)
} }
// Parse media type (emoji, header, avatar, attachment).
mediaType, err := parseType(form.MediaType) mediaType, err := parseType(form.MediaType)
if err != nil { if err != nil {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("media type %s not valid", form.MediaType)) err := gtserror.Newf("media type %s not valid", form.MediaType)
return nil, gtserror.NewErrorNotFound(err)
} }
spl := strings.Split(form.FileName, ".") // Parse media ID from file name.
if len(spl) != 2 || spl[0] == "" || spl[1] == "" { mediaID, _, err := parseFileName(form.FileName)
return nil, gtserror.NewErrorNotFound(fmt.Errorf("file name %s not parseable", form.FileName))
}
wantedMediaID := spl[0]
owningAccountID := form.AccountID
// get the account that owns the media and make sure it's not suspended
owningAccount, err := p.state.DB.GetAccountByID(ctx, owningAccountID)
if err != nil { if err != nil {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("account with id %s could not be selected from the db: %s", owningAccountID, err)) err := gtserror.Newf("media file name %s not valid", form.FileName)
} return nil, gtserror.NewErrorNotFound(err)
if !owningAccount.SuspendedAt.IsZero() {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("account with id %s is suspended", owningAccountID))
} }
// make sure the requesting account and the media account don't block each other // Get the account that owns the media
// and make sure it's not suspended.
acctID := form.AccountID
acct, err := p.state.DB.GetAccountByID(ctx, acctID)
if err != nil {
err := gtserror.Newf("db error getting account %s: %w", acctID, err)
return nil, gtserror.NewErrorNotFound(err)
}
if acct.IsSuspended() {
err := gtserror.Newf("account %s is suspended", acctID)
return nil, gtserror.NewErrorNotFound(err)
}
// If requester was authenticated, ensure media
// owner and requester don't block each other.
if requester != nil { if requester != nil {
blocked, err := p.state.DB.IsEitherBlocked(ctx, requester.ID, owningAccountID) blocked, err := p.state.DB.IsEitherBlocked(ctx, requester.ID, acctID)
if err != nil { if err != nil {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("block status could not be established between accounts %s and %s: %s", owningAccountID, requester.ID, err)) err := gtserror.Newf("db error checking block between %s and %s: %w", acctID, requester.ID, err)
return nil, gtserror.NewErrorNotFound(err)
} }
if blocked { if blocked {
return nil, gtserror.NewErrorNotFound(fmt.Errorf("block exists between accounts %s and %s", owningAccountID, requester.ID)) err := gtserror.Newf("block exists between %s and %s", acctID, requester.ID)
return nil, gtserror.NewErrorNotFound(err)
} }
} }
// the way we store emojis is a little different from the way we store other attachments, // The way we store emojis is a bit different
// so we need to take different steps depending on the media type being requested // from the way we store other attachments,
// so we need to take different steps depending
// on the media type being requested.
switch mediaType { switch mediaType {
case media.TypeEmoji: case media.TypeEmoji:
return p.getEmojiContent(ctx, return p.getEmojiContent(ctx,
owningAccountID, acctID,
wantedMediaID,
mediaSize, mediaSize,
mediaID,
) )
case media.TypeAttachment, media.TypeHeader, media.TypeAvatar: case media.TypeAttachment, media.TypeHeader, media.TypeAvatar:
return p.getAttachmentContent(ctx, return p.getAttachmentContent(ctx,
requester, requester,
owningAccountID, acctID,
wantedMediaID,
mediaSize, mediaSize,
mediaID,
) )
default: default:
return nil, gtserror.NewErrorNotFound(fmt.Errorf("media type %s not recognized", mediaType)) err := gtserror.Newf("media type %s not recognized", mediaType)
return nil, gtserror.NewErrorNotFound(err)
} }
} }
func (p *Processor) getAttachmentContent( func (p *Processor) getAttachmentContent(
ctx context.Context, ctx context.Context,
requester *gtsmodel.Account, requester *gtsmodel.Account,
ownerID string, acctID string,
mediaID string,
sizeStr media.Size, sizeStr media.Size,
mediaID string,
) ( ) (
*apimodel.Content, *apimodel.Content,
gtserror.WithCode, gtserror.WithCode,
) { ) {
// Search for media with given ID in the database. // Get attachment with given ID from the database.
attach, err := p.state.DB.GetAttachmentByID(ctx, mediaID) attach, err := p.state.DB.GetAttachmentByID(ctx, mediaID)
if err != nil && !errors.Is(err, db.ErrNoEntries) { if err != nil && !errors.Is(err, db.ErrNoEntries) {
err := gtserror.Newf("error fetching media from database: %w", err) err := gtserror.Newf("db error getting attachment %s: %w", mediaID, err)
return nil, gtserror.NewErrorInternalError(err) return nil, gtserror.NewErrorInternalError(err)
} }
@ -122,51 +141,24 @@ func (p *Processor) getAttachmentContent(
return nil, gtserror.NewErrorNotFound(errors.New(text), text) return nil, gtserror.NewErrorNotFound(errors.New(text), text)
} }
// Ensure the 'owner' owns media. // Ensure the account
if attach.AccountID != ownerID { // actually owns the media.
if attach.AccountID != acctID {
const text = "media was not owned by passed account id" const text = "media was not owned by passed account id"
return nil, gtserror.NewErrorNotFound(errors.New(text) /* no help text! */) return nil, gtserror.NewErrorNotFound(errors.New(text) /* no help text! */)
} }
var remoteURL *url.URL // Unknown file types indicate no *locally*
if attach.RemoteURL != "" {
// Parse media remote URL to valid URL object.
remoteURL, err = url.Parse(attach.RemoteURL)
if err != nil {
err := gtserror.Newf("invalid media remote url %s: %w", attach.RemoteURL, err)
return nil, gtserror.NewErrorInternalError(err)
}
}
// Uknown file types indicate no *locally*
// stored data we can serve. Handle separately. // stored data we can serve. Handle separately.
if attach.Type == gtsmodel.FileTypeUnknown { if attach.Type == gtsmodel.FileTypeUnknown {
if remoteURL == nil { return handleUnknown(attach)
err := gtserror.Newf("missing remote url for unknown type media %s: %w", attach.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
// If this is an "Unknown" file type, ie., one we
// tried to process and couldn't, or one we refused
// to process because it wasn't supported, then we
// can skip a lot of steps here by simply forwarding
// the request to the remote URL.
url := &storage.PresignedURL{
URL: remoteURL,
// We might manage to cache the media
// at some point, so set a low-ish expiry.
Expiry: time.Now().Add(2 * time.Hour),
}
return &apimodel.Content{URL: url}, nil
} }
// If requester was provided, use their username
// to create a transport to potentially re-fetch
// the media. Else falls back to instance account.
var requestUser string var requestUser string
if requester != nil { if requester != nil {
// Set requesting acc username.
requestUser = requester.Username requestUser = requester.Username
} }
@ -217,10 +209,9 @@ func (p *Processor) getAttachmentContent(
func (p *Processor) getEmojiContent( func (p *Processor) getEmojiContent(
ctx context.Context, ctx context.Context,
acctID string,
ownerID string,
emojiID string,
sizeStr media.Size, sizeStr media.Size,
emojiID string,
) ( ) (
*apimodel.Content, *apimodel.Content,
gtserror.WithCode, gtserror.WithCode,
@ -229,7 +220,7 @@ func (p *Processor) getEmojiContent(
// As refreshed emojis use a newly generated path ID to // As refreshed emojis use a newly generated path ID to
// differentiate them (cache-wise) from the original. // differentiate them (cache-wise) from the original.
staticURL := uris.URIForAttachment( staticURL := uris.URIForAttachment(
ownerID, acctID,
string(media.TypeEmoji), string(media.TypeEmoji),
string(media.SizeStatic), string(media.SizeStatic),
emojiID, emojiID,
@ -323,8 +314,9 @@ func (p *Processor) getContent(
// Ensure found. // Ensure found.
if rc == nil { if rc == nil {
err := gtserror.Newf("file not found at %s", path)
const text = "file not found" const text = "file not found"
return nil, gtserror.NewErrorNotFound(errors.New(text), text) return nil, gtserror.NewErrorNotFound(err, text)
} }
// Return with stream. // Return with stream.
@ -332,6 +324,41 @@ func (p *Processor) getContent(
return content, nil return content, nil
} }
// handles serving Content for "unknown" file
// type, ie., a file we couldn't cache (this time).
func handleUnknown(
attach *gtsmodel.MediaAttachment,
) (*apimodel.Content, gtserror.WithCode) {
if attach.RemoteURL == "" {
err := gtserror.Newf("empty remote url for %s", attach.ID)
return nil, gtserror.NewErrorInternalError(err)
}
// Parse media remote URL to valid URL object.
remoteURL, err := url.Parse(attach.RemoteURL)
if err != nil {
err := gtserror.Newf("invalid remote url for %s: %w", attach.ID, err)
return nil, gtserror.NewErrorInternalError(err)
}
if remoteURL == nil {
err := gtserror.Newf("nil remote url for %s", attach.ID)
return nil, gtserror.NewErrorInternalError(err)
}
// Just forward the request to the remote URL,
// since this is a type we couldn't process.
url := &storage.PresignedURL{
URL: remoteURL,
// We might manage to cache the media
// at some point, so set a low-ish expiry.
Expiry: time.Now().Add(2 * time.Hour),
}
return &apimodel.Content{URL: url}, nil
}
func parseType(s string) (media.Type, error) { func parseType(s string) (media.Type, error) {
switch s { switch s {
case string(media.TypeAttachment): case string(media.TypeAttachment):
@ -357,3 +384,23 @@ func parseSize(s string) (media.Size, error) {
} }
return "", fmt.Errorf("%s not a recognized media.Size", s) return "", fmt.Errorf("%s not a recognized media.Size", s)
} }
// Extract the mediaID and file extension from
// a string like "01J3CTH8CZ6ATDNMG6CPRC36XE.gif"
func parseFileName(s string) (string, string, error) {
spl := strings.Split(s, ".")
if len(spl) != 2 || spl[0] == "" || spl[1] == "" {
return "", "", errors.New("file name not splittable on '.'")
}
var (
mediaID = spl[0]
mediaExt = spl[1]
)
if !regexes.ULID.MatchString(mediaID) {
return "", "", fmt.Errorf("%s not a valid ULID", mediaID)
}
return mediaID, mediaExt, nil
}

View file

@ -52,6 +52,12 @@ type PresignedURL struct {
Expiry time.Time // link expires at this time Expiry time.Time // link expires at this time
} }
// IsInvalidKey returns whether error is an invalid-key
// type error returned by the underlying storage library.
func IsInvalidKey(err error) bool {
return errors.Is(err, storage.ErrInvalidKey)
}
// IsAlreadyExist returns whether error is an already-exists // IsAlreadyExist returns whether error is an already-exists
// type error returned by the underlying storage library. // type error returned by the underlying storage library.
func IsAlreadyExist(err error) bool { func IsAlreadyExist(err error) bool {