From b371c2db47a6c93fbea1b06b56eecb7237bc6130 Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:35:50 +0000 Subject: [PATCH] [bugfix] update media if more than just url changes (#2970) * refactor status media handling into separate functions, handle case of changed metadata * update fetchRemoteAccount{Avatar,Header} to use new refactored {load,update}Attachment() functions * whoops, nearly marked avatars as headers :') * reformatting to improve legibility --- internal/federation/dereferencing/account.go | 183 ++++++++++--------- internal/federation/dereferencing/status.go | 61 +++---- internal/federation/dereferencing/util.go | 111 +++++++++++ 3 files changed, 241 insertions(+), 114 deletions(-) diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index bd97b91ed..33a71ceb9 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -20,7 +20,6 @@ import ( "context" "errors" - "io" "net/url" "time" @@ -752,128 +751,146 @@ func (d *Dereferencer) enrichAccount( return latestAcc, apubAcc, nil } -func (d *Dereferencer) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.Transport, existing, latestAcc *gtsmodel.Account) error { +func (d *Dereferencer) fetchRemoteAccountAvatar( + ctx context.Context, + tsport transport.Transport, + existingAcc *gtsmodel.Account, + latestAcc *gtsmodel.Account, +) error { if latestAcc.AvatarRemoteURL == "" { // No avatar set on newest model, leave // latest avatar attachment ID empty. return nil } - // By default we keep the previous media attachment ID. This will only - // be changed if and when we have the new media loaded into storage. - latestAcc.AvatarMediaAttachmentID = existing.AvatarMediaAttachmentID + // Check for an existing stored media attachment + // specifically with unchanged remote URL we can use. + if existingAcc.AvatarMediaAttachmentID != "" && + existingAcc.AvatarRemoteURL == latestAcc.AvatarRemoteURL { - // If we had a media attachment ID already, and the URL - // of the attachment hasn't changed from existing -> latest, - // then we may be able to just keep our existing attachment - // without having to make any remote calls. - if latestAcc.AvatarMediaAttachmentID != "" && - existing.AvatarRemoteURL == latestAcc.AvatarRemoteURL { - - // Ensure we have media attachment with the known ID. - media, err := d.state.DB.GetAttachmentByID(ctx, existing.AvatarMediaAttachmentID) + // Fetch the existing avatar media attachment with ID. + existing, err := d.state.DB.GetAttachmentByID(ctx, + existingAcc.AvatarMediaAttachmentID, + ) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return gtserror.Newf("error getting attachment %s: %w", existing.AvatarMediaAttachmentID, err) + return gtserror.Newf("error getting attachment %s: %w", existingAcc.AvatarMediaAttachmentID, err) } - // Ensure attachment has correct properties. - if media != nil && media.RemoteURL == latestAcc.AvatarRemoteURL { - // We already have the most up-to-date - // media attachment, keep using it. + if existing != nil { + // Ensuring existing attachment is up-to-date + // and any recaching is performed if required. + existing, err := d.updateAttachment(ctx, + tsport, + existing, + nil, + ) + + if err != nil { + log.Errorf(ctx, "error updating existing attachment: %v", err) + + // specifically do NOT return nil here, + // we already have a model, we don't + // want to drop it from the account, just + // log that an update for it failed. + } + + // Set the avatar attachment on account model. + latestAcc.AvatarMediaAttachment = existing + latestAcc.AvatarMediaAttachmentID = existing.ID + return nil } } - // If we reach here, we know we need to fetch the most - // up-to-date version of the attachment from remote. - - // Parse and validate the newly provided media URL. - avatarURI, err := url.Parse(latestAcc.AvatarRemoteURL) + // Fetch newly changed avatar from remote. + attachment, err := d.loadAttachment(ctx, + tsport, + latestAcc.ID, + latestAcc.AvatarRemoteURL, + &media.AdditionalMediaInfo{ + Avatar: util.Ptr(true), + RemoteURL: &latestAcc.AvatarRemoteURL, + }, + ) if err != nil { - return gtserror.Newf("error parsing url %s: %w", latestAcc.AvatarRemoteURL, err) - } - - // Set the media data function to dereference avatar from URI. - data := func(ctx context.Context) (io.ReadCloser, int64, error) { - return tsport.DereferenceMedia(ctx, avatarURI) - } - - // Create new media processing request from the media manager instance. - processing := d.mediaManager.PreProcessMedia(data, latestAcc.ID, &media.AdditionalMediaInfo{ - Avatar: func() *bool { v := true; return &v }(), - RemoteURL: &latestAcc.AvatarRemoteURL, - }) - - // Start media attachment loading (blocking call). - if _, err := processing.LoadAttachment(ctx); err != nil { return gtserror.Newf("error loading attachment %s: %w", latestAcc.AvatarRemoteURL, err) } - // Set the newly loaded avatar media attachment ID. - latestAcc.AvatarMediaAttachmentID = processing.AttachmentID() + // Set the avatar attachment on account model. + latestAcc.AvatarMediaAttachment = attachment + latestAcc.AvatarMediaAttachmentID = attachment.ID return nil } -func (d *Dereferencer) fetchRemoteAccountHeader(ctx context.Context, tsport transport.Transport, existing, latestAcc *gtsmodel.Account) error { +func (d *Dereferencer) fetchRemoteAccountHeader( + ctx context.Context, + tsport transport.Transport, + existingAcc *gtsmodel.Account, + latestAcc *gtsmodel.Account, +) error { if latestAcc.HeaderRemoteURL == "" { // No header set on newest model, leave // latest header attachment ID empty. return nil } - // By default we keep the previous media attachment ID. This will only - // be changed if and when we have the new media loaded into storage. - latestAcc.HeaderMediaAttachmentID = existing.HeaderMediaAttachmentID + // Check for an existing stored media attachment + // specifically with unchanged remote URL we can use. + if existingAcc.HeaderMediaAttachmentID != "" && + existingAcc.HeaderRemoteURL == latestAcc.HeaderRemoteURL { - // If we had a media attachment ID already, and the URL - // of the attachment hasn't changed from existing -> latest, - // then we may be able to just keep our existing attachment - // without having to make any remote calls. - if latestAcc.HeaderMediaAttachmentID != "" && - existing.HeaderRemoteURL == latestAcc.HeaderRemoteURL { - - // Ensure we have media attachment with the known ID. - media, err := d.state.DB.GetAttachmentByID(ctx, existing.HeaderMediaAttachmentID) + // Fetch the existing header media attachment with ID. + existing, err := d.state.DB.GetAttachmentByID(ctx, + existingAcc.HeaderMediaAttachmentID, + ) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return gtserror.Newf("error getting attachment %s: %w", existing.HeaderMediaAttachmentID, err) + return gtserror.Newf("error getting attachment %s: %w", existingAcc.HeaderMediaAttachmentID, err) } - // Ensure attachment has correct properties. - if media != nil && media.RemoteURL == latestAcc.HeaderRemoteURL { - // We already have the most up-to-date - // media attachment, keep using it. + if existing != nil { + // Ensuring existing attachment is up-to-date + // and any recaching is performed if required. + existing, err := d.updateAttachment(ctx, + tsport, + existing, + nil, + ) + + if err != nil { + log.Errorf(ctx, "error updating existing attachment: %v", err) + + // specifically do NOT return nil here, + // we already have a model, we don't + // want to drop it from the account, just + // log that an update for it failed. + } + + // Set the header attachment on account model. + latestAcc.HeaderMediaAttachment = existing + latestAcc.HeaderMediaAttachmentID = existing.ID + return nil } } - // If we reach here, we know we need to fetch the most - // up-to-date version of the attachment from remote. - - // Parse and validate the newly provided media URL. - headerURI, err := url.Parse(latestAcc.HeaderRemoteURL) + // Fetch newly changed header from remote. + attachment, err := d.loadAttachment(ctx, + tsport, + latestAcc.ID, + latestAcc.HeaderRemoteURL, + &media.AdditionalMediaInfo{ + Header: util.Ptr(true), + RemoteURL: &latestAcc.HeaderRemoteURL, + }, + ) if err != nil { - return gtserror.Newf("error parsing url %s: %w", latestAcc.HeaderRemoteURL, err) - } - - // Set the media data function to dereference avatar from URI. - data := func(ctx context.Context) (io.ReadCloser, int64, error) { - return tsport.DereferenceMedia(ctx, headerURI) - } - - // Create new media processing request from the media manager instance. - processing := d.mediaManager.PreProcessMedia(data, latestAcc.ID, &media.AdditionalMediaInfo{ - Header: func() *bool { v := true; return &v }(), - RemoteURL: &latestAcc.HeaderRemoteURL, - }) - - // Start media attachment loading (blocking call). - if _, err := processing.LoadAttachment(ctx); err != nil { return gtserror.Newf("error loading attachment %s: %w", latestAcc.HeaderRemoteURL, err) } - // Set the newly loaded avatar media attachment ID. - latestAcc.HeaderMediaAttachmentID = processing.AttachmentID() + // Set the header attachment on account model. + latestAcc.HeaderMediaAttachment = attachment + latestAcc.HeaderMediaAttachmentID = attachment.ID return nil } diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 69627adc2..add12c31f 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -20,7 +20,6 @@ import ( "context" "errors" - "io" "net/url" "slices" "time" @@ -1000,45 +999,45 @@ func (d *Dereferencer) fetchStatusAttachments(ctx context.Context, tsport transp // Look for existing media attachment with remote URL first. existing, ok := existing.GetAttachmentByRemoteURL(attachment.RemoteURL) - if ok && existing.ID != "" && *existing.Cached { + if ok && existing.ID != "" { + + // Ensure the existing media attachment is up-to-date and cached. + existing, err := d.updateAttachment(ctx, tsport, existing, attachment) + if err != nil { + log.Errorf(ctx, "error updating existing attachment: %v", err) + + // specifically do NOT continue here, + // we already have a model, we don't + // want to drop it from the status, just + // log that an update for it failed. + } + + // Set the existing attachment. status.Attachments[i] = existing status.AttachmentIDs[i] = existing.ID continue } - // Ensure a valid media attachment remote URL. - remoteURL, err := url.Parse(attachment.RemoteURL) - if err != nil { - log.Errorf(ctx, "invalid remote media url %q: %v", attachment.RemoteURL, err) + // Load this new media attachment. + attachment, err := d.loadAttachment( + ctx, + tsport, + status.AccountID, + attachment.RemoteURL, + &media.AdditionalMediaInfo{ + StatusID: &status.ID, + RemoteURL: &attachment.RemoteURL, + Description: &attachment.Description, + Blurhash: &attachment.Blurhash, + }, + ) + if err != nil && attachment == nil { + log.Errorf(ctx, "error loading attachment: %v", err) continue } - data := func(ctx context.Context) (io.ReadCloser, int64, error) { - return tsport.DereferenceMedia(ctx, remoteURL) - } - - ai := &media.AdditionalMediaInfo{ - StatusID: &status.ID, - RemoteURL: &attachment.RemoteURL, - Description: &attachment.Description, - Blurhash: &attachment.Blurhash, - } - - // Start pre-processing remote media at remote URL. - processing := d.mediaManager.PreProcessMedia(data, status.AccountID, ai) - - // Force attachment loading *right now*. - attachment, err = processing.LoadAttachment(ctx) if err != nil { - if attachment == nil { - // Totally failed to load; - // bail on this attachment. - log.Errorf(ctx, "error loading attachment: %v", err) - continue - } - - // Partially loaded. Keep as - // placeholder and try again later. + // A non-fatal error occurred during loading. log.Warnf(ctx, "partially loaded attachment: %v", err) } diff --git a/internal/federation/dereferencing/util.go b/internal/federation/dereferencing/util.go index 38622f6c1..5cb7a0106 100644 --- a/internal/federation/dereferencing/util.go +++ b/internal/federation/dereferencing/util.go @@ -18,11 +18,122 @@ package dereferencing import ( + "context" + "io" + "net/url" "slices" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/media" + "github.com/superseriousbusiness/gotosocial/internal/transport" + "github.com/superseriousbusiness/gotosocial/internal/util" ) +// loadAttachment handles the case of a new media attachment +// that requires loading. it stores and caches from given data. +func (d *Dereferencer) loadAttachment( + ctx context.Context, + tsport transport.Transport, + accountID string, // media account owner + remoteURL string, + info *media.AdditionalMediaInfo, +) ( + *gtsmodel.MediaAttachment, + error, +) { + // Parse str as valid URL object. + url, err := url.Parse(remoteURL) + if err != nil { + return nil, gtserror.Newf("invalid remote media url %q: %v", remoteURL, err) + } + + // Start pre-processing remote media at remote URL. + processing := d.mediaManager.PreProcessMedia( + func(ctx context.Context) (io.ReadCloser, int64, error) { + return tsport.DereferenceMedia(ctx, url) + }, + accountID, + info, + ) + + // Force attachment loading *right now*. + return processing.LoadAttachment(ctx) +} + +// updateAttachment handles the case of an existing media attachment +// that *may* have changes or need recaching. it checks for changed +// fields, updating in the database if so, and recaches uncached media. +func (d *Dereferencer) updateAttachment( + ctx context.Context, + tsport transport.Transport, + existing *gtsmodel.MediaAttachment, // existing attachment + media *gtsmodel.MediaAttachment, // (optional) changed media +) ( + *gtsmodel.MediaAttachment, // always set + error, +) { + if media != nil { + // Possible changed media columns. + changed := make([]string, 0, 3) + + // Check if attachment description has changed. + if existing.Description != media.Description { + changed = append(changed, "description") + existing.Description = media.Description + } + + // Check if attachment blurhash has changed (i.e. content change). + if existing.Blurhash != media.Blurhash && media.Blurhash != "" { + changed = append(changed, "blurhash", "cached") + existing.Blurhash = media.Blurhash + existing.Cached = util.Ptr(false) + } + + if len(changed) > 0 { + // Update the existing attachment model in the database. + err := d.state.DB.UpdateAttachment(ctx, existing, changed...) + if err != nil { + return media, gtserror.Newf("error updating media: %w", err) + } + } + } + + // Check if cached. + if *existing.Cached { + return existing, nil + } + + // Parse str as valid URL object. + url, err := url.Parse(existing.RemoteURL) + if err != nil { + return nil, gtserror.Newf("invalid remote media url %q: %v", media.RemoteURL, err) + } + + // Start pre-processing remote media recaching from remote. + processing, err := d.mediaManager.PreProcessMediaRecache( + ctx, + func(ctx context.Context) (io.ReadCloser, int64, error) { + return tsport.DereferenceMedia(ctx, url) + }, + existing.ID, + ) + if err != nil { + return nil, gtserror.Newf("error processing recache: %w", err) + } + + // Force load attachment recache *right now*. + recached, err := processing.LoadAttachment(ctx) + + // Always return the error we + // receive, but ensure we return + // most up-to-date media file. + if recached != nil { + return recached, err + } + return existing, err +} + // pollChanged returns whether a poll has changed in way that // indicates that this should be an entirely new poll. i.e. if // the available options have changed, or the expiry has increased.