[bugfix] update media if more than just url changes (#2970)

* refactor status media handling into separate functions, handle case of changed metadata

* update fetchRemoteAccount{Avatar,Header} to use new refactored {load,update}Attachment() functions

* whoops, nearly marked avatars as headers :')

* reformatting to improve legibility
This commit is contained in:
kim 2024-06-06 14:35:50 +00:00 committed by GitHub
parent bcda048eab
commit b371c2db47
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 241 additions and 114 deletions

View file

@ -20,7 +20,6 @@
import (
"context"
"errors"
"io"
"net/url"
"time"
@ -752,128 +751,146 @@ func (d *Dereferencer) enrichAccount(
return latestAcc, apubAcc, nil
}
func (d *Dereferencer) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.Transport, existing, latestAcc *gtsmodel.Account) error {
func (d *Dereferencer) fetchRemoteAccountAvatar(
ctx context.Context,
tsport transport.Transport,
existingAcc *gtsmodel.Account,
latestAcc *gtsmodel.Account,
) error {
if latestAcc.AvatarRemoteURL == "" {
// No avatar set on newest model, leave
// latest avatar attachment ID empty.
return nil
}
// By default we keep the previous media attachment ID. This will only
// be changed if and when we have the new media loaded into storage.
latestAcc.AvatarMediaAttachmentID = existing.AvatarMediaAttachmentID
// Check for an existing stored media attachment
// specifically with unchanged remote URL we can use.
if existingAcc.AvatarMediaAttachmentID != "" &&
existingAcc.AvatarRemoteURL == latestAcc.AvatarRemoteURL {
// If we had a media attachment ID already, and the URL
// of the attachment hasn't changed from existing -> latest,
// then we may be able to just keep our existing attachment
// without having to make any remote calls.
if latestAcc.AvatarMediaAttachmentID != "" &&
existing.AvatarRemoteURL == latestAcc.AvatarRemoteURL {
// Ensure we have media attachment with the known ID.
media, err := d.state.DB.GetAttachmentByID(ctx, existing.AvatarMediaAttachmentID)
// Fetch the existing avatar media attachment with ID.
existing, err := d.state.DB.GetAttachmentByID(ctx,
existingAcc.AvatarMediaAttachmentID,
)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return gtserror.Newf("error getting attachment %s: %w", existing.AvatarMediaAttachmentID, err)
return gtserror.Newf("error getting attachment %s: %w", existingAcc.AvatarMediaAttachmentID, err)
}
// Ensure attachment has correct properties.
if media != nil && media.RemoteURL == latestAcc.AvatarRemoteURL {
// We already have the most up-to-date
// media attachment, keep using it.
if existing != nil {
// Ensuring existing attachment is up-to-date
// and any recaching is performed if required.
existing, err := d.updateAttachment(ctx,
tsport,
existing,
nil,
)
if err != nil {
log.Errorf(ctx, "error updating existing attachment: %v", err)
// specifically do NOT return nil here,
// we already have a model, we don't
// want to drop it from the account, just
// log that an update for it failed.
}
// Set the avatar attachment on account model.
latestAcc.AvatarMediaAttachment = existing
latestAcc.AvatarMediaAttachmentID = existing.ID
return nil
}
}
// If we reach here, we know we need to fetch the most
// up-to-date version of the attachment from remote.
// Parse and validate the newly provided media URL.
avatarURI, err := url.Parse(latestAcc.AvatarRemoteURL)
// Fetch newly changed avatar from remote.
attachment, err := d.loadAttachment(ctx,
tsport,
latestAcc.ID,
latestAcc.AvatarRemoteURL,
&media.AdditionalMediaInfo{
Avatar: util.Ptr(true),
RemoteURL: &latestAcc.AvatarRemoteURL,
},
)
if err != nil {
return gtserror.Newf("error parsing url %s: %w", latestAcc.AvatarRemoteURL, err)
}
// Set the media data function to dereference avatar from URI.
data := func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, avatarURI)
}
// Create new media processing request from the media manager instance.
processing := d.mediaManager.PreProcessMedia(data, latestAcc.ID, &media.AdditionalMediaInfo{
Avatar: func() *bool { v := true; return &v }(),
RemoteURL: &latestAcc.AvatarRemoteURL,
})
// Start media attachment loading (blocking call).
if _, err := processing.LoadAttachment(ctx); err != nil {
return gtserror.Newf("error loading attachment %s: %w", latestAcc.AvatarRemoteURL, err)
}
// Set the newly loaded avatar media attachment ID.
latestAcc.AvatarMediaAttachmentID = processing.AttachmentID()
// Set the avatar attachment on account model.
latestAcc.AvatarMediaAttachment = attachment
latestAcc.AvatarMediaAttachmentID = attachment.ID
return nil
}
func (d *Dereferencer) fetchRemoteAccountHeader(ctx context.Context, tsport transport.Transport, existing, latestAcc *gtsmodel.Account) error {
func (d *Dereferencer) fetchRemoteAccountHeader(
ctx context.Context,
tsport transport.Transport,
existingAcc *gtsmodel.Account,
latestAcc *gtsmodel.Account,
) error {
if latestAcc.HeaderRemoteURL == "" {
// No header set on newest model, leave
// latest header attachment ID empty.
return nil
}
// By default we keep the previous media attachment ID. This will only
// be changed if and when we have the new media loaded into storage.
latestAcc.HeaderMediaAttachmentID = existing.HeaderMediaAttachmentID
// Check for an existing stored media attachment
// specifically with unchanged remote URL we can use.
if existingAcc.HeaderMediaAttachmentID != "" &&
existingAcc.HeaderRemoteURL == latestAcc.HeaderRemoteURL {
// If we had a media attachment ID already, and the URL
// of the attachment hasn't changed from existing -> latest,
// then we may be able to just keep our existing attachment
// without having to make any remote calls.
if latestAcc.HeaderMediaAttachmentID != "" &&
existing.HeaderRemoteURL == latestAcc.HeaderRemoteURL {
// Ensure we have media attachment with the known ID.
media, err := d.state.DB.GetAttachmentByID(ctx, existing.HeaderMediaAttachmentID)
// Fetch the existing header media attachment with ID.
existing, err := d.state.DB.GetAttachmentByID(ctx,
existingAcc.HeaderMediaAttachmentID,
)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return gtserror.Newf("error getting attachment %s: %w", existing.HeaderMediaAttachmentID, err)
return gtserror.Newf("error getting attachment %s: %w", existingAcc.HeaderMediaAttachmentID, err)
}
// Ensure attachment has correct properties.
if media != nil && media.RemoteURL == latestAcc.HeaderRemoteURL {
// We already have the most up-to-date
// media attachment, keep using it.
if existing != nil {
// Ensuring existing attachment is up-to-date
// and any recaching is performed if required.
existing, err := d.updateAttachment(ctx,
tsport,
existing,
nil,
)
if err != nil {
log.Errorf(ctx, "error updating existing attachment: %v", err)
// specifically do NOT return nil here,
// we already have a model, we don't
// want to drop it from the account, just
// log that an update for it failed.
}
// Set the header attachment on account model.
latestAcc.HeaderMediaAttachment = existing
latestAcc.HeaderMediaAttachmentID = existing.ID
return nil
}
}
// If we reach here, we know we need to fetch the most
// up-to-date version of the attachment from remote.
// Parse and validate the newly provided media URL.
headerURI, err := url.Parse(latestAcc.HeaderRemoteURL)
// Fetch newly changed header from remote.
attachment, err := d.loadAttachment(ctx,
tsport,
latestAcc.ID,
latestAcc.HeaderRemoteURL,
&media.AdditionalMediaInfo{
Header: util.Ptr(true),
RemoteURL: &latestAcc.HeaderRemoteURL,
},
)
if err != nil {
return gtserror.Newf("error parsing url %s: %w", latestAcc.HeaderRemoteURL, err)
}
// Set the media data function to dereference avatar from URI.
data := func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, headerURI)
}
// Create new media processing request from the media manager instance.
processing := d.mediaManager.PreProcessMedia(data, latestAcc.ID, &media.AdditionalMediaInfo{
Header: func() *bool { v := true; return &v }(),
RemoteURL: &latestAcc.HeaderRemoteURL,
})
// Start media attachment loading (blocking call).
if _, err := processing.LoadAttachment(ctx); err != nil {
return gtserror.Newf("error loading attachment %s: %w", latestAcc.HeaderRemoteURL, err)
}
// Set the newly loaded avatar media attachment ID.
latestAcc.HeaderMediaAttachmentID = processing.AttachmentID()
// Set the header attachment on account model.
latestAcc.HeaderMediaAttachment = attachment
latestAcc.HeaderMediaAttachmentID = attachment.ID
return nil
}

View file

@ -20,7 +20,6 @@
import (
"context"
"errors"
"io"
"net/url"
"slices"
"time"
@ -1000,45 +999,45 @@ func (d *Dereferencer) fetchStatusAttachments(ctx context.Context, tsport transp
// Look for existing media attachment with remote URL first.
existing, ok := existing.GetAttachmentByRemoteURL(attachment.RemoteURL)
if ok && existing.ID != "" && *existing.Cached {
if ok && existing.ID != "" {
// Ensure the existing media attachment is up-to-date and cached.
existing, err := d.updateAttachment(ctx, tsport, existing, attachment)
if err != nil {
log.Errorf(ctx, "error updating existing attachment: %v", err)
// specifically do NOT continue here,
// we already have a model, we don't
// want to drop it from the status, just
// log that an update for it failed.
}
// Set the existing attachment.
status.Attachments[i] = existing
status.AttachmentIDs[i] = existing.ID
continue
}
// Ensure a valid media attachment remote URL.
remoteURL, err := url.Parse(attachment.RemoteURL)
if err != nil {
log.Errorf(ctx, "invalid remote media url %q: %v", attachment.RemoteURL, err)
// Load this new media attachment.
attachment, err := d.loadAttachment(
ctx,
tsport,
status.AccountID,
attachment.RemoteURL,
&media.AdditionalMediaInfo{
StatusID: &status.ID,
RemoteURL: &attachment.RemoteURL,
Description: &attachment.Description,
Blurhash: &attachment.Blurhash,
},
)
if err != nil && attachment == nil {
log.Errorf(ctx, "error loading attachment: %v", err)
continue
}
data := func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, remoteURL)
}
ai := &media.AdditionalMediaInfo{
StatusID: &status.ID,
RemoteURL: &attachment.RemoteURL,
Description: &attachment.Description,
Blurhash: &attachment.Blurhash,
}
// Start pre-processing remote media at remote URL.
processing := d.mediaManager.PreProcessMedia(data, status.AccountID, ai)
// Force attachment loading *right now*.
attachment, err = processing.LoadAttachment(ctx)
if err != nil {
if attachment == nil {
// Totally failed to load;
// bail on this attachment.
log.Errorf(ctx, "error loading attachment: %v", err)
continue
}
// Partially loaded. Keep as
// placeholder and try again later.
// A non-fatal error occurred during loading.
log.Warnf(ctx, "partially loaded attachment: %v", err)
}

View file

@ -18,11 +18,122 @@
package dereferencing
import (
"context"
"io"
"net/url"
"slices"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
// loadAttachment handles the case of a new media attachment
// that requires loading. it stores and caches from given data.
func (d *Dereferencer) loadAttachment(
ctx context.Context,
tsport transport.Transport,
accountID string, // media account owner
remoteURL string,
info *media.AdditionalMediaInfo,
) (
*gtsmodel.MediaAttachment,
error,
) {
// Parse str as valid URL object.
url, err := url.Parse(remoteURL)
if err != nil {
return nil, gtserror.Newf("invalid remote media url %q: %v", remoteURL, err)
}
// Start pre-processing remote media at remote URL.
processing := d.mediaManager.PreProcessMedia(
func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, url)
},
accountID,
info,
)
// Force attachment loading *right now*.
return processing.LoadAttachment(ctx)
}
// updateAttachment handles the case of an existing media attachment
// that *may* have changes or need recaching. it checks for changed
// fields, updating in the database if so, and recaches uncached media.
func (d *Dereferencer) updateAttachment(
ctx context.Context,
tsport transport.Transport,
existing *gtsmodel.MediaAttachment, // existing attachment
media *gtsmodel.MediaAttachment, // (optional) changed media
) (
*gtsmodel.MediaAttachment, // always set
error,
) {
if media != nil {
// Possible changed media columns.
changed := make([]string, 0, 3)
// Check if attachment description has changed.
if existing.Description != media.Description {
changed = append(changed, "description")
existing.Description = media.Description
}
// Check if attachment blurhash has changed (i.e. content change).
if existing.Blurhash != media.Blurhash && media.Blurhash != "" {
changed = append(changed, "blurhash", "cached")
existing.Blurhash = media.Blurhash
existing.Cached = util.Ptr(false)
}
if len(changed) > 0 {
// Update the existing attachment model in the database.
err := d.state.DB.UpdateAttachment(ctx, existing, changed...)
if err != nil {
return media, gtserror.Newf("error updating media: %w", err)
}
}
}
// Check if cached.
if *existing.Cached {
return existing, nil
}
// Parse str as valid URL object.
url, err := url.Parse(existing.RemoteURL)
if err != nil {
return nil, gtserror.Newf("invalid remote media url %q: %v", media.RemoteURL, err)
}
// Start pre-processing remote media recaching from remote.
processing, err := d.mediaManager.PreProcessMediaRecache(
ctx,
func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, url)
},
existing.ID,
)
if err != nil {
return nil, gtserror.Newf("error processing recache: %w", err)
}
// Force load attachment recache *right now*.
recached, err := processing.LoadAttachment(ctx)
// Always return the error we
// receive, but ensure we return
// most up-to-date media file.
if recached != nil {
return recached, err
}
return existing, err
}
// pollChanged returns whether a poll has changed in way that
// indicates that this should be an entirely new poll. i.e. if
// the available options have changed, or the expiry has increased.