[bugfix] update media if more than just url changes (#2970)

* refactor status media handling into separate functions, handle case of changed metadata

* update fetchRemoteAccount{Avatar,Header} to use new refactored {load,update}Attachment() functions

* whoops, nearly marked avatars as headers :')

* reformatting to improve legibility
This commit is contained in:
kim 2024-06-06 14:35:50 +00:00 committed by GitHub
parent bcda048eab
commit b371c2db47
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 241 additions and 114 deletions

View file

@ -20,7 +20,6 @@
import ( import (
"context" "context"
"errors" "errors"
"io"
"net/url" "net/url"
"time" "time"
@ -752,128 +751,146 @@ func (d *Dereferencer) enrichAccount(
return latestAcc, apubAcc, nil return latestAcc, apubAcc, nil
} }
func (d *Dereferencer) fetchRemoteAccountAvatar(ctx context.Context, tsport transport.Transport, existing, latestAcc *gtsmodel.Account) error { func (d *Dereferencer) fetchRemoteAccountAvatar(
ctx context.Context,
tsport transport.Transport,
existingAcc *gtsmodel.Account,
latestAcc *gtsmodel.Account,
) error {
if latestAcc.AvatarRemoteURL == "" { if latestAcc.AvatarRemoteURL == "" {
// No avatar set on newest model, leave // No avatar set on newest model, leave
// latest avatar attachment ID empty. // latest avatar attachment ID empty.
return nil return nil
} }
// By default we keep the previous media attachment ID. This will only // Check for an existing stored media attachment
// be changed if and when we have the new media loaded into storage. // specifically with unchanged remote URL we can use.
latestAcc.AvatarMediaAttachmentID = existing.AvatarMediaAttachmentID if existingAcc.AvatarMediaAttachmentID != "" &&
existingAcc.AvatarRemoteURL == latestAcc.AvatarRemoteURL {
// If we had a media attachment ID already, and the URL // Fetch the existing avatar media attachment with ID.
// of the attachment hasn't changed from existing -> latest, existing, err := d.state.DB.GetAttachmentByID(ctx,
// then we may be able to just keep our existing attachment existingAcc.AvatarMediaAttachmentID,
// without having to make any remote calls. )
if latestAcc.AvatarMediaAttachmentID != "" &&
existing.AvatarRemoteURL == latestAcc.AvatarRemoteURL {
// Ensure we have media attachment with the known ID.
media, err := d.state.DB.GetAttachmentByID(ctx, existing.AvatarMediaAttachmentID)
if err != nil && !errors.Is(err, db.ErrNoEntries) { if err != nil && !errors.Is(err, db.ErrNoEntries) {
return gtserror.Newf("error getting attachment %s: %w", existing.AvatarMediaAttachmentID, err) return gtserror.Newf("error getting attachment %s: %w", existingAcc.AvatarMediaAttachmentID, err)
} }
// Ensure attachment has correct properties. if existing != nil {
if media != nil && media.RemoteURL == latestAcc.AvatarRemoteURL { // Ensuring existing attachment is up-to-date
// We already have the most up-to-date // and any recaching is performed if required.
// media attachment, keep using it. existing, err := d.updateAttachment(ctx,
tsport,
existing,
nil,
)
if err != nil {
log.Errorf(ctx, "error updating existing attachment: %v", err)
// specifically do NOT return nil here,
// we already have a model, we don't
// want to drop it from the account, just
// log that an update for it failed.
}
// Set the avatar attachment on account model.
latestAcc.AvatarMediaAttachment = existing
latestAcc.AvatarMediaAttachmentID = existing.ID
return nil return nil
} }
} }
// If we reach here, we know we need to fetch the most // Fetch newly changed avatar from remote.
// up-to-date version of the attachment from remote. attachment, err := d.loadAttachment(ctx,
tsport,
// Parse and validate the newly provided media URL. latestAcc.ID,
avatarURI, err := url.Parse(latestAcc.AvatarRemoteURL) latestAcc.AvatarRemoteURL,
&media.AdditionalMediaInfo{
Avatar: util.Ptr(true),
RemoteURL: &latestAcc.AvatarRemoteURL,
},
)
if err != nil { if err != nil {
return gtserror.Newf("error parsing url %s: %w", latestAcc.AvatarRemoteURL, err)
}
// Set the media data function to dereference avatar from URI.
data := func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, avatarURI)
}
// Create new media processing request from the media manager instance.
processing := d.mediaManager.PreProcessMedia(data, latestAcc.ID, &media.AdditionalMediaInfo{
Avatar: func() *bool { v := true; return &v }(),
RemoteURL: &latestAcc.AvatarRemoteURL,
})
// Start media attachment loading (blocking call).
if _, err := processing.LoadAttachment(ctx); err != nil {
return gtserror.Newf("error loading attachment %s: %w", latestAcc.AvatarRemoteURL, err) return gtserror.Newf("error loading attachment %s: %w", latestAcc.AvatarRemoteURL, err)
} }
// Set the newly loaded avatar media attachment ID. // Set the avatar attachment on account model.
latestAcc.AvatarMediaAttachmentID = processing.AttachmentID() latestAcc.AvatarMediaAttachment = attachment
latestAcc.AvatarMediaAttachmentID = attachment.ID
return nil return nil
} }
func (d *Dereferencer) fetchRemoteAccountHeader(ctx context.Context, tsport transport.Transport, existing, latestAcc *gtsmodel.Account) error { func (d *Dereferencer) fetchRemoteAccountHeader(
ctx context.Context,
tsport transport.Transport,
existingAcc *gtsmodel.Account,
latestAcc *gtsmodel.Account,
) error {
if latestAcc.HeaderRemoteURL == "" { if latestAcc.HeaderRemoteURL == "" {
// No header set on newest model, leave // No header set on newest model, leave
// latest header attachment ID empty. // latest header attachment ID empty.
return nil return nil
} }
// By default we keep the previous media attachment ID. This will only // Check for an existing stored media attachment
// be changed if and when we have the new media loaded into storage. // specifically with unchanged remote URL we can use.
latestAcc.HeaderMediaAttachmentID = existing.HeaderMediaAttachmentID if existingAcc.HeaderMediaAttachmentID != "" &&
existingAcc.HeaderRemoteURL == latestAcc.HeaderRemoteURL {
// If we had a media attachment ID already, and the URL // Fetch the existing header media attachment with ID.
// of the attachment hasn't changed from existing -> latest, existing, err := d.state.DB.GetAttachmentByID(ctx,
// then we may be able to just keep our existing attachment existingAcc.HeaderMediaAttachmentID,
// without having to make any remote calls. )
if latestAcc.HeaderMediaAttachmentID != "" &&
existing.HeaderRemoteURL == latestAcc.HeaderRemoteURL {
// Ensure we have media attachment with the known ID.
media, err := d.state.DB.GetAttachmentByID(ctx, existing.HeaderMediaAttachmentID)
if err != nil && !errors.Is(err, db.ErrNoEntries) { if err != nil && !errors.Is(err, db.ErrNoEntries) {
return gtserror.Newf("error getting attachment %s: %w", existing.HeaderMediaAttachmentID, err) return gtserror.Newf("error getting attachment %s: %w", existingAcc.HeaderMediaAttachmentID, err)
} }
// Ensure attachment has correct properties. if existing != nil {
if media != nil && media.RemoteURL == latestAcc.HeaderRemoteURL { // Ensuring existing attachment is up-to-date
// We already have the most up-to-date // and any recaching is performed if required.
// media attachment, keep using it. existing, err := d.updateAttachment(ctx,
tsport,
existing,
nil,
)
if err != nil {
log.Errorf(ctx, "error updating existing attachment: %v", err)
// specifically do NOT return nil here,
// we already have a model, we don't
// want to drop it from the account, just
// log that an update for it failed.
}
// Set the header attachment on account model.
latestAcc.HeaderMediaAttachment = existing
latestAcc.HeaderMediaAttachmentID = existing.ID
return nil return nil
} }
} }
// If we reach here, we know we need to fetch the most // Fetch newly changed header from remote.
// up-to-date version of the attachment from remote. attachment, err := d.loadAttachment(ctx,
tsport,
// Parse and validate the newly provided media URL. latestAcc.ID,
headerURI, err := url.Parse(latestAcc.HeaderRemoteURL) latestAcc.HeaderRemoteURL,
&media.AdditionalMediaInfo{
Header: util.Ptr(true),
RemoteURL: &latestAcc.HeaderRemoteURL,
},
)
if err != nil { if err != nil {
return gtserror.Newf("error parsing url %s: %w", latestAcc.HeaderRemoteURL, err)
}
// Set the media data function to dereference avatar from URI.
data := func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, headerURI)
}
// Create new media processing request from the media manager instance.
processing := d.mediaManager.PreProcessMedia(data, latestAcc.ID, &media.AdditionalMediaInfo{
Header: func() *bool { v := true; return &v }(),
RemoteURL: &latestAcc.HeaderRemoteURL,
})
// Start media attachment loading (blocking call).
if _, err := processing.LoadAttachment(ctx); err != nil {
return gtserror.Newf("error loading attachment %s: %w", latestAcc.HeaderRemoteURL, err) return gtserror.Newf("error loading attachment %s: %w", latestAcc.HeaderRemoteURL, err)
} }
// Set the newly loaded avatar media attachment ID. // Set the header attachment on account model.
latestAcc.HeaderMediaAttachmentID = processing.AttachmentID() latestAcc.HeaderMediaAttachment = attachment
latestAcc.HeaderMediaAttachmentID = attachment.ID
return nil return nil
} }

View file

@ -20,7 +20,6 @@
import ( import (
"context" "context"
"errors" "errors"
"io"
"net/url" "net/url"
"slices" "slices"
"time" "time"
@ -1000,45 +999,45 @@ func (d *Dereferencer) fetchStatusAttachments(ctx context.Context, tsport transp
// Look for existing media attachment with remote URL first. // Look for existing media attachment with remote URL first.
existing, ok := existing.GetAttachmentByRemoteURL(attachment.RemoteURL) existing, ok := existing.GetAttachmentByRemoteURL(attachment.RemoteURL)
if ok && existing.ID != "" && *existing.Cached { if ok && existing.ID != "" {
// Ensure the existing media attachment is up-to-date and cached.
existing, err := d.updateAttachment(ctx, tsport, existing, attachment)
if err != nil {
log.Errorf(ctx, "error updating existing attachment: %v", err)
// specifically do NOT continue here,
// we already have a model, we don't
// want to drop it from the status, just
// log that an update for it failed.
}
// Set the existing attachment.
status.Attachments[i] = existing status.Attachments[i] = existing
status.AttachmentIDs[i] = existing.ID status.AttachmentIDs[i] = existing.ID
continue continue
} }
// Ensure a valid media attachment remote URL. // Load this new media attachment.
remoteURL, err := url.Parse(attachment.RemoteURL) attachment, err := d.loadAttachment(
if err != nil { ctx,
log.Errorf(ctx, "invalid remote media url %q: %v", attachment.RemoteURL, err) tsport,
status.AccountID,
attachment.RemoteURL,
&media.AdditionalMediaInfo{
StatusID: &status.ID,
RemoteURL: &attachment.RemoteURL,
Description: &attachment.Description,
Blurhash: &attachment.Blurhash,
},
)
if err != nil && attachment == nil {
log.Errorf(ctx, "error loading attachment: %v", err)
continue continue
} }
data := func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, remoteURL)
}
ai := &media.AdditionalMediaInfo{
StatusID: &status.ID,
RemoteURL: &attachment.RemoteURL,
Description: &attachment.Description,
Blurhash: &attachment.Blurhash,
}
// Start pre-processing remote media at remote URL.
processing := d.mediaManager.PreProcessMedia(data, status.AccountID, ai)
// Force attachment loading *right now*.
attachment, err = processing.LoadAttachment(ctx)
if err != nil { if err != nil {
if attachment == nil { // A non-fatal error occurred during loading.
// Totally failed to load;
// bail on this attachment.
log.Errorf(ctx, "error loading attachment: %v", err)
continue
}
// Partially loaded. Keep as
// placeholder and try again later.
log.Warnf(ctx, "partially loaded attachment: %v", err) log.Warnf(ctx, "partially loaded attachment: %v", err)
} }

View file

@ -18,11 +18,122 @@
package dereferencing package dereferencing
import ( import (
"context"
"io"
"net/url"
"slices" "slices"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/util"
) )
// loadAttachment handles the case of a new media attachment
// that requires loading. it stores and caches from given data.
func (d *Dereferencer) loadAttachment(
ctx context.Context,
tsport transport.Transport,
accountID string, // media account owner
remoteURL string,
info *media.AdditionalMediaInfo,
) (
*gtsmodel.MediaAttachment,
error,
) {
// Parse str as valid URL object.
url, err := url.Parse(remoteURL)
if err != nil {
return nil, gtserror.Newf("invalid remote media url %q: %v", remoteURL, err)
}
// Start pre-processing remote media at remote URL.
processing := d.mediaManager.PreProcessMedia(
func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, url)
},
accountID,
info,
)
// Force attachment loading *right now*.
return processing.LoadAttachment(ctx)
}
// updateAttachment handles the case of an existing media attachment
// that *may* have changes or need recaching. it checks for changed
// fields, updating in the database if so, and recaches uncached media.
func (d *Dereferencer) updateAttachment(
ctx context.Context,
tsport transport.Transport,
existing *gtsmodel.MediaAttachment, // existing attachment
media *gtsmodel.MediaAttachment, // (optional) changed media
) (
*gtsmodel.MediaAttachment, // always set
error,
) {
if media != nil {
// Possible changed media columns.
changed := make([]string, 0, 3)
// Check if attachment description has changed.
if existing.Description != media.Description {
changed = append(changed, "description")
existing.Description = media.Description
}
// Check if attachment blurhash has changed (i.e. content change).
if existing.Blurhash != media.Blurhash && media.Blurhash != "" {
changed = append(changed, "blurhash", "cached")
existing.Blurhash = media.Blurhash
existing.Cached = util.Ptr(false)
}
if len(changed) > 0 {
// Update the existing attachment model in the database.
err := d.state.DB.UpdateAttachment(ctx, existing, changed...)
if err != nil {
return media, gtserror.Newf("error updating media: %w", err)
}
}
}
// Check if cached.
if *existing.Cached {
return existing, nil
}
// Parse str as valid URL object.
url, err := url.Parse(existing.RemoteURL)
if err != nil {
return nil, gtserror.Newf("invalid remote media url %q: %v", media.RemoteURL, err)
}
// Start pre-processing remote media recaching from remote.
processing, err := d.mediaManager.PreProcessMediaRecache(
ctx,
func(ctx context.Context) (io.ReadCloser, int64, error) {
return tsport.DereferenceMedia(ctx, url)
},
existing.ID,
)
if err != nil {
return nil, gtserror.Newf("error processing recache: %w", err)
}
// Force load attachment recache *right now*.
recached, err := processing.LoadAttachment(ctx)
// Always return the error we
// receive, but ensure we return
// most up-to-date media file.
if recached != nil {
return recached, err
}
return existing, err
}
// pollChanged returns whether a poll has changed in way that // pollChanged returns whether a poll has changed in way that
// indicates that this should be an entirely new poll. i.e. if // indicates that this should be an entirely new poll. i.e. if
// the available options have changed, or the expiry has increased. // the available options have changed, or the expiry has increased.