mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-12-31 19:46:31 +00:00
6c9d8e78eb
* revamp http client to not limit requests, instead use sender worker Signed-off-by: kim <grufwub@gmail.com> * remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping Signed-off-by: kim <grufwub@gmail.com> * improve batch senders to keep popping recipients until remote URL found Signed-off-by: kim <grufwub@gmail.com> * fix recipient looping issue Signed-off-by: kim <grufwub@gmail.com> * move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP Signed-off-by: kim <grufwub@gmail.com> * first draft of status refetching logic Signed-off-by: kim <grufwub@gmail.com> * fix testrig to use new federation alloc func signature Signed-off-by: kim <grufwub@gmail.com> * fix log format directive Signed-off-by: kim <grufwub@gmail.com> * add status fetched_at migration Signed-off-by: kim <grufwub@gmail.com> * remove unused / unchecked for error types Signed-off-by: kim <grufwub@gmail.com> * add back the used type... Signed-off-by: kim <grufwub@gmail.com> * add separate internal getStatus() function for derefThread() that doesn't recurse Signed-off-by: kim <grufwub@gmail.com> * improved mention and media attachment error handling Signed-off-by: kim <grufwub@gmail.com> * fix log and error format directives Signed-off-by: kim <grufwub@gmail.com> * update account deref to match status deref changes Signed-off-by: kim <grufwub@gmail.com> * very small code formatting change to make things clearer Signed-off-by: kim <grufwub@gmail.com> * add more code comments Signed-off-by: kim <grufwub@gmail.com> * improved code commenting Signed-off-by: kim <grufwub@gmail.com> * only check for required further derefs if needed Signed-off-by: kim <grufwub@gmail.com> * improved cache invalidation Signed-off-by: kim <grufwub@gmail.com> * tweak cache restarting to use a (very small) backoff Signed-off-by: kim <grufwub@gmail.com> * small readability changes and fixes Signed-off-by: kim <grufwub@gmail.com> * fix account sync issues Signed-off-by: kim <grufwub@gmail.com> * fix merge conflicts + update account enrichment to accept already-passed accountable Signed-off-by: kim <grufwub@gmail.com> * remove secondary function declaration Signed-off-by: kim <grufwub@gmail.com> * normalise dereferencer get status / account behaviour, fix remaining tests Signed-off-by: kim <grufwub@gmail.com> * fix remaining rebase conflicts, finish commenting code Signed-off-by: kim <grufwub@gmail.com> * appease the linter Signed-off-by: kim <grufwub@gmail.com> * add source file header Signed-off-by: kim <grufwub@gmail.com> * update to use TIMESTAMPTZ column type instead of just TIMESTAMP Signed-off-by: kim <grufwub@gmail.com> * don't pass in 'updated_at' to UpdateEmoji() Signed-off-by: kim <grufwub@gmail.com> * use new ap.Resolve{Account,Status}able() functions Signed-off-by: kim <grufwub@gmail.com> * remove the somewhat confusing rescoping of the same variable names Signed-off-by: kim <grufwub@gmail.com> * update migration file name, improved database delete error returns Signed-off-by: kim <grufwub@gmail.com> * formatting Signed-off-by: kim <grufwub@gmail.com> * improved multi-delete database functions to minimise DB calls Signed-off-by: kim <grufwub@gmail.com> * remove unused type Signed-off-by: kim <grufwub@gmail.com> * fix delete statements Signed-off-by: kim <grufwub@gmail.com> --------- Signed-off-by: kim <grufwub@gmail.com>
465 lines
16 KiB
Go
465 lines
16 KiB
Go
// GoToSocial
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package dereferencing
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/url"
|
|
"time"
|
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/ap"
|
|
"github.com/superseriousbusiness/gotosocial/internal/config"
|
|
"github.com/superseriousbusiness/gotosocial/internal/db"
|
|
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
|
"github.com/superseriousbusiness/gotosocial/internal/id"
|
|
"github.com/superseriousbusiness/gotosocial/internal/log"
|
|
"github.com/superseriousbusiness/gotosocial/internal/media"
|
|
"github.com/superseriousbusiness/gotosocial/internal/transport"
|
|
)
|
|
|
|
// statusUpToDate returns whether the given status model is both updateable
|
|
// (i.e. remote status) and whether it needs an update based on `fetched_at`.
|
|
func statusUpToDate(status *gtsmodel.Status) bool {
|
|
if *status.Local {
|
|
// Can't update local statuses.
|
|
return true
|
|
}
|
|
|
|
// If this status was updated recently (last interval), we return as-is.
|
|
if next := status.FetchedAt.Add(2 * time.Hour); time.Now().Before(next) {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// GetStatus: implements Dereferencer{}.GetStatus().
|
|
func (d *deref) GetStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) {
|
|
// Fetch and dereference status if necessary.
|
|
status, apubStatus, err := d.getStatusByURI(ctx,
|
|
requestUser,
|
|
uri,
|
|
)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
if apubStatus != nil {
|
|
// This status was updated, enqueue re-dereferencing the whole thread.
|
|
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
|
|
d.dereferenceThread(ctx, requestUser, uri, status, apubStatus)
|
|
})
|
|
}
|
|
|
|
return status, apubStatus, nil
|
|
}
|
|
|
|
// getStatusByURI is a package internal form of .GetStatusByURI() that doesn't bother dereferencing the whole thread on update.
|
|
func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) {
|
|
var (
|
|
status *gtsmodel.Status
|
|
uriStr = uri.String()
|
|
err error
|
|
)
|
|
|
|
// Search the database for existing status with ID URI.
|
|
status, err = d.state.DB.GetStatusByURI(ctx, uriStr)
|
|
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
|
return nil, nil, fmt.Errorf("GetStatusByURI: error checking database for status %s by uri: %w", uriStr, err)
|
|
}
|
|
|
|
if status == nil {
|
|
// Else, search the database for existing by ID URL.
|
|
status, err = d.state.DB.GetStatusByURL(ctx, uriStr)
|
|
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
|
return nil, nil, fmt.Errorf("GetStatusByURI: error checking database for status %s by url: %w", uriStr, err)
|
|
}
|
|
}
|
|
|
|
if status == nil {
|
|
// Ensure that this is isn't a search for a local status.
|
|
if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() {
|
|
return nil, nil, NewErrNotRetrievable(err) // this will be db.ErrNoEntries
|
|
}
|
|
|
|
// Create and pass-through a new bare-bones model for deref.
|
|
return d.enrichStatus(ctx, requestUser, uri, >smodel.Status{
|
|
Local: func() *bool { var false bool; return &false }(),
|
|
URI: uriStr,
|
|
}, nil)
|
|
}
|
|
|
|
// Try to update + deref existing status model.
|
|
latest, apubStatus, err := d.enrichStatus(ctx,
|
|
requestUser,
|
|
uri,
|
|
status,
|
|
nil,
|
|
)
|
|
if err != nil {
|
|
log.Errorf(ctx, "error enriching remote status: %v", err)
|
|
|
|
// Update fetch-at to slow re-attempts.
|
|
status.FetchedAt = time.Now()
|
|
_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at")
|
|
|
|
// Fallback to existing.
|
|
return status, nil, nil
|
|
}
|
|
|
|
return latest, apubStatus, nil
|
|
}
|
|
|
|
// RefreshStatus: implements Dereferencer{}.RefreshStatus().
|
|
func (d *deref) RefreshStatus(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) (*gtsmodel.Status, ap.Statusable, error) {
|
|
// Check whether needs update.
|
|
if statusUpToDate(status) {
|
|
return status, nil, nil
|
|
}
|
|
|
|
// Parse the URI from status.
|
|
uri, err := url.Parse(status.URI)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("RefreshStatus: invalid status uri %q: %w", status.URI, err)
|
|
}
|
|
|
|
// Try to update + deref existing status model.
|
|
latest, apubStatus, err := d.enrichStatus(ctx,
|
|
requestUser,
|
|
uri,
|
|
status,
|
|
apubStatus,
|
|
)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
// This status was updated, enqueue re-dereferencing the whole thread.
|
|
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
|
|
d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus)
|
|
})
|
|
|
|
return latest, apubStatus, nil
|
|
}
|
|
|
|
// RefreshStatusAsync: implements Dereferencer{}.RefreshStatusAsync().
|
|
func (d *deref) RefreshStatusAsync(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) {
|
|
// Check whether needs update.
|
|
if statusUpToDate(status) {
|
|
return
|
|
}
|
|
|
|
// Parse the URI from status.
|
|
uri, err := url.Parse(status.URI)
|
|
if err != nil {
|
|
log.Errorf(ctx, "RefreshStatusAsync: invalid status uri %q: %v", status.URI, err)
|
|
return
|
|
}
|
|
|
|
// Enqueue a worker function to re-fetch this status async.
|
|
d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
|
|
latest, apubStatus, err := d.enrichStatus(ctx, requestUser, uri, status, apubStatus)
|
|
if err != nil {
|
|
log.Errorf(ctx, "error enriching remote status: %v", err)
|
|
return
|
|
}
|
|
|
|
// This status was updated, re-dereference the whole thread.
|
|
d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus)
|
|
})
|
|
}
|
|
|
|
// enrichStatus will enrich the given status, whether a new barebones model, or existing model from the database. It handles necessary dereferencing etc.
|
|
func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.URL, status *gtsmodel.Status, apubStatus ap.Statusable) (*gtsmodel.Status, ap.Statusable, error) {
|
|
// Pre-fetch a transport for requesting username, used by later dereferencing.
|
|
tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: couldn't create transport: %w", err)
|
|
}
|
|
|
|
// Check whether this account URI is a blocked domain / subdomain.
|
|
if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error checking blocked domain: %w", err)
|
|
} else if blocked {
|
|
return nil, nil, fmt.Errorf("enrichStatus: %s is blocked", uri.Host)
|
|
}
|
|
|
|
var derefd bool
|
|
|
|
if apubStatus == nil {
|
|
// Dereference latest version of the status.
|
|
b, err := tsport.Dereference(ctx, uri)
|
|
if err != nil {
|
|
return nil, nil, &ErrNotRetrievable{fmt.Errorf("enrichStatus: error deferencing %s: %w", uri, err)}
|
|
}
|
|
|
|
// Attempt to resolve ActivityPub status from data.
|
|
apubStatus, err = ap.ResolveStatusable(ctx, b)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error resolving statusable from data for account %s: %w", uri, err)
|
|
}
|
|
|
|
// Mark as deref'd.
|
|
derefd = true
|
|
}
|
|
|
|
// Get the attributed-to status in order to fetch profile.
|
|
attributedTo, err := ap.ExtractAttributedTo(apubStatus)
|
|
if err != nil {
|
|
return nil, nil, errors.New("enrichStatus: attributedTo was empty")
|
|
}
|
|
|
|
// Ensure we have the author account of the status dereferenced (+ up-to-date).
|
|
if author, _, err := d.getAccountByURI(ctx, requestUser, attributedTo); err != nil {
|
|
if status.AccountID == "" {
|
|
// Provided status account is nil, i.e. this is a new status / author, so a deref fail is unrecoverable.
|
|
return nil, nil, fmt.Errorf("enrichStatus: failed to dereference status author %s: %w", uri, err)
|
|
}
|
|
} else if status.AccountID != "" && status.AccountID != author.ID {
|
|
// There already existed an account for this status author, but account ID changed. This shouldn't happen!
|
|
log.Warnf(ctx, "status author account ID changed: old=%s new=%s", status.AccountID, author.ID)
|
|
}
|
|
|
|
// By default we assume that apubStatus has been passed,
|
|
// indicating that the given status is already latest.
|
|
latestStatus := status
|
|
|
|
if derefd {
|
|
// ActivityPub model was recently dereferenced, so assume that passed status
|
|
// may contain out-of-date information, convert AP model to our GTS model.
|
|
latestStatus, err = d.typeConverter.ASStatusToStatus(ctx, apubStatus)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error converting statusable to gts model for status %s: %w", uri, err)
|
|
}
|
|
}
|
|
|
|
// Use existing status ID.
|
|
latestStatus.ID = status.ID
|
|
|
|
if latestStatus.ID == "" {
|
|
// Generate new status ID from the provided creation date.
|
|
latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: invalid created at date: %w", err)
|
|
}
|
|
}
|
|
|
|
// Carry-over values and set fetch time.
|
|
latestStatus.FetchedAt = time.Now()
|
|
latestStatus.Local = status.Local
|
|
|
|
// Ensure the status' mentions are populated, and pass in existing to check for changes.
|
|
if err := d.fetchStatusMentions(ctx, requestUser, status, latestStatus); err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error populating mentions for status %s: %w", uri, err)
|
|
}
|
|
|
|
// TODO: populateStatusTags()
|
|
|
|
// Ensure the status' media attachments are populated, passing in existing to check for changes.
|
|
if err := d.fetchStatusAttachments(ctx, tsport, status, latestStatus); err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error populating attachments for status %s: %w", uri, err)
|
|
}
|
|
|
|
// Ensure the status' emoji attachments are populated, passing in existing to check for changes.
|
|
if err := d.fetchStatusEmojis(ctx, requestUser, status, latestStatus); err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error populating emojis for status %s: %w", uri, err)
|
|
}
|
|
|
|
if status.CreatedAt.IsZero() {
|
|
// CreatedAt will be zero if no local copy was
|
|
// found in one of the GetStatusBy___() functions.
|
|
//
|
|
// This is new, put the status in the database.
|
|
err := d.state.DB.PutStatus(ctx, latestStatus)
|
|
|
|
if errors.Is(err, db.ErrAlreadyExists) {
|
|
// TODO: replace this quick fix with per-URI deref locks.
|
|
latestStatus, err = d.state.DB.GetStatusByURI(ctx, latestStatus.URI)
|
|
return latestStatus, nil, err
|
|
}
|
|
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error putting in database: %w", err)
|
|
}
|
|
} else {
|
|
// This is an existing status, update the model in the database.
|
|
if err := d.state.DB.UpdateStatus(ctx, latestStatus); err != nil {
|
|
return nil, nil, fmt.Errorf("enrichStatus: error updating database: %w", err)
|
|
}
|
|
}
|
|
|
|
return latestStatus, apubStatus, nil
|
|
}
|
|
|
|
func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error {
|
|
// Allocate new slice to take the yet-to-be created mention IDs.
|
|
status.MentionIDs = make([]string, len(status.Mentions))
|
|
|
|
for i := range status.Mentions {
|
|
mention := status.Mentions[i]
|
|
|
|
// Look for existing mention with target account URI first.
|
|
existing, ok := existing.GetMentionByTargetURI(mention.TargetAccountURI)
|
|
if ok && existing.ID != "" {
|
|
status.Mentions[i] = existing
|
|
status.MentionIDs[i] = existing.ID
|
|
continue
|
|
}
|
|
|
|
// Ensure that mention account URI is parseable.
|
|
accountURI, err := url.Parse(mention.TargetAccountURI)
|
|
if err != nil {
|
|
log.Errorf(ctx, "invalid account uri %q: %v", mention.TargetAccountURI, err)
|
|
continue
|
|
}
|
|
|
|
// Ensure we have the account of the mention target dereferenced.
|
|
mention.TargetAccount, _, err = d.getAccountByURI(ctx, requestUser, accountURI)
|
|
if err != nil {
|
|
log.Errorf(ctx, "failed to dereference account %s: %v", accountURI, err)
|
|
continue
|
|
}
|
|
|
|
// Generate new ID according to status creation.
|
|
mention.ID, err = id.NewULIDFromTime(status.CreatedAt)
|
|
if err != nil {
|
|
log.Errorf(ctx, "invalid created at date: %v", err)
|
|
mention.ID = id.NewULID() // just use "now"
|
|
}
|
|
|
|
// Set known further mention details.
|
|
mention.CreatedAt = status.CreatedAt
|
|
mention.UpdatedAt = status.UpdatedAt
|
|
mention.OriginAccount = status.Account
|
|
mention.OriginAccountID = status.AccountID
|
|
mention.OriginAccountURI = status.AccountURI
|
|
mention.TargetAccountID = mention.TargetAccount.ID
|
|
mention.TargetAccountURI = mention.TargetAccount.URI
|
|
mention.TargetAccountURL = mention.TargetAccount.URL
|
|
mention.StatusID = status.ID
|
|
mention.Status = status
|
|
|
|
// Place the new mention into the database.
|
|
if err := d.state.DB.PutMention(ctx, mention); err != nil {
|
|
return fmt.Errorf("error putting mention in database: %w", err)
|
|
}
|
|
|
|
// Set the *new* mention and ID.
|
|
status.Mentions[i] = mention
|
|
status.MentionIDs[i] = mention.ID
|
|
}
|
|
|
|
for i := 0; i < len(status.MentionIDs); i++ {
|
|
if status.MentionIDs[i] == "" {
|
|
// This is a failed mention population, likely due
|
|
// to invalid incoming data / now-deleted accounts.
|
|
copy(status.Mentions[i:], status.Mentions[i+1:])
|
|
copy(status.MentionIDs[i:], status.MentionIDs[i+1:])
|
|
status.Mentions = status.Mentions[:len(status.Mentions)-1]
|
|
status.MentionIDs = status.MentionIDs[:len(status.MentionIDs)-1]
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Transport, existing *gtsmodel.Status, status *gtsmodel.Status) error {
|
|
// Allocate new slice to take the yet-to-be fetched attachment IDs.
|
|
status.AttachmentIDs = make([]string, len(status.Attachments))
|
|
|
|
for i := range status.Attachments {
|
|
placeholder := status.Attachments[i]
|
|
|
|
// Look for existing media attachment with remoet URL first.
|
|
existing, ok := existing.GetAttachmentByRemoteURL(placeholder.RemoteURL)
|
|
if ok && existing.ID != "" {
|
|
status.Attachments[i] = existing
|
|
status.AttachmentIDs[i] = existing.ID
|
|
continue
|
|
}
|
|
|
|
// Ensure a valid media attachment remote URL.
|
|
remoteURL, err := url.Parse(placeholder.RemoteURL)
|
|
if err != nil {
|
|
log.Errorf(ctx, "invalid remote media url %q: %v", placeholder.RemoteURL, err)
|
|
continue
|
|
}
|
|
|
|
// Start pre-processing remote media at remote URL.
|
|
processing, err := d.mediaManager.PreProcessMedia(ctx, func(ctx context.Context) (io.ReadCloser, int64, error) {
|
|
return tsport.DereferenceMedia(ctx, remoteURL)
|
|
}, nil, status.AccountID, &media.AdditionalMediaInfo{
|
|
StatusID: &status.ID,
|
|
RemoteURL: &placeholder.RemoteURL,
|
|
Description: &placeholder.Description,
|
|
Blurhash: &placeholder.Blurhash,
|
|
})
|
|
if err != nil {
|
|
log.Errorf(ctx, "error processing attachment: %v", err)
|
|
continue
|
|
}
|
|
|
|
// Force attachment loading *right now*.
|
|
media, err := processing.LoadAttachment(ctx)
|
|
if err != nil {
|
|
log.Errorf(ctx, "error loading attachment: %v", err)
|
|
continue
|
|
}
|
|
|
|
// Set the *new* attachment and ID.
|
|
status.Attachments[i] = media
|
|
status.AttachmentIDs[i] = media.ID
|
|
}
|
|
|
|
for i := 0; i < len(status.AttachmentIDs); i++ {
|
|
if status.AttachmentIDs[i] == "" {
|
|
// This is a failed attachment population, this may
|
|
// be due to us not currently supporting a media type.
|
|
copy(status.Attachments[i:], status.Attachments[i+1:])
|
|
copy(status.AttachmentIDs[i:], status.AttachmentIDs[i+1:])
|
|
status.Attachments = status.Attachments[:len(status.Attachments)-1]
|
|
status.AttachmentIDs = status.AttachmentIDs[:len(status.AttachmentIDs)-1]
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (d *deref) fetchStatusEmojis(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error {
|
|
// Fetch the full-fleshed-out emoji objects for our status.
|
|
emojis, err := d.populateEmojis(ctx, status.Emojis, requestUser)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to populate emojis: %w", err)
|
|
}
|
|
|
|
// Iterate over and get their IDs.
|
|
emojiIDs := make([]string, 0, len(emojis))
|
|
for _, e := range emojis {
|
|
emojiIDs = append(emojiIDs, e.ID)
|
|
}
|
|
|
|
// Set known emoji details.
|
|
status.Emojis = emojis
|
|
status.EmojiIDs = emojiIDs
|
|
|
|
return nil
|
|
}
|