gotosocial/internal/federation/dereferencing/thread.go

421 lines
12 KiB
Go
Raw Permalink Normal View History

// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package dereferencing
import (
"context"
"net/http"
"net/url"
[chore] use our own logging implementation (#716) * first commit Signed-off-by: kim <grufwub@gmail.com> * replace logging with our own log library Signed-off-by: kim <grufwub@gmail.com> * fix imports Signed-off-by: kim <grufwub@gmail.com> * fix log imports Signed-off-by: kim <grufwub@gmail.com> * add license text Signed-off-by: kim <grufwub@gmail.com> * fix package import cycle between config and log package Signed-off-by: kim <grufwub@gmail.com> * fix empty kv.Fields{} being passed to WithFields() Signed-off-by: kim <grufwub@gmail.com> * fix uses of log.WithFields() with whitespace issues and empty slices Signed-off-by: kim <grufwub@gmail.com> * *linter related grumbling* Signed-off-by: kim <grufwub@gmail.com> * gofmt the codebase! also fix more log.WithFields() formatting issues Signed-off-by: kim <grufwub@gmail.com> * update testrig code to match new changes Signed-off-by: kim <grufwub@gmail.com> * fix error wrapping in non fmt.Errorf function Signed-off-by: kim <grufwub@gmail.com> * add benchmarking of log.Caller() vs non-cached Signed-off-by: kim <grufwub@gmail.com> * fix syslog tests, add standard build tags to test runner to ensure consistency Signed-off-by: kim <grufwub@gmail.com> * make syslog tests more robust Signed-off-by: kim <grufwub@gmail.com> * fix caller depth arithmatic (is that how you spell it?) Signed-off-by: kim <grufwub@gmail.com> * update to use unkeyed fields in kv.Field{} instances Signed-off-by: kim <grufwub@gmail.com> * update go-kv library Signed-off-by: kim <grufwub@gmail.com> * update libraries list Signed-off-by: kim <grufwub@gmail.com> * fuck you linter get nerfed Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
2022-07-19 08:47:55 +00:00
"codeberg.org/gruf/go-kv"
"github.com/superseriousbusiness/activity/pub"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
[chore] use our own logging implementation (#716) * first commit Signed-off-by: kim <grufwub@gmail.com> * replace logging with our own log library Signed-off-by: kim <grufwub@gmail.com> * fix imports Signed-off-by: kim <grufwub@gmail.com> * fix log imports Signed-off-by: kim <grufwub@gmail.com> * add license text Signed-off-by: kim <grufwub@gmail.com> * fix package import cycle between config and log package Signed-off-by: kim <grufwub@gmail.com> * fix empty kv.Fields{} being passed to WithFields() Signed-off-by: kim <grufwub@gmail.com> * fix uses of log.WithFields() with whitespace issues and empty slices Signed-off-by: kim <grufwub@gmail.com> * *linter related grumbling* Signed-off-by: kim <grufwub@gmail.com> * gofmt the codebase! also fix more log.WithFields() formatting issues Signed-off-by: kim <grufwub@gmail.com> * update testrig code to match new changes Signed-off-by: kim <grufwub@gmail.com> * fix error wrapping in non fmt.Errorf function Signed-off-by: kim <grufwub@gmail.com> * add benchmarking of log.Caller() vs non-cached Signed-off-by: kim <grufwub@gmail.com> * fix syslog tests, add standard build tags to test runner to ensure consistency Signed-off-by: kim <grufwub@gmail.com> * make syslog tests more robust Signed-off-by: kim <grufwub@gmail.com> * fix caller depth arithmatic (is that how you spell it?) Signed-off-by: kim <grufwub@gmail.com> * update to use unkeyed fields in kv.Field{} instances Signed-off-by: kim <grufwub@gmail.com> * update go-kv library Signed-off-by: kim <grufwub@gmail.com> * update libraries list Signed-off-by: kim <grufwub@gmail.com> * fuck you linter get nerfed Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
2022-07-19 08:47:55 +00:00
"github.com/superseriousbusiness/gotosocial/internal/log"
)
// maxIter defines how many iterations of descendants or
// ancesters we are willing to follow before returning error.
const maxIter = 512
// dereferenceThread handles dereferencing status thread after
// fetch. Passing off appropriate parts to be enqueued for async
// processing, or handling some parts synchronously when required.
func (d *Dereferencer) dereferenceThread(
ctx context.Context,
requestUser string,
uri *url.URL,
status *gtsmodel.Status,
statusable ap.Statusable,
isNew bool,
) {
if isNew {
// This is a new status that we need the ancestors of in
// order to determine visibility. Perform the initial part
// of thread dereferencing, i.e. parents, synchronously.
err := d.DereferenceStatusAncestors(ctx, requestUser, status)
if err != nil {
log.Error(ctx, err)
}
// Enqueue dereferencing remaining status thread, (children), asychronously .
[performance] update remaining worker pools to use queues (#2865) * start replacing client + federator + media workers with new worker + queue types * refactor federatingDB.Delete(), drop queued messages when deleting account / status * move all queue purging to the processor workers * undo toolchain updates * code comments, ensure dereferencer worker pool gets started * update gruf libraries in readme * start the job scheduler separately to the worker pools * reshuffle ordering or server.go + remove duplicate worker start / stop * update go-list version * fix vendoring * move queue invalidation to before wipeing / deletion, to ensure queued work not dropped * add logging to worker processing functions in testrig, don't start workers in unexpected places * update go-structr to add (+then rely on) QueueCtx{} type * ensure more worker pools get started properly in tests * fix remaining broken tests relying on worker queue logic * fix account test suite queue popping logic, ensure noop workers do not pull from queue * move back accidentally shuffled account deletion order * ensure error (non nil!!) gets passed in refactored federatingDB{}.Delete() * silently drop deletes from accounts not permitted to * don't warn log on forwarded deletes * make if else clauses easier to parse * use getFederatorMsg() * improved code comment * improved code comment re: requesting account delete checks * remove boolean result from worker start / stop since false = already running or already stopped * remove optional passed-in http.client * remove worker starting from the admin CLI commands (we don't need to handle side-effects) * update prune cli to start scheduler but not all of the workers * fix rebase issues * remove redundant return statements * i'm sorry sir linter
2024-04-26 12:50:46 +00:00
d.state.Workers.Dereference.Queue.Push(func(ctx context.Context) {
if err := d.DereferenceStatusDescendants(ctx, requestUser, uri, statusable); err != nil {
log.Error(ctx, err)
}
})
} else {
// This is an existing status, dereference the WHOLE thread asynchronously.
[performance] update remaining worker pools to use queues (#2865) * start replacing client + federator + media workers with new worker + queue types * refactor federatingDB.Delete(), drop queued messages when deleting account / status * move all queue purging to the processor workers * undo toolchain updates * code comments, ensure dereferencer worker pool gets started * update gruf libraries in readme * start the job scheduler separately to the worker pools * reshuffle ordering or server.go + remove duplicate worker start / stop * update go-list version * fix vendoring * move queue invalidation to before wipeing / deletion, to ensure queued work not dropped * add logging to worker processing functions in testrig, don't start workers in unexpected places * update go-structr to add (+then rely on) QueueCtx{} type * ensure more worker pools get started properly in tests * fix remaining broken tests relying on worker queue logic * fix account test suite queue popping logic, ensure noop workers do not pull from queue * move back accidentally shuffled account deletion order * ensure error (non nil!!) gets passed in refactored federatingDB{}.Delete() * silently drop deletes from accounts not permitted to * don't warn log on forwarded deletes * make if else clauses easier to parse * use getFederatorMsg() * improved code comment * improved code comment re: requesting account delete checks * remove boolean result from worker start / stop since false = already running or already stopped * remove optional passed-in http.client * remove worker starting from the admin CLI commands (we don't need to handle side-effects) * update prune cli to start scheduler but not all of the workers * fix rebase issues * remove redundant return statements * i'm sorry sir linter
2024-04-26 12:50:46 +00:00
d.state.Workers.Dereference.Queue.Push(func(ctx context.Context) {
if err := d.DereferenceStatusAncestors(ctx, requestUser, status); err != nil {
log.Error(ctx, err)
}
if err := d.DereferenceStatusDescendants(ctx, requestUser, uri, statusable); err != nil {
log.Error(ctx, err)
}
})
}
}
// DereferenceStatusAncestors iterates upwards from the given status, using InReplyToURI, to ensure that as many parent statuses as possible are dereferenced.
func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error {
// Start log entry with fields
l := log.WithContext(ctx).
WithFields(kv.Fields{
{"username", username},
{"original", status.URI},
}...)
// Keep track of already dereferenced statuses
// for this ancestor thread to prevent recursion.
derefdStatuses := make(map[string]struct{}, 10)
// Mark given status as the one
// we're currently working on.
current := status
for i := 0; i < maxIter; i++ {
if current.InReplyToURI == "" {
// Status has no parent, we've
// reached the top of the chain.
return nil
}
// Apparent current parent URI to log fields.
l = l.WithField("parent", current.InReplyToURI)
l.Trace("following status ancestor")
// Check whether this parent has already been deref'd.
if _, ok := derefdStatuses[current.InReplyToURI]; ok {
l.Warn("self referencing status ancestor")
return nil
}
// Add this status's parent URI to map of deref'd.
derefdStatuses[current.InReplyToURI] = struct{}{}
// Parse status parent URI for later use.
uri, err := url.Parse(current.InReplyToURI)
if err != nil {
l.Warnf("invalid uri: %v", err)
return nil
}
// Fetch parent status by current's reply URI, this handles
// case of existing (updating if necessary) or a new status.
parent, _, _, err := d.getStatusByURI(ctx, username, uri)
// Check for a returned HTTP code via error.
switch code := gtserror.StatusCode(err); {
// 404 may indicate deletion, but can also
// indicate that we don't have permission to
// view the status (it's followers-only and
// we don't follow, for example).
case code == http.StatusNotFound:
// If this reply is followers-only or stricter,
// we can safely assume the status it replies
// to is also followers only or stricter.
//
// In this case we should leave the inReplyTo
// URI in place for visibility filtering,
// and just return since we can go no further.
if status.Visibility == gtsmodel.VisibilityFollowersOnly ||
status.Visibility == gtsmodel.VisibilityMutualsOnly ||
status.Visibility == gtsmodel.VisibilityDirect {
return nil
}
// If the reply is public or unlisted then
// likely the replied-to status is/was public
// or unlisted and has indeed been deleted,
// fall through to the Gone case to clean up.
fallthrough
// Gone (410) definitely indicates deletion.
// Update the status to remove references to
// the now-gone parent.
case code == http.StatusGone:
l.Trace("status orphaned")
current.InReplyTo = nil
current.InReplyToAccount = nil
return d.updateStatusParent(ctx,
current,
"", // status ID
"", // status URI
"", // account ID
)
// An error was returned for a status during
// an attempted NEW dereference, return here.
//
// NOTE: this will catch all cases of a nil
// parent, all cases below can safely assume
// a non-nil parent in their code logic.
case err != nil && parent == nil:
return gtserror.Newf("error dereferencing new %s: %w", current.InReplyToURI, err)
// An error was returned for an existing parent,
// we simply treat this as a temporary situation.
case err != nil:
l.Errorf("error getting parent: %v", err)
}
// Start a new switch case
// as the following scenarios
// are possible with / without
// any returned error.
switch {
// The current status is using an indirect URL
// in order to reference the parent. This is just
// weird and broken... Leave the URI in place but
// don't link the statuses via database IDs as it
// could cause all sorts of unexpected situations.
case current.InReplyToURI != parent.URI:
l.Errorf("indirect in_reply_to_uri => %s", parent.URI)
// The ID has changed for currently stored parent ID
// (which may be empty, if new!) and fetched version.
//
// Update the current's inReplyTo fields to parent.
case current.InReplyToID != parent.ID:
l.Tracef("parent changed %s => %s", current.InReplyToID, parent.ID)
current.InReplyToAccount = parent.Account
if err := d.updateStatusParent(ctx,
current,
parent.ID,
parent.URI,
parent.AccountID,
); err != nil {
return err
}
}
// Set next parent to use.
current.InReplyTo = parent
current = current.InReplyTo
}
return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI)
}
// DereferenceStatusDescendents iterates downwards from the given status, using its replies, to ensure that as many children statuses as possible are dereferenced.
func (d *Dereferencer) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
statusIRIStr := statusIRI.String()
[chore] use our own logging implementation (#716) * first commit Signed-off-by: kim <grufwub@gmail.com> * replace logging with our own log library Signed-off-by: kim <grufwub@gmail.com> * fix imports Signed-off-by: kim <grufwub@gmail.com> * fix log imports Signed-off-by: kim <grufwub@gmail.com> * add license text Signed-off-by: kim <grufwub@gmail.com> * fix package import cycle between config and log package Signed-off-by: kim <grufwub@gmail.com> * fix empty kv.Fields{} being passed to WithFields() Signed-off-by: kim <grufwub@gmail.com> * fix uses of log.WithFields() with whitespace issues and empty slices Signed-off-by: kim <grufwub@gmail.com> * *linter related grumbling* Signed-off-by: kim <grufwub@gmail.com> * gofmt the codebase! also fix more log.WithFields() formatting issues Signed-off-by: kim <grufwub@gmail.com> * update testrig code to match new changes Signed-off-by: kim <grufwub@gmail.com> * fix error wrapping in non fmt.Errorf function Signed-off-by: kim <grufwub@gmail.com> * add benchmarking of log.Caller() vs non-cached Signed-off-by: kim <grufwub@gmail.com> * fix syslog tests, add standard build tags to test runner to ensure consistency Signed-off-by: kim <grufwub@gmail.com> * make syslog tests more robust Signed-off-by: kim <grufwub@gmail.com> * fix caller depth arithmatic (is that how you spell it?) Signed-off-by: kim <grufwub@gmail.com> * update to use unkeyed fields in kv.Field{} instances Signed-off-by: kim <grufwub@gmail.com> * update go-kv library Signed-off-by: kim <grufwub@gmail.com> * update libraries list Signed-off-by: kim <grufwub@gmail.com> * fuck you linter get nerfed Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
2022-07-19 08:47:55 +00:00
// Start log entry with fields
l := log.WithContext(ctx).
WithFields(kv.Fields{
{"username", username},
{"status", statusIRIStr},
}...)
// Log function start
l.Trace("beginning")
// OUR instance hostname.
localhost := config.GetHost()
// Keep track of already dereferenced collection
// pages for this thread to prevent recursion.
derefdPages := make(map[string]struct{}, 10)
// frame represents a single stack frame when
// iteratively derefencing status descendants.
type frame struct {
// page is the current activity streams
// collection page we are on (as we often
// push a frame to stack mid-paging).
page ap.CollectionPageIterator
// pageURI is the URI string of
// the frame's collection page
// (is useful for logging).
pageURI string
}
var (
// current stack frame
current *frame
// stack is a list of "shelved" descendand iterator
// frames. this is pushed to when a child status frame
// is found that we need to further iterate down, and
// popped from into 'current' when that child's tree
// of further descendants is exhausted.
stack = []*frame{
func() *frame {
// Start input frame is built from the first input.
page, pageURI := getAttachedStatusCollectionPage(parent)
if page == nil {
return nil
}
return &frame{page: page, pageURI: pageURI}
}(),
}
// popStack will remove and return the top frame
// from the stack, or nil if currently empty.
popStack = func() *frame {
if len(stack) == 0 {
return nil
}
// Get frame index
idx := len(stack) - 1
// Pop last frame
frame := stack[idx]
stack = stack[:idx]
return frame
}
)
stackLoop:
for i := 0; i < maxIter; i++ {
// Pop next frame, nil means we are at end
if current = popStack(); current == nil {
return nil
}
pageLoop:
for {
l.Tracef("following collection page: %s", current.pageURI)
itemLoop:
for {
// Get next item from page iter.
next := current.page.NextItem()
if next == nil {
break itemLoop
}
// Check for available IRI.
itemIRI, _ := pub.ToId(next)
if itemIRI == nil {
continue itemLoop
}
if itemIRI.Host == localhost {
// This child is one of ours,
continue itemLoop
}
[feature] status refetch support (#1690) * revamp http client to not limit requests, instead use sender worker Signed-off-by: kim <grufwub@gmail.com> * remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping Signed-off-by: kim <grufwub@gmail.com> * improve batch senders to keep popping recipients until remote URL found Signed-off-by: kim <grufwub@gmail.com> * fix recipient looping issue Signed-off-by: kim <grufwub@gmail.com> * move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP Signed-off-by: kim <grufwub@gmail.com> * first draft of status refetching logic Signed-off-by: kim <grufwub@gmail.com> * fix testrig to use new federation alloc func signature Signed-off-by: kim <grufwub@gmail.com> * fix log format directive Signed-off-by: kim <grufwub@gmail.com> * add status fetched_at migration Signed-off-by: kim <grufwub@gmail.com> * remove unused / unchecked for error types Signed-off-by: kim <grufwub@gmail.com> * add back the used type... Signed-off-by: kim <grufwub@gmail.com> * add separate internal getStatus() function for derefThread() that doesn't recurse Signed-off-by: kim <grufwub@gmail.com> * improved mention and media attachment error handling Signed-off-by: kim <grufwub@gmail.com> * fix log and error format directives Signed-off-by: kim <grufwub@gmail.com> * update account deref to match status deref changes Signed-off-by: kim <grufwub@gmail.com> * very small code formatting change to make things clearer Signed-off-by: kim <grufwub@gmail.com> * add more code comments Signed-off-by: kim <grufwub@gmail.com> * improved code commenting Signed-off-by: kim <grufwub@gmail.com> * only check for required further derefs if needed Signed-off-by: kim <grufwub@gmail.com> * improved cache invalidation Signed-off-by: kim <grufwub@gmail.com> * tweak cache restarting to use a (very small) backoff Signed-off-by: kim <grufwub@gmail.com> * small readability changes and fixes Signed-off-by: kim <grufwub@gmail.com> * fix account sync issues Signed-off-by: kim <grufwub@gmail.com> * fix merge conflicts + update account enrichment to accept already-passed accountable Signed-off-by: kim <grufwub@gmail.com> * remove secondary function declaration Signed-off-by: kim <grufwub@gmail.com> * normalise dereferencer get status / account behaviour, fix remaining tests Signed-off-by: kim <grufwub@gmail.com> * fix remaining rebase conflicts, finish commenting code Signed-off-by: kim <grufwub@gmail.com> * appease the linter Signed-off-by: kim <grufwub@gmail.com> * add source file header Signed-off-by: kim <grufwub@gmail.com> * update to use TIMESTAMPTZ column type instead of just TIMESTAMP Signed-off-by: kim <grufwub@gmail.com> * don't pass in 'updated_at' to UpdateEmoji() Signed-off-by: kim <grufwub@gmail.com> * use new ap.Resolve{Account,Status}able() functions Signed-off-by: kim <grufwub@gmail.com> * remove the somewhat confusing rescoping of the same variable names Signed-off-by: kim <grufwub@gmail.com> * update migration file name, improved database delete error returns Signed-off-by: kim <grufwub@gmail.com> * formatting Signed-off-by: kim <grufwub@gmail.com> * improved multi-delete database functions to minimise DB calls Signed-off-by: kim <grufwub@gmail.com> * remove unused type Signed-off-by: kim <grufwub@gmail.com> * fix delete statements Signed-off-by: kim <grufwub@gmail.com> --------- Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 09:15:54 +00:00
// Dereference the remote status and store in the database.
// getStatusByURI guards against the following conditions:
// - refetching recently fetched statuses (recursion!)
// - remote domain is blocked (will return unretrievable)
// - any http type error for a new status returns unretrievable
_, statusable, _, err := d.getStatusByURI(ctx, username, itemIRI)
if err != nil {
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
[feature] status refetch support (#1690) * revamp http client to not limit requests, instead use sender worker Signed-off-by: kim <grufwub@gmail.com> * remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping Signed-off-by: kim <grufwub@gmail.com> * improve batch senders to keep popping recipients until remote URL found Signed-off-by: kim <grufwub@gmail.com> * fix recipient looping issue Signed-off-by: kim <grufwub@gmail.com> * move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP Signed-off-by: kim <grufwub@gmail.com> * first draft of status refetching logic Signed-off-by: kim <grufwub@gmail.com> * fix testrig to use new federation alloc func signature Signed-off-by: kim <grufwub@gmail.com> * fix log format directive Signed-off-by: kim <grufwub@gmail.com> * add status fetched_at migration Signed-off-by: kim <grufwub@gmail.com> * remove unused / unchecked for error types Signed-off-by: kim <grufwub@gmail.com> * add back the used type... Signed-off-by: kim <grufwub@gmail.com> * add separate internal getStatus() function for derefThread() that doesn't recurse Signed-off-by: kim <grufwub@gmail.com> * improved mention and media attachment error handling Signed-off-by: kim <grufwub@gmail.com> * fix log and error format directives Signed-off-by: kim <grufwub@gmail.com> * update account deref to match status deref changes Signed-off-by: kim <grufwub@gmail.com> * very small code formatting change to make things clearer Signed-off-by: kim <grufwub@gmail.com> * add more code comments Signed-off-by: kim <grufwub@gmail.com> * improved code commenting Signed-off-by: kim <grufwub@gmail.com> * only check for required further derefs if needed Signed-off-by: kim <grufwub@gmail.com> * improved cache invalidation Signed-off-by: kim <grufwub@gmail.com> * tweak cache restarting to use a (very small) backoff Signed-off-by: kim <grufwub@gmail.com> * small readability changes and fixes Signed-off-by: kim <grufwub@gmail.com> * fix account sync issues Signed-off-by: kim <grufwub@gmail.com> * fix merge conflicts + update account enrichment to accept already-passed accountable Signed-off-by: kim <grufwub@gmail.com> * remove secondary function declaration Signed-off-by: kim <grufwub@gmail.com> * normalise dereferencer get status / account behaviour, fix remaining tests Signed-off-by: kim <grufwub@gmail.com> * fix remaining rebase conflicts, finish commenting code Signed-off-by: kim <grufwub@gmail.com> * appease the linter Signed-off-by: kim <grufwub@gmail.com> * add source file header Signed-off-by: kim <grufwub@gmail.com> * update to use TIMESTAMPTZ column type instead of just TIMESTAMP Signed-off-by: kim <grufwub@gmail.com> * don't pass in 'updated_at' to UpdateEmoji() Signed-off-by: kim <grufwub@gmail.com> * use new ap.Resolve{Account,Status}able() functions Signed-off-by: kim <grufwub@gmail.com> * remove the somewhat confusing rescoping of the same variable names Signed-off-by: kim <grufwub@gmail.com> * update migration file name, improved database delete error returns Signed-off-by: kim <grufwub@gmail.com> * formatting Signed-off-by: kim <grufwub@gmail.com> * improved multi-delete database functions to minimise DB calls Signed-off-by: kim <grufwub@gmail.com> * remove unused type Signed-off-by: kim <grufwub@gmail.com> * fix delete statements Signed-off-by: kim <grufwub@gmail.com> --------- Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 09:15:54 +00:00
continue itemLoop
}
if statusable == nil {
// A nil statusable return from
// getStatusByURI() indicates a
// remote status that was already
// dereferenced recently (so no
// need to go through descendents).
continue itemLoop
}
// Extract any attached collection + ID URI from status.
page, pageURI := getAttachedStatusCollectionPage(statusable)
if page == nil {
continue itemLoop
}
// Put current and next frame at top of stack
stack = append(stack, current, &frame{
pageURI: pageURI,
page: page,
})
// Now start at top of loop
continue stackLoop
}
// Get the next page from iterator.
next := current.page.NextPage()
if next == nil || !next.IsIRI() {
continue stackLoop
}
// Get the next page IRI.
nextURI := next.GetIRI()
nextURIStr := nextURI.String()
// Check whether this page has already been deref'd.
if _, ok := derefdPages[nextURIStr]; ok {
l.Warnf("self referencing collection page(s): %s", nextURIStr)
continue stackLoop
}
// Mark this collection page as deref'd.
derefdPages[nextURIStr] = struct{}{}
// Dereference this next collection page by its IRI.
[feature] status refetch support (#1690) * revamp http client to not limit requests, instead use sender worker Signed-off-by: kim <grufwub@gmail.com> * remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping Signed-off-by: kim <grufwub@gmail.com> * improve batch senders to keep popping recipients until remote URL found Signed-off-by: kim <grufwub@gmail.com> * fix recipient looping issue Signed-off-by: kim <grufwub@gmail.com> * move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP Signed-off-by: kim <grufwub@gmail.com> * first draft of status refetching logic Signed-off-by: kim <grufwub@gmail.com> * fix testrig to use new federation alloc func signature Signed-off-by: kim <grufwub@gmail.com> * fix log format directive Signed-off-by: kim <grufwub@gmail.com> * add status fetched_at migration Signed-off-by: kim <grufwub@gmail.com> * remove unused / unchecked for error types Signed-off-by: kim <grufwub@gmail.com> * add back the used type... Signed-off-by: kim <grufwub@gmail.com> * add separate internal getStatus() function for derefThread() that doesn't recurse Signed-off-by: kim <grufwub@gmail.com> * improved mention and media attachment error handling Signed-off-by: kim <grufwub@gmail.com> * fix log and error format directives Signed-off-by: kim <grufwub@gmail.com> * update account deref to match status deref changes Signed-off-by: kim <grufwub@gmail.com> * very small code formatting change to make things clearer Signed-off-by: kim <grufwub@gmail.com> * add more code comments Signed-off-by: kim <grufwub@gmail.com> * improved code commenting Signed-off-by: kim <grufwub@gmail.com> * only check for required further derefs if needed Signed-off-by: kim <grufwub@gmail.com> * improved cache invalidation Signed-off-by: kim <grufwub@gmail.com> * tweak cache restarting to use a (very small) backoff Signed-off-by: kim <grufwub@gmail.com> * small readability changes and fixes Signed-off-by: kim <grufwub@gmail.com> * fix account sync issues Signed-off-by: kim <grufwub@gmail.com> * fix merge conflicts + update account enrichment to accept already-passed accountable Signed-off-by: kim <grufwub@gmail.com> * remove secondary function declaration Signed-off-by: kim <grufwub@gmail.com> * normalise dereferencer get status / account behaviour, fix remaining tests Signed-off-by: kim <grufwub@gmail.com> * fix remaining rebase conflicts, finish commenting code Signed-off-by: kim <grufwub@gmail.com> * appease the linter Signed-off-by: kim <grufwub@gmail.com> * add source file header Signed-off-by: kim <grufwub@gmail.com> * update to use TIMESTAMPTZ column type instead of just TIMESTAMP Signed-off-by: kim <grufwub@gmail.com> * don't pass in 'updated_at' to UpdateEmoji() Signed-off-by: kim <grufwub@gmail.com> * use new ap.Resolve{Account,Status}able() functions Signed-off-by: kim <grufwub@gmail.com> * remove the somewhat confusing rescoping of the same variable names Signed-off-by: kim <grufwub@gmail.com> * update migration file name, improved database delete error returns Signed-off-by: kim <grufwub@gmail.com> * formatting Signed-off-by: kim <grufwub@gmail.com> * improved multi-delete database functions to minimise DB calls Signed-off-by: kim <grufwub@gmail.com> * remove unused type Signed-off-by: kim <grufwub@gmail.com> * fix delete statements Signed-off-by: kim <grufwub@gmail.com> --------- Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 09:15:54 +00:00
collectionPage, err := d.dereferenceCollectionPage(ctx,
username,
nextURI,
[feature] status refetch support (#1690) * revamp http client to not limit requests, instead use sender worker Signed-off-by: kim <grufwub@gmail.com> * remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping Signed-off-by: kim <grufwub@gmail.com> * improve batch senders to keep popping recipients until remote URL found Signed-off-by: kim <grufwub@gmail.com> * fix recipient looping issue Signed-off-by: kim <grufwub@gmail.com> * move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP Signed-off-by: kim <grufwub@gmail.com> * first draft of status refetching logic Signed-off-by: kim <grufwub@gmail.com> * fix testrig to use new federation alloc func signature Signed-off-by: kim <grufwub@gmail.com> * fix log format directive Signed-off-by: kim <grufwub@gmail.com> * add status fetched_at migration Signed-off-by: kim <grufwub@gmail.com> * remove unused / unchecked for error types Signed-off-by: kim <grufwub@gmail.com> * add back the used type... Signed-off-by: kim <grufwub@gmail.com> * add separate internal getStatus() function for derefThread() that doesn't recurse Signed-off-by: kim <grufwub@gmail.com> * improved mention and media attachment error handling Signed-off-by: kim <grufwub@gmail.com> * fix log and error format directives Signed-off-by: kim <grufwub@gmail.com> * update account deref to match status deref changes Signed-off-by: kim <grufwub@gmail.com> * very small code formatting change to make things clearer Signed-off-by: kim <grufwub@gmail.com> * add more code comments Signed-off-by: kim <grufwub@gmail.com> * improved code commenting Signed-off-by: kim <grufwub@gmail.com> * only check for required further derefs if needed Signed-off-by: kim <grufwub@gmail.com> * improved cache invalidation Signed-off-by: kim <grufwub@gmail.com> * tweak cache restarting to use a (very small) backoff Signed-off-by: kim <grufwub@gmail.com> * small readability changes and fixes Signed-off-by: kim <grufwub@gmail.com> * fix account sync issues Signed-off-by: kim <grufwub@gmail.com> * fix merge conflicts + update account enrichment to accept already-passed accountable Signed-off-by: kim <grufwub@gmail.com> * remove secondary function declaration Signed-off-by: kim <grufwub@gmail.com> * normalise dereferencer get status / account behaviour, fix remaining tests Signed-off-by: kim <grufwub@gmail.com> * fix remaining rebase conflicts, finish commenting code Signed-off-by: kim <grufwub@gmail.com> * appease the linter Signed-off-by: kim <grufwub@gmail.com> * add source file header Signed-off-by: kim <grufwub@gmail.com> * update to use TIMESTAMPTZ column type instead of just TIMESTAMP Signed-off-by: kim <grufwub@gmail.com> * don't pass in 'updated_at' to UpdateEmoji() Signed-off-by: kim <grufwub@gmail.com> * use new ap.Resolve{Account,Status}able() functions Signed-off-by: kim <grufwub@gmail.com> * remove the somewhat confusing rescoping of the same variable names Signed-off-by: kim <grufwub@gmail.com> * update migration file name, improved database delete error returns Signed-off-by: kim <grufwub@gmail.com> * formatting Signed-off-by: kim <grufwub@gmail.com> * improved multi-delete database functions to minimise DB calls Signed-off-by: kim <grufwub@gmail.com> * remove unused type Signed-off-by: kim <grufwub@gmail.com> * fix delete statements Signed-off-by: kim <grufwub@gmail.com> --------- Signed-off-by: kim <grufwub@gmail.com>
2023-05-12 09:15:54 +00:00
)
if err != nil {
l.Errorf("error dereferencing collection page %q: %s", nextURIStr, err)
continue stackLoop
}
// Set the next collection page.
current.page = collectionPage
current.pageURI = nextURIStr
continue pageLoop
}
}
return gtserror.Newf("reached %d descendant iterations for %q", maxIter, statusIRIStr)
}
// updateStatusParent updates the given status' parent
// status URI, ID and account ID to given values in DB.
func (d *Dereferencer) updateStatusParent(
ctx context.Context,
status *gtsmodel.Status,
parentStatusID string,
parentStatusURI string,
parentAccountID string,
) error {
status.InReplyToAccountID = parentAccountID
status.InReplyToURI = parentStatusURI
status.InReplyToID = parentStatusID
if err := d.state.DB.UpdateStatus(ctx,
status,
"in_reply_to_id",
"in_reply_to_uri",
"in_reply_to_account_id",
); err != nil {
return gtserror.Newf("error updating status %s: %w", status.URI, err)
}
return nil
}