From c675d47a8c8bacd926090e010816fe8024da1b79 Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Wed, 31 Jan 2024 05:40:41 -0800 Subject: [PATCH] Improve context descendant sorting (#2579) * Improve context descendant sorting Topologically sort replies, then move self-replies to top of list * Unify descendant sort passes * Correct test package name * Preallocate maps --- internal/processing/status/get.go | 115 +++++++++++++-- internal/processing/status/get_test.go | 197 +++++++++++++++++++++++++ 2 files changed, 302 insertions(+), 10 deletions(-) create mode 100644 internal/processing/status/get_test.go diff --git a/internal/processing/status/get.go b/internal/processing/status/get.go index f8c037404..7c275acbd 100644 --- a/internal/processing/status/get.go +++ b/internal/processing/status/get.go @@ -19,7 +19,8 @@ import ( "context" - "sort" + "slices" + "strings" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" "github.com/superseriousbusiness/gotosocial/internal/gtserror" @@ -74,27 +75,23 @@ func (p *Processor) contextGet( return nil, errWithCode } - context := &apimodel.Context{ - Ancestors: []apimodel.Status{}, - Descendants: []apimodel.Status{}, - } - parents, err := p.state.DB.GetStatusParents(ctx, targetStatus) if err != nil { return nil, gtserror.NewErrorInternalError(err) } + var ancestors []*apimodel.Status for _, status := range parents { if v, err := p.filter.StatusVisible(ctx, requestingAccount, status); err == nil && v { apiStatus, err := convert(ctx, status, requestingAccount) if err == nil { - context.Ancestors = append(context.Ancestors, *apiStatus) + ancestors = append(ancestors, apiStatus) } } } - sort.Slice(context.Ancestors, func(i int, j int) bool { - return context.Ancestors[i].ID < context.Ancestors[j].ID + slices.SortFunc(ancestors, func(lhs, rhs *apimodel.Status) int { + return strings.Compare(lhs.ID, rhs.ID) }) children, err := p.state.DB.GetStatusChildren(ctx, targetStatus.ID) @@ -102,18 +99,116 @@ func (p *Processor) contextGet( return nil, gtserror.NewErrorInternalError(err) } + var descendants []*apimodel.Status for _, status := range children { if v, err := p.filter.StatusVisible(ctx, requestingAccount, status); err == nil && v { apiStatus, err := convert(ctx, status, requestingAccount) if err == nil { - context.Descendants = append(context.Descendants, *apiStatus) + descendants = append(descendants, apiStatus) } } } + TopoSort(descendants, targetStatus.AccountID) + + //goland:noinspection GoImportUsedAsName + context := &apimodel.Context{ + Ancestors: make([]apimodel.Status, 0, len(ancestors)), + Descendants: make([]apimodel.Status, 0, len(descendants)), + } + for _, ancestor := range ancestors { + context.Ancestors = append(context.Ancestors, *ancestor) + } + for _, descendant := range descendants { + context.Descendants = append(context.Descendants, *descendant) + } + return context, nil } +// TopoSort sorts statuses topologically, by self-reply, and by ID. +// Can handle cycles but the output order will be arbitrary. +// (But if there are cycles, something went wrong upstream.) +func TopoSort(apiStatuses []*apimodel.Status, targetAccountID string) { + if len(apiStatuses) == 0 { + return + } + + // Map of status IDs to statuses. + lookup := make(map[string]*apimodel.Status, len(apiStatuses)) + for _, apiStatus := range apiStatuses { + lookup[apiStatus.ID] = apiStatus + } + + // Tree of statuses to their children. + // The nil status may have children: any who don't have a parent, or whose parent isn't in the input. + tree := make(map[*apimodel.Status][]*apimodel.Status, len(apiStatuses)) + for _, apiStatus := range apiStatuses { + var parent *apimodel.Status + if apiStatus.InReplyToID != nil { + parent = lookup[*apiStatus.InReplyToID] + } + tree[parent] = append(tree[parent], apiStatus) + } + + // Sort children of each status by self-reply status and then ID, *in reverse*. + isSelfReply := func(apiStatus *apimodel.Status) bool { + return apiStatus.GetAccountID() == targetAccountID && + apiStatus.InReplyToAccountID != nil && + *apiStatus.InReplyToAccountID == targetAccountID + } + for id, children := range tree { + slices.SortFunc(children, func(lhs, rhs *apimodel.Status) int { + lhsIsContextSelfReply := isSelfReply(lhs) + rhsIsContextSelfReply := isSelfReply(rhs) + + if lhsIsContextSelfReply && !rhsIsContextSelfReply { + return 1 + } else if !lhsIsContextSelfReply && rhsIsContextSelfReply { + return -1 + } + + return -strings.Compare(lhs.ID, rhs.ID) + }) + tree[id] = children + } + + // Traverse the tree using preorder depth-first search, topologically sorting the statuses. + stack := make([]*apimodel.Status, 1, len(tree)) + apiStatusIndex := 0 + for len(stack) > 0 { + parent := stack[len(stack)-1] + children := tree[parent] + + if len(children) == 0 { + // Remove this node from the tree. + delete(tree, parent) + // Go back to this node's parent. + stack = stack[:len(stack)-1] + continue + } + + // Remove the last child entry (the first in sorted order). + child := children[len(children)-1] + tree[parent] = children[:len(children)-1] + + // Explore its children next. + stack = append(stack, child) + + // Overwrite the next entry of the input slice. + apiStatuses[apiStatusIndex] = child + apiStatusIndex++ + } + + // There should only be nodes left in the tree in the event of a cycle. + // Append them to the end in arbitrary order. + // This ensures that the slice of statuses has no duplicates. + for node := range tree { + apiStatuses[apiStatusIndex] = node + apiStatusIndex++ + } +} + // ContextGet returns the context (previous and following posts) from the given status ID. func (p *Processor) ContextGet(ctx context.Context, requestingAccount *gtsmodel.Account, targetStatusID string) (*apimodel.Context, gtserror.WithCode) { return p.contextGet(ctx, requestingAccount, targetStatusID, p.converter.StatusToAPIStatus) diff --git a/internal/processing/status/get_test.go b/internal/processing/status/get_test.go new file mode 100644 index 000000000..80482f1f2 --- /dev/null +++ b/internal/processing/status/get_test.go @@ -0,0 +1,197 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package status_test + +import ( + "github.com/stretchr/testify/suite" + apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" + "github.com/superseriousbusiness/gotosocial/internal/processing/status" + "testing" +) + +type topoSortTestSuite struct { + suite.Suite +} + +func statusIDs(apiStatuses []*apimodel.Status) []string { + ids := make([]string, 0, len(apiStatuses)) + for _, apiStatus := range apiStatuses { + ids = append(ids, apiStatus.ID) + } + return ids +} + +func (suite *topoSortTestSuite) TestBranched() { + // https://commons.wikimedia.org/wiki/File:Sorted_binary_tree_ALL_RGB.svg + f := &apimodel.Status{ID: "F"} + b := &apimodel.Status{ID: "B", InReplyToID: &f.ID} + a := &apimodel.Status{ID: "A", InReplyToID: &b.ID} + d := &apimodel.Status{ID: "D", InReplyToID: &b.ID} + c := &apimodel.Status{ID: "C", InReplyToID: &d.ID} + e := &apimodel.Status{ID: "E", InReplyToID: &d.ID} + g := &apimodel.Status{ID: "G", InReplyToID: &f.ID} + i := &apimodel.Status{ID: "I", InReplyToID: &g.ID} + h := &apimodel.Status{ID: "H", InReplyToID: &i.ID} + + expected := statusIDs([]*apimodel.Status{f, b, a, d, c, e, g, i, h}) + list := []*apimodel.Status{a, b, c, d, e, f, g, h, i} + status.TopoSort(list, "") + actual := statusIDs(list) + + suite.Equal(expected, actual) +} + +func (suite *topoSortTestSuite) TestBranchedWithSelfReplyChain() { + targetAccount := &apimodel.Account{ID: "1"} + otherAccount := &apimodel.Account{ID: "2"} + + f := &apimodel.Status{ + ID: "F", + Account: targetAccount, + } + b := &apimodel.Status{ + ID: "B", + Account: targetAccount, + InReplyToID: &f.ID, + InReplyToAccountID: &f.Account.ID, + } + a := &apimodel.Status{ + ID: "A", + Account: otherAccount, + InReplyToID: &b.ID, + InReplyToAccountID: &b.Account.ID, + } + d := &apimodel.Status{ + ID: "D", + Account: targetAccount, + InReplyToID: &b.ID, + InReplyToAccountID: &b.Account.ID, + } + c := &apimodel.Status{ + ID: "C", + Account: otherAccount, + InReplyToID: &d.ID, + InReplyToAccountID: &d.Account.ID, + } + e := &apimodel.Status{ + ID: "E", + Account: targetAccount, + InReplyToID: &d.ID, + InReplyToAccountID: &d.Account.ID, + } + g := &apimodel.Status{ + ID: "G", + Account: otherAccount, + InReplyToID: &f.ID, + InReplyToAccountID: &f.Account.ID, + } + i := &apimodel.Status{ + ID: "I", + Account: targetAccount, + InReplyToID: &g.ID, + InReplyToAccountID: &g.Account.ID, + } + h := &apimodel.Status{ + ID: "H", + Account: otherAccount, + InReplyToID: &i.ID, + InReplyToAccountID: &i.Account.ID, + } + + expected := statusIDs([]*apimodel.Status{f, b, d, e, c, a, g, i, h}) + list := []*apimodel.Status{a, b, c, d, e, f, g, h, i} + status.TopoSort(list, targetAccount.ID) + actual := statusIDs(list) + + suite.Equal(expected, actual) +} + +func (suite *topoSortTestSuite) TestDisconnected() { + f := &apimodel.Status{ID: "F"} + b := &apimodel.Status{ID: "B", InReplyToID: &f.ID} + dID := "D" + e := &apimodel.Status{ID: "E", InReplyToID: &dID} + + expected := statusIDs([]*apimodel.Status{e, f, b}) + list := []*apimodel.Status{b, e, f} + status.TopoSort(list, "") + actual := statusIDs(list) + + suite.Equal(expected, actual) +} + +func (suite *topoSortTestSuite) TestTrivialCycle() { + xID := "X" + x := &apimodel.Status{ID: xID, InReplyToID: &xID} + + expected := statusIDs([]*apimodel.Status{x}) + list := []*apimodel.Status{x} + status.TopoSort(list, "") + actual := statusIDs(list) + + suite.ElementsMatch(expected, actual) +} + +func (suite *topoSortTestSuite) TestCycle() { + yID := "Y" + x := &apimodel.Status{ID: "X", InReplyToID: &yID} + y := &apimodel.Status{ID: yID, InReplyToID: &x.ID} + + expected := statusIDs([]*apimodel.Status{x, y}) + list := []*apimodel.Status{x, y} + status.TopoSort(list, "") + actual := statusIDs(list) + + suite.ElementsMatch(expected, actual) +} + +func (suite *topoSortTestSuite) TestMixedCycle() { + yID := "Y" + x := &apimodel.Status{ID: "X", InReplyToID: &yID} + y := &apimodel.Status{ID: yID, InReplyToID: &x.ID} + z := &apimodel.Status{ID: "Z"} + + expected := statusIDs([]*apimodel.Status{x, y, z}) + list := []*apimodel.Status{x, y, z} + status.TopoSort(list, "") + actual := statusIDs(list) + + suite.ElementsMatch(expected, actual) +} + +func (suite *topoSortTestSuite) TestEmpty() { + expected := statusIDs([]*apimodel.Status{}) + list := []*apimodel.Status{} + status.TopoSort(list, "") + actual := statusIDs(list) + + suite.Equal(expected, actual) +} + +func (suite *topoSortTestSuite) TestNil() { + expected := statusIDs(nil) + var list []*apimodel.Status + status.TopoSort(list, "") + actual := statusIDs(list) + + suite.Equal(expected, actual) +} + +func TestTopoSortTestSuite(t *testing.T) { + suite.Run(t, &topoSortTestSuite{}) +}