Improve context descendant sorting (#2579)

* Improve context descendant sorting

Topologically sort replies, then move self-replies to top of list

* Unify descendant sort passes

* Correct test package name

* Preallocate maps
This commit is contained in:
Vyr Cossont 2024-01-31 05:40:41 -08:00 committed by GitHub
parent c9452f32f3
commit c675d47a8c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 302 additions and 10 deletions

View file

@ -19,7 +19,8 @@
import ( import (
"context" "context"
"sort" "slices"
"strings"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtserror"
@ -74,27 +75,23 @@ func (p *Processor) contextGet(
return nil, errWithCode return nil, errWithCode
} }
context := &apimodel.Context{
Ancestors: []apimodel.Status{},
Descendants: []apimodel.Status{},
}
parents, err := p.state.DB.GetStatusParents(ctx, targetStatus) parents, err := p.state.DB.GetStatusParents(ctx, targetStatus)
if err != nil { if err != nil {
return nil, gtserror.NewErrorInternalError(err) return nil, gtserror.NewErrorInternalError(err)
} }
var ancestors []*apimodel.Status
for _, status := range parents { for _, status := range parents {
if v, err := p.filter.StatusVisible(ctx, requestingAccount, status); err == nil && v { if v, err := p.filter.StatusVisible(ctx, requestingAccount, status); err == nil && v {
apiStatus, err := convert(ctx, status, requestingAccount) apiStatus, err := convert(ctx, status, requestingAccount)
if err == nil { if err == nil {
context.Ancestors = append(context.Ancestors, *apiStatus) ancestors = append(ancestors, apiStatus)
} }
} }
} }
sort.Slice(context.Ancestors, func(i int, j int) bool { slices.SortFunc(ancestors, func(lhs, rhs *apimodel.Status) int {
return context.Ancestors[i].ID < context.Ancestors[j].ID return strings.Compare(lhs.ID, rhs.ID)
}) })
children, err := p.state.DB.GetStatusChildren(ctx, targetStatus.ID) children, err := p.state.DB.GetStatusChildren(ctx, targetStatus.ID)
@ -102,18 +99,116 @@ func (p *Processor) contextGet(
return nil, gtserror.NewErrorInternalError(err) return nil, gtserror.NewErrorInternalError(err)
} }
var descendants []*apimodel.Status
for _, status := range children { for _, status := range children {
if v, err := p.filter.StatusVisible(ctx, requestingAccount, status); err == nil && v { if v, err := p.filter.StatusVisible(ctx, requestingAccount, status); err == nil && v {
apiStatus, err := convert(ctx, status, requestingAccount) apiStatus, err := convert(ctx, status, requestingAccount)
if err == nil { if err == nil {
context.Descendants = append(context.Descendants, *apiStatus) descendants = append(descendants, apiStatus)
} }
} }
} }
TopoSort(descendants, targetStatus.AccountID)
//goland:noinspection GoImportUsedAsName
context := &apimodel.Context{
Ancestors: make([]apimodel.Status, 0, len(ancestors)),
Descendants: make([]apimodel.Status, 0, len(descendants)),
}
for _, ancestor := range ancestors {
context.Ancestors = append(context.Ancestors, *ancestor)
}
for _, descendant := range descendants {
context.Descendants = append(context.Descendants, *descendant)
}
return context, nil return context, nil
} }
// TopoSort sorts statuses topologically, by self-reply, and by ID.
// Can handle cycles but the output order will be arbitrary.
// (But if there are cycles, something went wrong upstream.)
func TopoSort(apiStatuses []*apimodel.Status, targetAccountID string) {
if len(apiStatuses) == 0 {
return
}
// Map of status IDs to statuses.
lookup := make(map[string]*apimodel.Status, len(apiStatuses))
for _, apiStatus := range apiStatuses {
lookup[apiStatus.ID] = apiStatus
}
// Tree of statuses to their children.
// The nil status may have children: any who don't have a parent, or whose parent isn't in the input.
tree := make(map[*apimodel.Status][]*apimodel.Status, len(apiStatuses))
for _, apiStatus := range apiStatuses {
var parent *apimodel.Status
if apiStatus.InReplyToID != nil {
parent = lookup[*apiStatus.InReplyToID]
}
tree[parent] = append(tree[parent], apiStatus)
}
// Sort children of each status by self-reply status and then ID, *in reverse*.
isSelfReply := func(apiStatus *apimodel.Status) bool {
return apiStatus.GetAccountID() == targetAccountID &&
apiStatus.InReplyToAccountID != nil &&
*apiStatus.InReplyToAccountID == targetAccountID
}
for id, children := range tree {
slices.SortFunc(children, func(lhs, rhs *apimodel.Status) int {
lhsIsContextSelfReply := isSelfReply(lhs)
rhsIsContextSelfReply := isSelfReply(rhs)
if lhsIsContextSelfReply && !rhsIsContextSelfReply {
return 1
} else if !lhsIsContextSelfReply && rhsIsContextSelfReply {
return -1
}
return -strings.Compare(lhs.ID, rhs.ID)
})
tree[id] = children
}
// Traverse the tree using preorder depth-first search, topologically sorting the statuses.
stack := make([]*apimodel.Status, 1, len(tree))
apiStatusIndex := 0
for len(stack) > 0 {
parent := stack[len(stack)-1]
children := tree[parent]
if len(children) == 0 {
// Remove this node from the tree.
delete(tree, parent)
// Go back to this node's parent.
stack = stack[:len(stack)-1]
continue
}
// Remove the last child entry (the first in sorted order).
child := children[len(children)-1]
tree[parent] = children[:len(children)-1]
// Explore its children next.
stack = append(stack, child)
// Overwrite the next entry of the input slice.
apiStatuses[apiStatusIndex] = child
apiStatusIndex++
}
// There should only be nodes left in the tree in the event of a cycle.
// Append them to the end in arbitrary order.
// This ensures that the slice of statuses has no duplicates.
for node := range tree {
apiStatuses[apiStatusIndex] = node
apiStatusIndex++
}
}
// ContextGet returns the context (previous and following posts) from the given status ID. // ContextGet returns the context (previous and following posts) from the given status ID.
func (p *Processor) ContextGet(ctx context.Context, requestingAccount *gtsmodel.Account, targetStatusID string) (*apimodel.Context, gtserror.WithCode) { func (p *Processor) ContextGet(ctx context.Context, requestingAccount *gtsmodel.Account, targetStatusID string) (*apimodel.Context, gtserror.WithCode) {
return p.contextGet(ctx, requestingAccount, targetStatusID, p.converter.StatusToAPIStatus) return p.contextGet(ctx, requestingAccount, targetStatusID, p.converter.StatusToAPIStatus)

View file

@ -0,0 +1,197 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package status_test
import (
"github.com/stretchr/testify/suite"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/processing/status"
"testing"
)
type topoSortTestSuite struct {
suite.Suite
}
func statusIDs(apiStatuses []*apimodel.Status) []string {
ids := make([]string, 0, len(apiStatuses))
for _, apiStatus := range apiStatuses {
ids = append(ids, apiStatus.ID)
}
return ids
}
func (suite *topoSortTestSuite) TestBranched() {
// https://commons.wikimedia.org/wiki/File:Sorted_binary_tree_ALL_RGB.svg
f := &apimodel.Status{ID: "F"}
b := &apimodel.Status{ID: "B", InReplyToID: &f.ID}
a := &apimodel.Status{ID: "A", InReplyToID: &b.ID}
d := &apimodel.Status{ID: "D", InReplyToID: &b.ID}
c := &apimodel.Status{ID: "C", InReplyToID: &d.ID}
e := &apimodel.Status{ID: "E", InReplyToID: &d.ID}
g := &apimodel.Status{ID: "G", InReplyToID: &f.ID}
i := &apimodel.Status{ID: "I", InReplyToID: &g.ID}
h := &apimodel.Status{ID: "H", InReplyToID: &i.ID}
expected := statusIDs([]*apimodel.Status{f, b, a, d, c, e, g, i, h})
list := []*apimodel.Status{a, b, c, d, e, f, g, h, i}
status.TopoSort(list, "")
actual := statusIDs(list)
suite.Equal(expected, actual)
}
func (suite *topoSortTestSuite) TestBranchedWithSelfReplyChain() {
targetAccount := &apimodel.Account{ID: "1"}
otherAccount := &apimodel.Account{ID: "2"}
f := &apimodel.Status{
ID: "F",
Account: targetAccount,
}
b := &apimodel.Status{
ID: "B",
Account: targetAccount,
InReplyToID: &f.ID,
InReplyToAccountID: &f.Account.ID,
}
a := &apimodel.Status{
ID: "A",
Account: otherAccount,
InReplyToID: &b.ID,
InReplyToAccountID: &b.Account.ID,
}
d := &apimodel.Status{
ID: "D",
Account: targetAccount,
InReplyToID: &b.ID,
InReplyToAccountID: &b.Account.ID,
}
c := &apimodel.Status{
ID: "C",
Account: otherAccount,
InReplyToID: &d.ID,
InReplyToAccountID: &d.Account.ID,
}
e := &apimodel.Status{
ID: "E",
Account: targetAccount,
InReplyToID: &d.ID,
InReplyToAccountID: &d.Account.ID,
}
g := &apimodel.Status{
ID: "G",
Account: otherAccount,
InReplyToID: &f.ID,
InReplyToAccountID: &f.Account.ID,
}
i := &apimodel.Status{
ID: "I",
Account: targetAccount,
InReplyToID: &g.ID,
InReplyToAccountID: &g.Account.ID,
}
h := &apimodel.Status{
ID: "H",
Account: otherAccount,
InReplyToID: &i.ID,
InReplyToAccountID: &i.Account.ID,
}
expected := statusIDs([]*apimodel.Status{f, b, d, e, c, a, g, i, h})
list := []*apimodel.Status{a, b, c, d, e, f, g, h, i}
status.TopoSort(list, targetAccount.ID)
actual := statusIDs(list)
suite.Equal(expected, actual)
}
func (suite *topoSortTestSuite) TestDisconnected() {
f := &apimodel.Status{ID: "F"}
b := &apimodel.Status{ID: "B", InReplyToID: &f.ID}
dID := "D"
e := &apimodel.Status{ID: "E", InReplyToID: &dID}
expected := statusIDs([]*apimodel.Status{e, f, b})
list := []*apimodel.Status{b, e, f}
status.TopoSort(list, "")
actual := statusIDs(list)
suite.Equal(expected, actual)
}
func (suite *topoSortTestSuite) TestTrivialCycle() {
xID := "X"
x := &apimodel.Status{ID: xID, InReplyToID: &xID}
expected := statusIDs([]*apimodel.Status{x})
list := []*apimodel.Status{x}
status.TopoSort(list, "")
actual := statusIDs(list)
suite.ElementsMatch(expected, actual)
}
func (suite *topoSortTestSuite) TestCycle() {
yID := "Y"
x := &apimodel.Status{ID: "X", InReplyToID: &yID}
y := &apimodel.Status{ID: yID, InReplyToID: &x.ID}
expected := statusIDs([]*apimodel.Status{x, y})
list := []*apimodel.Status{x, y}
status.TopoSort(list, "")
actual := statusIDs(list)
suite.ElementsMatch(expected, actual)
}
func (suite *topoSortTestSuite) TestMixedCycle() {
yID := "Y"
x := &apimodel.Status{ID: "X", InReplyToID: &yID}
y := &apimodel.Status{ID: yID, InReplyToID: &x.ID}
z := &apimodel.Status{ID: "Z"}
expected := statusIDs([]*apimodel.Status{x, y, z})
list := []*apimodel.Status{x, y, z}
status.TopoSort(list, "")
actual := statusIDs(list)
suite.ElementsMatch(expected, actual)
}
func (suite *topoSortTestSuite) TestEmpty() {
expected := statusIDs([]*apimodel.Status{})
list := []*apimodel.Status{}
status.TopoSort(list, "")
actual := statusIDs(list)
suite.Equal(expected, actual)
}
func (suite *topoSortTestSuite) TestNil() {
expected := statusIDs(nil)
var list []*apimodel.Status
status.TopoSort(list, "")
actual := statusIDs(list)
suite.Equal(expected, actual)
}
func TestTopoSortTestSuite(t *testing.T) {
suite.Run(t, &topoSortTestSuite{})
}