[bugfix] Don't return Account or Status if new and dereferencing failed, other small fixes (#2563)

* tidy up account, status, webfingering logic a wee bit

* go fmt

* invert published check

* alter resp initialization

* get Published from account in typeutils

* don't instantiate error for no darn good reason

* shadow err

* don't repeat error codes in wrapped errors

* don't wrap error unnecessarily
This commit is contained in:
tobi 2024-01-26 14:17:10 +01:00 committed by tobi
parent ad6f75694e
commit a3aa6042d7
12 changed files with 461 additions and 210 deletions

View file

@ -163,6 +163,7 @@ type Accountable interface {
WithManuallyApprovesFollowers
WithEndpoints
WithTag
WithPublished
}
// Statusable represents the minimum activitypub interface for representing a 'status'.

View file

@ -52,7 +52,7 @@ func accountUpToDate(account *gtsmodel.Account, force bool) bool {
return true
}
if !account.CreatedAt.IsZero() && account.IsInstance() {
if account.IsInstance() && !account.IsNew() {
// Existing instance account. No need for update.
return true
}
@ -232,8 +232,8 @@ func (d *Dereferencer) getAccountByUsernameDomain(
// Create and pass-through a new bare-bones model for dereferencing.
account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
ID: id.NewULID(),
Username: username,
Domain: domain,
Username: username,
}, nil)
if err != nil {
return nil, nil, err
@ -372,17 +372,18 @@ func (d *Dereferencer) enrichAccountSafely(
// By default use account.URI
// as the per-URI deref lock.
uriStr := account.URI
if uriStr == "" {
var lockKey string
if account.URI != "" {
lockKey = account.URI
} else {
// No URI is set yet, instead generate a faux-one from user+domain.
uriStr = "https://" + account.Domain + "/user/" + account.Username
lockKey = "https://" + account.Domain + "/users/" + account.Username
}
// Acquire per-URI deref lock, wraping unlock
// to safely defer in case of panic, while still
// performing more granular unlocks when needed.
unlock := d.state.FedLocks.Lock(uriStr)
unlock := d.state.FedLocks.Lock(lockKey)
unlock = doOnce(unlock)
defer unlock()
@ -394,10 +395,30 @@ func (d *Dereferencer) enrichAccountSafely(
accountable,
)
if gtserror.StatusCode(err) >= 400 {
// Update fetch-at to slow re-attempts.
if code := gtserror.StatusCode(err); code >= 400 {
// No matter what, log the error
// so instance admins have an idea
// why something isn't working.
log.Info(ctx, err)
if account.IsNew() {
// This was a new account enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil, nil, err
}
// We had this account stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
account.FetchedAt = time.Now()
_ = d.state.DB.UpdateAccount(ctx, account, "fetched_at")
if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil {
log.Errorf(ctx, "error updating account fetched_at: %v", err)
}
}
// Unlock now
@ -414,7 +435,7 @@ func (d *Dereferencer) enrichAccountSafely(
// in a call to db.Put(Account). Look again in DB by URI.
latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)
if err != nil {
err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err)
err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err)
}
}
@ -440,32 +461,44 @@ func (d *Dereferencer) enrichAccount(
if account.Username != "" {
// A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info.
accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain)
if err != nil {
if account.URI == "" {
// this is a new account (to us) with username@domain
switch {
case err != nil && account.URI == "":
// This is a new account (to us) with username@domain
// but failed webfinger, nothing more we can do.
err := gtserror.Newf("error webfingering account: %w", err)
return nil, nil, gtserror.SetUnretrievable(err)
}
// Simply log this error and move on, we already have an account URI.
log.Errorf(ctx, "error webfingering[1] remote account %s@%s: %v", account.Username, account.Domain, err)
}
case err != nil:
// Simply log this error and move on,
// we already have an account URI.
log.Errorf(ctx,
"error webfingering[1] remote account %s@%s: %v",
account.Username, account.Domain, err,
)
if err == nil {
if account.Domain != accDomain {
case err == nil && account.Domain != accDomain:
// After webfinger, we now have correct account domain from which we can do a final DB check.
alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain)
alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err)
}
if alreadyAccount != nil {
// Enrich existing account.
account = alreadyAccount
}
if alreadyAcct != nil {
// We had this account stored under
// the discovered accountDomain.
// Proceed with this account.
account = alreadyAcct
}
// Whether we had the account or not, we
// now have webfinger info relevant to the
// account, so fallthrough to set webfinger
// info on either the account we just found,
// or the stub account we were passed.
fallthrough
case err == nil:
// Update account with latest info.
account.URI = accURI.String()
account.Domain = accDomain
@ -474,13 +507,31 @@ func (d *Dereferencer) enrichAccount(
}
if uri == nil {
// No URI provided / found, must parse from account.
// No URI provided / found,
// must parse from account.
uri, err = url.Parse(account.URI)
if err != nil {
return nil, nil, gtserror.Newf("invalid uri %q: %w", account.URI, err)
return nil, nil, gtserror.Newf(
"invalid uri %q: %w",
account.URI, gtserror.SetUnretrievable(err),
)
}
if uri.Scheme != "http" && uri.Scheme != "https" {
err = errors.New("account URI scheme must be http or https")
return nil, nil, gtserror.Newf(
"invalid uri %q: %w",
account.URI, gtserror.SetUnretrievable(err),
)
}
}
/*
BY THIS POINT we must have an account URI set,
either provided, pinned to the account, or
obtained via webfinger call.
*/
// Check whether this account URI is a blocked domain / subdomain.
if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {
return nil, nil, gtserror.Newf("error checking blocked domain: %w", err)
@ -493,27 +544,45 @@ func (d *Dereferencer) enrichAccount(
defer d.stopHandshake(requestUser, uri)
if apubAcc == nil {
// We were not given any (partial) ActivityPub
// version of this account as a parameter.
// Dereference latest version of the account.
b, err := tsport.Dereference(ctx, uri)
if err != nil {
err := gtserror.Newf("error deferencing %s: %w", uri, err)
err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err)
}
// Attempt to resolve ActivityPub acc from data.
apubAcc, err = ap.ResolveAccountable(ctx, b)
if err != nil {
return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err)
// ResolveAccountable will set Unretrievable/WrongType
// on the returned error, so we don't need to do it here.
err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err)
return nil, nil, err
}
}
/*
BY THIS POINT we must have the ActivityPub
representation of the account, either provided,
or obtained via a dereference call.
*/
// Convert the dereferenced AP account object to our GTS model.
//
// We put this in the variable latestAcc because we might want
// to compare the provided account model with this fresh version,
// in order to check if anything changed since we last saw it.
latestAcc, err := d.converter.ASRepresentationToAccount(ctx,
apubAcc,
account.Domain,
)
if err != nil {
return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err)
// ASRepresentationToAccount will set Malformed on the
// returned error, so we don't need to do it here.
err = gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err)
return nil, nil, err
}
if account.Username == "" {
@ -528,14 +597,15 @@ func (d *Dereferencer) enrichAccount(
// from example.org to somewhere.else: we want to take somewhere.else
// as the accountDomain then, not the example.org we were redirected from.
// Assume the host from the returned ActivityPub representation.
idProp := apubAcc.GetJSONLDId()
if idProp == nil || !idProp.IsIRI() {
// Assume the host from the returned
// ActivityPub representation.
id := ap.GetJSONLDId(apubAcc)
if id == nil {
return nil, nil, gtserror.New("no id property found on person, or id was not an iri")
}
// Get IRI host value.
accHost := idProp.GetIRI().Host
accHost := id.Host
latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx,
tsport,
@ -553,7 +623,7 @@ func (d *Dereferencer) enrichAccount(
}
if latestAcc.Domain == "" {
// ensure we have a domain set by this point,
// Ensure we have a domain set by this point,
// otherwise it gets stored as a local user!
//
// TODO: there is probably a more granular way
@ -562,7 +632,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("empty domain for %s", uri)
}
// Ensure ID is set and update fetch time.
/*
BY THIS POINT we have more or less a fullly-formed
representation of the target account, derived from
a combination of webfinger lookups and dereferencing.
Further fetching beyond this point is for peripheral
things like account avatar, header, emojis.
*/
// Ensure internal db ID is
// set and update fetch time.
latestAcc.ID = account.ID
latestAcc.FetchedAt = time.Now()
@ -581,12 +660,14 @@ func (d *Dereferencer) enrichAccount(
log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err)
}
if account.CreatedAt.IsZero() {
// CreatedAt will be zero if no local copy was
// found in one of the GetAccountBy___() functions.
//
// Set time of creation from the last-fetched date.
if account.IsNew() {
// Prefer published/created time from
// apubAcc, fall back to FetchedAt value.
if latestAcc.CreatedAt.IsZero() {
latestAcc.CreatedAt = latestAcc.FetchedAt
}
// Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt
// This is new, put it in the database.
@ -595,11 +676,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("error putting in database: %w", err)
}
} else {
// Prefer published time from apubAcc,
// fall back to previous stored value.
if latestAcc.CreatedAt.IsZero() {
latestAcc.CreatedAt = account.CreatedAt
}
// Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt
// Use existing account values.
latestAcc.CreatedAt = account.CreatedAt
// Carry over existing account language.
latestAcc.Language = account.Language
// This is an existing account, update the model in the database.

View file

@ -20,54 +20,109 @@
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"strings"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
func (d *Dereferencer) fingerRemoteAccount(ctx context.Context, transport transport.Transport, targetUsername string, targetHost string) (accountDomain string, accountURI *url.URL, err error) {
b, err := transport.Finger(ctx, targetUsername, targetHost)
// fingerRemoteAccount performs a webfinger call for the
// given username and host, using the provided transport.
//
// The webfinger response will be parsed, and the subject
// domain and AP URI will be extracted and returned.
//
// In case the response cannot be parsed, or the response
// does not contain a valid subject string or AP URI, an
// error will be returned instead.
func (d *Dereferencer) fingerRemoteAccount(
ctx context.Context,
transport transport.Transport,
username string,
host string,
) (
string, // discovered account domain
*url.URL, // discovered account URI
error,
) {
// Assemble target namestring for logging.
var target = "@" + username + "@" + host
b, err := transport.Finger(ctx, username, host)
if err != nil {
err = fmt.Errorf("fingerRemoteAccount: error fingering @%s@%s: %s", targetUsername, targetHost, err)
return
err = gtserror.Newf("error webfingering %s: %w", target, err)
return "", nil, err
}
resp := &apimodel.WellKnownResponse{}
if err = json.Unmarshal(b, resp); err != nil {
err = fmt.Errorf("fingerRemoteAccount: could not unmarshal server response as WebfingerAccountResponse while dereferencing @%s@%s: %s", targetUsername, targetHost, err)
return
var resp apimodel.WellKnownResponse
if err := json.Unmarshal(b, &resp); err != nil {
err = gtserror.Newf("error parsing response as JSON for %s: %w", target, err)
return "", nil, err
}
if len(resp.Links) == 0 {
err = fmt.Errorf("fingerRemoteAccount: no links found in webfinger response %s", string(b))
return
err = gtserror.Newf("no links found in response for %s", target)
return "", nil, err
}
if resp.Subject == "" {
err = fmt.Errorf("fingerRemoteAccount: no subject found in webfinger response %s", string(b))
return
err = gtserror.Newf("no subject found in response for %s", target)
return "", nil, err
}
_, accountDomain, err = util.ExtractWebfingerParts(resp.Subject)
_, accountDomain, err := util.ExtractWebfingerParts(resp.Subject)
if err != nil {
err = fmt.Errorf("fingerRemoteAccount: error extracting webfinger subject parts: %s", err)
err = gtserror.Newf("error extracting subject parts for %s: %w", target, err)
return "", nil, err
}
// look through the links for the first one that matches what we need
for _, l := range resp.Links {
if l.Rel == "self" && (strings.EqualFold(l.Type, "application/activity+json") || strings.EqualFold(l.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"")) {
if uri, thiserr := url.Parse(l.Href); thiserr == nil && (uri.Scheme == "http" || uri.Scheme == "https") {
// found it!
accountURI = uri
return
}
}
// Look through links for the first
// one that matches what we need:
//
// - Must be self link.
// - Must be AP type.
// - Valid https/http URI.
for _, link := range resp.Links {
if link.Rel != "self" {
// Not self link, ignore.
continue
}
return "", nil, errors.New("fingerRemoteAccount: no match found in webfinger response")
if !strings.EqualFold(link.Type, "application/activity+json") &&
!strings.EqualFold(link.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"") {
// Not an AP type, ignore.
continue
}
uri, err := url.Parse(link.Href)
if err != nil {
log.Warnf(ctx,
"invalid href for ActivityPub self link %s for %s: %v",
link.Href, target, err,
)
// Funky URL, ignore.
continue
}
if uri.Scheme != "http" && uri.Scheme != "https" {
log.Warnf(ctx,
"invalid href for ActivityPub self link %s for %s: schema must be http or https",
link.Href, target,
)
// Can't handle this
// schema, ignore.
continue
}
// All looks good, return happily!
return accountDomain, uri, nil
}
return "", nil, gtserror.Newf("no suitable self, AP-type link found in webfinger response for %s", target)
}

View file

@ -294,10 +294,30 @@ func (d *Dereferencer) enrichStatusSafely(
apubStatus,
)
if gtserror.StatusCode(err) >= 400 {
// Update fetch-at to slow re-attempts.
if code := gtserror.StatusCode(err); code >= 400 {
// No matter what, log the error
// so instance admins have an idea
// why something isn't working.
log.Info(ctx, err)
if isNew {
// This was a new status enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil, nil, isNew, err
}
// We had this status stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
status.FetchedAt = time.Now()
_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at")
if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil {
log.Errorf(ctx, "error updating status fetched_at: %v", err)
}
}
// Unlock now
@ -358,7 +378,7 @@ func (d *Dereferencer) enrichStatus(
// Dereference latest version of the status.
b, err := tsport.Dereference(ctx, uri)
if err != nil {
err := gtserror.Newf("error deferencing %s: %w", uri, err)
err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err)
}
@ -388,16 +408,21 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)
}
// Use existing status ID.
latestStatus.ID = status.ID
if latestStatus.ID == "" {
// Generate new status ID from the provided creation date.
// Check if we've previously
// stored this status in the DB.
// If we have, it'll be ID'd.
var isNew = (status.ID == "")
if isNew {
// No ID, we haven't stored this status before.
// Generate new status ID from the status publication time.
latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)
if err != nil {
log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err)
latestStatus.ID = id.NewULID() // just use "now"
}
} else {
// Reuse existing status ID.
latestStatus.ID = status.ID
}
// Carry-over values and set fetch time.
@ -436,10 +461,7 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err)
}
if status.CreatedAt.IsZero() {
// CreatedAt will be zero if no local copy was
// found in one of the GetStatusBy___() functions.
//
if isNew {
// This is new, put the status in the database.
err := d.state.DB.PutStatus(ctx, latestStatus)
if err != nil {

View file

@ -94,6 +94,12 @@ func (a *Account) IsRemote() bool {
return !a.IsLocal()
}
// IsNew returns whether an account is "new" in the sense
// that it has not been previously stored in the database.
func (a *Account) IsNew() bool {
return a.CreatedAt.IsZero()
}
// IsInstance returns whether account is an instance internal actor account.
func (a *Account) IsInstance() bool {
if a.IsLocal() {

View file

@ -59,6 +59,8 @@
userPathPrefix = `^/?` + users + `/(` + usernameRelaxed + `)`
userPath = userPathPrefix + `$`
userWebPathPrefix = `^/?` + `@(` + usernameRelaxed + `)`
userWebPath = userWebPathPrefix + `$`
publicKeyPath = userPathPrefix + `/` + publicKey + `$`
inboxPath = userPathPrefix + `/` + inbox + `$`
outboxPath = userPathPrefix + `/` + outbox + `$`
@ -110,6 +112,9 @@
// UserPath validates and captures the username part from eg /users/example_username.
UserPath = regexp.MustCompile(userPath)
// UserWebPath validates and captures the username part from eg /@example_username.
UserWebPath = regexp.MustCompile(userWebPath)
// PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key
PublicKeyPath = regexp.MustCompile(publicKeyPath)

View file

@ -77,6 +77,18 @@ func (c *Converter) ASRepresentationToAccount(ctx context.Context, accountable a
return nil, gtserror.SetMalformed(err)
}
// Extract published time if possible.
//
// This denotes original creation time
// of the account on the remote instance.
//
// Not every implementation uses this property;
// so don't bother warning if we can't find it.
if pub := ap.GetPublished(accountable); !pub.IsZero() {
acct.CreatedAt = pub
acct.UpdatedAt = pub
}
// Extract a preferred name (display name), fallback to username.
if displayName := ap.ExtractName(accountable); displayName != "" {
acct.DisplayName = displayName
@ -314,7 +326,7 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
// status.Published
//
// Extract published time for the boost,
// Extract published time for the status,
// zero-time will fall back to db defaults.
if pub := ap.GetPublished(statusable); !pub.IsZero() {
status.CreatedAt = pub

View file

@ -138,6 +138,7 @@ func (suite *ASToInternalTestSuite) TestParseGargron() {
acct, err := suite.typeconverter.ASRepresentationToAccount(context.Background(), rep, "")
suite.NoError(err)
suite.Equal("https://mastodon.social/inbox", *acct.SharedInboxURI)
suite.Equal(int64(1458086400), acct.CreatedAt.Unix())
}
func (suite *ASToInternalTestSuite) TestParseReplyWithMention() {

View file

@ -279,6 +279,8 @@
"url": "https://mastodon.social/@Gargron",
"manuallyApprovesFollowers": false,
"discoverable": true,
"indexable": true,
"published": "2016-03-16T00:00:00Z",
"devices": "https://mastodon.social/users/Gargron/collections/devices",
"alsoKnownAs": [
"https://tooting.ai/users/Gargron"

View file

@ -248,6 +248,11 @@ func IsUserPath(id *url.URL) bool {
return regexes.UserPath.MatchString(id.Path)
}
// IsUserWebPath returns true if the given URL path corresponds to eg /@example_username
func IsUserWebPath(id *url.URL) bool {
return regexes.UserWebPath.MatchString(id.Path)
}
// IsInboxPath returns true if the given URL path corresponds to eg /users/example_username/inbox
func IsInboxPath(id *url.URL) bool {
return regexes.InboxPath.MatchString(id.Path)
@ -326,6 +331,17 @@ func ParseUserPath(id *url.URL) (username string, err error) {
return
}
// ParseUserPath returns the username from a path such as /@example_username
func ParseUserWebPath(id *url.URL) (username string, err error) {
matches := regexes.UserWebPath.FindStringSubmatch(id.Path)
if len(matches) != 2 {
err = fmt.Errorf("expected 2 matches but matches length was %d", len(matches))
return
}
username = matches[1]
return
}
// ParseInboxPath returns the username from a path such as /users/example_username/inbox
func ParseInboxPath(id *url.URL) (username string, err error) {
matches := regexes.InboxPath.FindStringSubmatch(id.Path)

View file

@ -23,101 +23,168 @@
"strings"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// ExtractNamestringParts extracts the username test_user and
// the domain example.org from a string like @test_user@example.org.
//
// If nothing is matched, it will return an error.
func ExtractNamestringParts(mention string) (username, host string, err error) {
matches := regexes.MentionName.FindStringSubmatch(mention)
func ExtractNamestringParts(namestring string) (username, host string, err error) {
matches := regexes.MentionName.FindStringSubmatch(namestring)
switch len(matches) {
case 2:
return matches[1], "", nil
case 3:
return matches[1], matches[2], nil
default:
return "", "", fmt.Errorf("couldn't match mention %s", mention)
return "", "", fmt.Errorf("couldn't match namestring %s", namestring)
}
}
// ExtractWebfingerParts returns the username and domain from either an
// account query or an actor URI.
// ExtractWebfingerParts returns the username and domain from the "subject"
// part of a webfinger response: either an account namestring or an actor URI.
//
// All implementations in the wild generate webfinger account resource
// All AP implementations in the wild perform webfinger account resource
// queries with the "acct" scheme and without a leading "@"" on the username.
// This is also the format the "subject" in a webfinger response adheres to.
//
// Despite this fact, we're being permissive about a single leading @. This
// makes a query for acct:user@domain.tld and acct:@user@domain.tld
// equivalent. But a query for acct:@@user@domain.tld will have its username
// returned with the @ prefix.
// Despite this fact, we're permissive about a single leading @. This makes
// a query for "acct:user@domain.tld" and "acct:@user@domain.tld" equivalent.
//
// We also permit a resource of user@domain.tld or @user@domain.tld, without
// a scheme. In that case it gets interpreted as if it was using the "acct"
// scheme.
// We also permit a resource of "user@domain.tld" or "@user@domain.tld", without
// a scheme. In that case it gets interpreted as if it was using "acct:".
//
// When parsing fails, an error is returned.
func ExtractWebfingerParts(webfinger string) (username, host string, err error) {
orig := webfinger
u, oerr := url.ParseRequestURI(webfinger)
if oerr != nil {
// Most likely reason for failing to parse is if the "acct" scheme was
// missing but a :port was included. So try an extra time with the scheme.
u, err = url.ParseRequestURI("acct:" + webfinger)
// Will error if parsing fails, or if the extracted username or domain are empty.
func ExtractWebfingerParts(subject string) (
string, // username
string, // domain
error,
) {
u, err := url.ParseRequestURI(subject)
if err != nil {
return "", "", fmt.Errorf("failed to parse %s with acct sheme: %w", orig, oerr)
// Most likely reason for failing to parse is if
// the "acct" scheme was missing but a :port was
// included. So try an extra time with the scheme.
u, err = url.ParseRequestURI("acct:" + subject)
}
if err != nil {
return "", "", fmt.Errorf("failed to parse %s: %w", subject, err)
}
if u.Scheme == "http" || u.Scheme == "https" {
return ExtractWebfingerPartsFromURI(u)
switch u.Scheme {
// Subject looks like
// "https://example.org/users/whatever"
// or "https://example.org/@whatever".
case "http", "https":
return partsFromURI(u)
// Subject looks like
// "acct:whatever@example.org"
// or "acct:@whatever@example.org".
case "acct":
// Pass string without "acct:" prefix.
return partsFromNamestring(u.Opaque)
// Subject was probably a relative URL.
// Fail since we need the domain.
case "":
return "", "", fmt.Errorf("no scheme for resource %s", subject)
}
if u.Scheme != "acct" {
return "", "", fmt.Errorf("unsupported scheme: %s for resource: %s", u.Scheme, orig)
}
stripped := strings.TrimPrefix(u.Opaque, "@")
userDomain := strings.Split(stripped, "@")
if len(userDomain) != 2 {
return "", "", fmt.Errorf("failed to extract user and domain from: %s", orig)
}
return userDomain[0], userDomain[1], nil
return "", "", fmt.Errorf("unsupported scheme %s for resource %s", u.Scheme, subject)
}
// ExtractWebfingerPartsFromURI returns the user and domain extracted from
// the passed in URI. The URI should be an actor URI.
// partsFromNamestring returns the username
// and host parts extracted from a passed-in actor
// namestring of the format "whatever@example.org".
//
// The domain returned is the hostname, and the user will be extracted
// from either /@test_user or /users/test_user. These two paths match the
// "aliasses" we include in our webfinger response and are also present in
// our "links".
//
// Like with ExtractWebfingerParts, we're being permissive about a single
// leading @.
//
// Errors are returned in case we end up with an empty domain or username.
func ExtractWebfingerPartsFromURI(uri *url.URL) (username, host string, err error) {
host = uri.Host
if host == "" {
return "", "", fmt.Errorf("failed to extract domain from: %s", uri)
// The function returns an error if username or
// host cannot be extracted.
func partsFromNamestring(namestring string) (
string, // username
string, // host
error,
) {
// Trim all leading "@" symbols,
// and then inject just one "@".
namestring = strings.TrimLeft(namestring, "@")
namestring = "@" + namestring
username, host, err := ExtractNamestringParts(namestring)
if err != nil {
return "", "", err
}
// strip any leading slashes
path := strings.TrimLeft(uri.Path, "/")
segs := strings.Split(path, "/")
if username == "" {
err := fmt.Errorf("failed to extract username from: %s", namestring)
return "", "", err
}
if host == "" {
err := fmt.Errorf("failed to extract domain from: %s", namestring)
return "", "", err
}
return username, host, nil
}
// partsFromURI returns the username and host
// extracted from the passed in actor URI.
//
// The username will be extracted from one of
// the patterns "/@whatever" or "/users/whatever".
// These paths match the "aliases" and "links"
// we include in our own webfinger responses.
//
// This function tries to be permissive with
// regard to leading "@" symbols. Nevertheless,
// an error will be returned if username or host
// cannot be extracted.
func partsFromURI(uri *url.URL) (
string, // username
string, // host
error,
) {
host := uri.Host
if host == "" {
err := fmt.Errorf("failed to extract domain from: %s", uri)
return "", "", err
}
// Copy the URL, taking
// only the parts we need.
short := &url.URL{
Path: uri.Path,
}
// Try "/users/whatever".
username, err := uris.ParseUserPath(short)
if err == nil && username != "" {
return username, host, nil
}
// Try "/@whatever"
username, err = uris.ParseUserWebPath(short)
if err == nil && username != "" {
return username, host, nil
}
// Try some exotic fallbacks like
// "/users/@whatever", "/@@whatever", etc.
short.Path = strings.TrimLeft(short.Path, "/")
segs := strings.Split(short.Path, "/")
if segs[0] == "users" {
username = segs[1]
} else {
username = segs[0]
}
username = strings.TrimPrefix(username, "@")
if username == "" {
return "", "", fmt.Errorf("failed to extract username from: %s", uri)
username = strings.TrimLeft(username, "@")
if username != "" {
return username, host, nil
}
return
return "", "", fmt.Errorf("failed to extract username from: %s", uri)
}

View file

@ -18,7 +18,6 @@
package util_test
import (
"net/url"
"testing"
"github.com/stretchr/testify/suite"
@ -39,11 +38,26 @@ func (suite *NamestringSuite) TestExtractWebfingerParts() {
{in: "stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "stonerkitty.monster@stonerkitty.monster:8080", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"},
{in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "acct:@@stonerkitty.monster@stonerkitty.monster", err: "failed to extract user and domain from: acct:@@stonerkitty.monster@stonerkitty.monster"},
{in: "acct:@stonerkitty.monster@@stonerkitty.monster", err: "failed to extract user and domain from: acct:@stonerkitty.monster@@stonerkitty.monster"},
{in: "@@stonerkitty.monster@stonerkitty.monster", err: "failed to extract user and domain from: @@stonerkitty.monster@stonerkitty.monster"},
{in: "@stonerkitty.monster@@stonerkitty.monster", err: "failed to extract user and domain from: @stonerkitty.monster@@stonerkitty.monster"},
{in: "s3:stonerkitty.monster@stonerkitty.monster", err: "unsupported scheme: s3 for resource: s3:stonerkitty.monster@stonerkitty.monster"},
{in: "acct:@@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "acct:@stonerkitty.monster@@stonerkitty.monster", err: "couldn't match namestring @stonerkitty.monster@@stonerkitty.monster"},
{in: "@@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "@stonerkitty.monster@@stonerkitty.monster", err: "couldn't match namestring @stonerkitty.monster@@stonerkitty.monster"},
{in: "s3:stonerkitty.monster@stonerkitty.monster", err: "unsupported scheme s3 for resource s3:stonerkitty.monster@stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster:8080/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"},
{in: "https://stonerkitty.monster/users/stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "/@stonerkitty.monster", err: "no scheme for resource /@stonerkitty.monster"},
{in: "/users/stonerkitty.monster", err: "no scheme for resource /users/stonerkitty.monster"},
{in: "@stonerkitty.monster", err: "failed to extract domain from: @stonerkitty.monster"},
{in: "users/stonerkitty.monster", err: "couldn't match namestring @users/stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/", err: "failed to extract username from: https://stonerkitty.monster/users/"},
{in: "https://stonerkitty.monster/users/@", err: "failed to extract username from: https://stonerkitty.monster/users/@"},
{in: "https://stonerkitty.monster/@", err: "failed to extract username from: https://stonerkitty.monster/@"},
{in: "https://stonerkitty.monster/", err: "failed to extract username from: https://stonerkitty.monster/"},
}
for _, tt := range tests {
@ -56,45 +70,9 @@ func (suite *NamestringSuite) TestExtractWebfingerParts() {
suite.Equal(tt.username, username)
suite.Equal(tt.domain, domain)
} else {
suite.EqualError(err, tt.err)
if !suite.EqualError(err, tt.err) {
suite.T().Logf("expected error %s", tt.err)
}
})
}
}
func (suite *NamestringSuite) TestExtractWebfingerPartsFromURI() {
tests := []struct {
in, username, domain, err string
}{
{in: "https://stonerkitty.monster/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@@stonerkitty.monster", username: "@stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster:8080/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"},
{in: "https://stonerkitty.monster/users/stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "https://stonerkitty.monster/@stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"},
{in: "/@stonerkitty.monster", err: "failed to extract domain from: /@stonerkitty.monster"},
{in: "/users/stonerkitty.monster", err: "failed to extract domain from: /users/stonerkitty.monster"},
{in: "@stonerkitty.monster", err: "failed to extract domain from: @stonerkitty.monster"},
{in: "users/stonerkitty.monster", err: "failed to extract domain from: users/stonerkitty.monster"},
{in: "https://stonerkitty.monster/users/", err: "failed to extract username from: https://stonerkitty.monster/users/"},
{in: "https://stonerkitty.monster/users/@", err: "failed to extract username from: https://stonerkitty.monster/users/@"},
{in: "https://stonerkitty.monster/@", err: "failed to extract username from: https://stonerkitty.monster/@"},
{in: "https://stonerkitty.monster/", err: "failed to extract username from: https://stonerkitty.monster/"},
}
for _, tt := range tests {
tt := tt
suite.Run(tt.in, func() {
suite.T().Parallel()
uri, _ := url.Parse(tt.in)
username, domain, err := util.ExtractWebfingerPartsFromURI(uri)
if tt.err == "" {
suite.NoError(err)
suite.Equal(tt.username, username)
suite.Equal(tt.domain, domain)
} else {
suite.EqualError(err, tt.err)
}
})
}
@ -107,7 +85,7 @@ func (suite *NamestringSuite) TestExtractNamestring() {
{in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", host: "stonerkitty.monster"},
{in: "@stonerkitty.monster", username: "stonerkitty.monster"},
{in: "@someone@somewhere", username: "someone", host: "somewhere"},
{in: "", err: "couldn't match mention "},
{in: "", err: "couldn't match namestring "},
}
for _, tt := range tests {