From ca5492b65f45c7db8a9cfb767b0b48aa6cf6fe24 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Mon, 10 Jul 2023 17:05:59 +0200
Subject: [PATCH] [bugfix] Tidy up rss feed serving; don't error on empty feed
(#1970)
* [bugfix] Tidy up rss feed serving; don't error on empty feed
* fall back to account creation time as rss feed update time
* return feed early when account has no eligible statuses
---
internal/processing/account/rss.go | 167 ++++++++++++++------
internal/processing/account/rss_test.go | 28 ++++
internal/web/rss.go | 198 +++++++++++++++---------
3 files changed, 267 insertions(+), 126 deletions(-)
diff --git a/internal/processing/account/rss.go b/internal/processing/account/rss.go
index 9b5773809..ddc07b9ad 100644
--- a/internal/processing/account/rss.go
+++ b/internal/processing/account/rss.go
@@ -27,82 +27,151 @@
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
-const rssFeedLength = 20
+const (
+ rssFeedLength = 20
+)
+
+type GetRSSFeed func() (string, gtserror.WithCode)
+
+// GetRSSFeedForUsername returns a function to return the RSS feed of a local account
+// with the given username, and the last-modified time (time that the account last
+// posted a status eligible to be included in the rss feed).
+//
+// To save db calls, callers to this function should only call the returned GetRSSFeed
+// func if the last-modified time is newer than the last-modified time they have cached.
+//
+// If the account has not yet posted an RSS-eligible status, the returned last-modified
+// time will be zero, and the GetRSSFeed func will return a valid RSS xml with no items.
+func (p *Processor) GetRSSFeedForUsername(ctx context.Context, username string) (GetRSSFeed, time.Time, gtserror.WithCode) {
+ var (
+ never = time.Time{}
+ )
-// GetRSSFeedForUsername returns RSS feed for the given local username.
-func (p *Processor) GetRSSFeedForUsername(ctx context.Context, username string) (func() (string, gtserror.WithCode), time.Time, gtserror.WithCode) {
account, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, "")
if err != nil {
- if err == db.ErrNoEntries {
- return nil, time.Time{}, gtserror.NewErrorNotFound(errors.New("GetRSSFeedForUsername: account not found"))
+ if errors.Is(err, db.ErrNoEntries) {
+ // Simply no account with this username.
+ err = gtserror.New("account not found")
+ return nil, never, gtserror.NewErrorNotFound(err)
}
- return nil, time.Time{}, gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err))
+
+ // Real db error.
+ err = gtserror.Newf("db error getting account %s: %w", username, err)
+ return nil, never, gtserror.NewErrorInternalError(err)
}
+ // Ensure account has rss feed enabled.
if !*account.EnableRSS {
- return nil, time.Time{}, gtserror.NewErrorNotFound(errors.New("GetRSSFeedForUsername: account RSS feed not enabled"))
+ err = gtserror.New("account RSS feed not enabled")
+ return nil, never, gtserror.NewErrorNotFound(err)
}
- lastModified, err := p.state.DB.GetAccountLastPosted(ctx, account.ID, true)
- if err != nil {
- return nil, time.Time{}, gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err))
+ // LastModified time is needed by callers to check freshness for cacheing.
+ // This might be a zero time.Time if account has never posted a status that's
+ // eligible to appear in the RSS feed; that's fine.
+ lastPostAt, err := p.state.DB.GetAccountLastPosted(ctx, account.ID, true)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ err = gtserror.Newf("db error getting account %s last posted: %w", username, err)
+ return nil, never, gtserror.NewErrorInternalError(err)
}
return func() (string, gtserror.WithCode) {
- statuses, err := p.state.DB.GetAccountWebStatuses(ctx, account.ID, rssFeedLength, "")
- if err != nil && err != db.ErrNoEntries {
- return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error: %s", err))
- }
-
+ // Assemble author namestring once only.
author := "@" + account.Username + "@" + config.GetAccountDomain()
- title := "Posts from " + author
- description := "Posts from " + author
- link := &feeds.Link{Href: account.URL}
- var image *feeds.Image
- if account.AvatarMediaAttachmentID != "" {
- if account.AvatarMediaAttachment == nil {
- avatar, err := p.state.DB.GetAttachmentByID(ctx, account.AvatarMediaAttachmentID)
- if err != nil {
- return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: db error fetching avatar attachment: %s", err))
- }
- account.AvatarMediaAttachment = avatar
- }
- image = &feeds.Image{
- Url: account.AvatarMediaAttachment.Thumbnail.URL,
- Title: "Avatar for " + author,
- Link: account.URL,
- }
+ // Derive image/thumbnail for this account (may be nil).
+ image, errWithCode := p.rssImageForAccount(ctx, account, author)
+ if errWithCode != nil {
+ return "", errWithCode
}
feed := &feeds.Feed{
- Title: title,
- Description: description,
- Link: link,
+ Title: "Posts from " + author,
+ Description: "Posts from " + author,
+ Link: &feeds.Link{Href: account.URL},
Image: image,
}
- for i, s := range statuses {
- // take the date of the first (ie., latest) status as feed updated value
- if i == 0 {
- feed.Updated = s.UpdatedAt
- }
+ // If the account has never posted anything, just use
+ // account creation time as Updated value for the feed;
+ // we could use time.Now() here but this would likely
+ // mess up cacheing; we want something determinate.
+ //
+ // We can also return early rather than wasting a db call,
+ // since we already know there's no eligible statuses.
+ if lastPostAt.IsZero() {
+ feed.Updated = account.CreatedAt
+ return stringifyFeed(feed)
+ }
- item, err := p.tc.StatusToRSSItem(ctx, s)
+ // Account has posted at least one status that's
+ // eligible to appear in the RSS feed.
+ //
+ // Reuse the lastPostAt value for feed.Updated.
+ feed.Updated = lastPostAt
+
+ // Retrieve latest statuses as they'd be shown on the web view of the account profile.
+ statuses, err := p.state.DB.GetAccountWebStatuses(ctx, account.ID, rssFeedLength, "")
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ err = fmt.Errorf("db error getting account web statuses: %w", err)
+ return "", gtserror.NewErrorInternalError(err)
+ }
+
+ // Add each status to the rss feed.
+ for _, status := range statuses {
+ item, err := p.tc.StatusToRSSItem(ctx, status)
if err != nil {
- return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: error converting status to feed item: %s", err))
+ err = gtserror.Newf("error converting status to feed item: %w", err)
+ return "", gtserror.NewErrorInternalError(err)
}
feed.Add(item)
}
- rss, err := feed.ToRss()
- if err != nil {
- return "", gtserror.NewErrorInternalError(fmt.Errorf("GetRSSFeedForUsername: error converting feed to rss string: %s", err))
- }
-
- return rss, nil
- }, lastModified, nil
+ return stringifyFeed(feed)
+ }, lastPostAt, nil
+}
+
+func (p *Processor) rssImageForAccount(ctx context.Context, account *gtsmodel.Account, author string) (*feeds.Image, gtserror.WithCode) {
+ if account.AvatarMediaAttachmentID == "" {
+ // No image, no problem!
+ return nil, nil
+ }
+
+ // Ensure account avatar attachment populated.
+ if account.AvatarMediaAttachment == nil {
+ var err error
+ account.AvatarMediaAttachment, err = p.state.DB.GetAttachmentByID(ctx, account.AvatarMediaAttachmentID)
+ if err != nil {
+ if errors.Is(err, db.ErrNoEntries) {
+ // No attachment found with this ID (race condition?).
+ return nil, nil
+ }
+
+ // Real db error.
+ err = gtserror.Newf("db error fetching avatar media attachment: %w", err)
+ return nil, gtserror.NewErrorInternalError(err)
+ }
+ }
+
+ return &feeds.Image{
+ Url: account.AvatarMediaAttachment.Thumbnail.URL,
+ Title: "Avatar for " + author,
+ Link: account.URL,
+ }, nil
+}
+
+func stringifyFeed(feed *feeds.Feed) (string, gtserror.WithCode) {
+ // Stringify the feed. Even with no statuses,
+ // this will still produce valid rss xml.
+ rss, err := feed.ToRss()
+ if err != nil {
+ err := gtserror.Newf("error converting feed to rss string: %w", err)
+ return "", gtserror.NewErrorInternalError(err)
+ }
+
+ return rss, nil
}
diff --git a/internal/processing/account/rss_test.go b/internal/processing/account/rss_test.go
index 1a0bb9788..80f86211f 100644
--- a/internal/processing/account/rss_test.go
+++ b/internal/processing/account/rss_test.go
@@ -55,6 +55,34 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSZork() {
suite.Equal("\n \n Posts from @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n Posts from @the_mighty_zork@localhost:8080\n Wed, 20 Oct 2021 10:40:37 +0000\n Wed, 20 Oct 2021 10:40:37 +0000\n \n http://localhost:8080/fileserver/01F8MH1H7YV1Z7D2C8K2730QBF/avatar/small/01F8MH58A357CV5K7R7TJMSH6S.jpg\n Avatar for @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n \n \n introduction post\n http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY\n @the_mighty_zork@localhost:8080 made a new post: "hello everyone!"\n \n @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY\n Wed, 20 Oct 2021 10:40:37 +0000\n \n \n \n", feed)
}
+func (suite *GetRSSTestSuite) TestGetAccountRSSZorkNoPosts() {
+ ctx := context.Background()
+
+ // Get all of zork's posts.
+ statuses, err := suite.db.GetAccountStatuses(ctx, suite.testAccounts["local_account_1"].ID, 0, false, false, "", "", false, false)
+ if err != nil {
+ suite.FailNow(err.Error())
+ }
+
+ // Now delete them! Hahaha!
+ for _, status := range statuses {
+ if err := suite.db.DeleteStatusByID(ctx, status.ID); err != nil {
+ suite.FailNow(err.Error())
+ }
+ }
+
+ getFeed, lastModified, err := suite.accountProcessor.GetRSSFeedForUsername(ctx, "the_mighty_zork")
+ suite.NoError(err)
+ suite.Empty(lastModified)
+
+ feed, err := getFeed()
+ suite.NoError(err)
+
+ fmt.Println(feed)
+
+ suite.Equal("\n \n Posts from @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n Posts from @the_mighty_zork@localhost:8080\n Fri, 20 May 2022 11:09:18 +0000\n Fri, 20 May 2022 11:09:18 +0000\n \n http://localhost:8080/fileserver/01F8MH1H7YV1Z7D2C8K2730QBF/avatar/small/01F8MH58A357CV5K7R7TJMSH6S.jpg\n Avatar for @the_mighty_zork@localhost:8080\n http://localhost:8080/@the_mighty_zork\n \n \n", feed)
+}
+
func TestGetRSSTestSuite(t *testing.T) {
suite.Run(t, new(GetRSSTestSuite))
}
diff --git a/internal/web/rss.go b/internal/web/rss.go
index 8c8d5112f..2d98efcb3 100644
--- a/internal/web/rss.go
+++ b/internal/web/rss.go
@@ -19,8 +19,6 @@
import (
"bytes"
- "errors"
- "fmt"
"net/http"
"strings"
"time"
@@ -31,86 +29,50 @@
"github.com/superseriousbusiness/gotosocial/internal/log"
)
-const appRSSUTF8 = string(apiutil.AppRSSXML + "; charset=utf-8")
-
-func (m *Module) GetRSSETag(urlPath string, lastModified time.Time, getRSSFeed func() (string, gtserror.WithCode)) (string, error) {
- if cachedETag, ok := m.eTagCache.Get(urlPath); ok && !lastModified.After(cachedETag.lastModified) {
- // only return our cached etag if the file wasn't
- // modified since last time, otherwise generate a
- // new one; eat fresh!
- return cachedETag.eTag, nil
- }
-
- rssFeed, errWithCode := getRSSFeed()
- if errWithCode != nil {
- return "", fmt.Errorf("error getting rss feed: %s", errWithCode)
- }
-
- eTag, err := generateEtag(bytes.NewReader([]byte(rssFeed)))
- if err != nil {
- return "", fmt.Errorf("error generating etag: %s", err)
- }
-
- // put new entry in cache before we return
- m.eTagCache.Set(urlPath, eTagCacheEntry{
- eTag: eTag,
- lastModified: lastModified,
- })
-
- return eTag, nil
-}
-
-func extractIfModifiedSince(r *http.Request) time.Time {
- hdr := r.Header.Get(ifModifiedSinceHeader)
-
- if hdr == "" {
- return time.Time{}
- }
-
- t, err := http.ParseTime(hdr)
- if err != nil {
- log.Errorf(r.Context(), "couldn't parse if-modified-since %s: %s", hdr, err)
- return time.Time{}
- }
-
- return t
-}
+const appRSSUTF8 = string(apiutil.AppRSSXML) + "; charset=utf-8"
func (m *Module) rssFeedGETHandler(c *gin.Context) {
- // set this Cache-Control header to instruct clients to validate the response with us
- // before each reuse (https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control)
- c.Header(cacheControlHeader, cacheControlNoCache)
- ctx := c.Request.Context()
-
if _, err := apiutil.NegotiateAccept(c, apiutil.AppRSSXML); err != nil {
apiutil.WebErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGetV1)
return
}
- // usernames on our instance will always be lowercase
- username := strings.ToLower(c.Param(usernameKey))
- if username == "" {
- err := errors.New("no account username specified")
- apiutil.WebErrorHandler(c, gtserror.NewErrorBadRequest(err, err.Error()), m.processor.InstanceGetV1)
- return
- }
-
- ifNoneMatch := c.Request.Header.Get(ifNoneMatchHeader)
- ifModifiedSince := extractIfModifiedSince(c.Request)
-
- getRssFeed, accountLastPostedPublic, errWithCode := m.processor.Account().GetRSSFeedForUsername(ctx, username)
+ // Fetch + normalize username from URL.
+ username, errWithCode := apiutil.ParseWebUsername(c.Param(apiutil.WebUsernameKey))
if errWithCode != nil {
apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
return
}
- var rssFeed string
- cacheKey := c.Request.URL.Path
- cacheEntry, ok := m.eTagCache.Get(cacheKey)
+ // Usernames on our instance will always be lowercase.
+ //
+ // todo: https://github.com/superseriousbusiness/gotosocial/issues/1813
+ username = strings.ToLower(username)
- if !ok || cacheEntry.lastModified.Before(accountLastPostedPublic) {
- // we either have no cache entry for this, or we have an expired cache entry; generate a new one
- rssFeed, errWithCode = getRssFeed()
+ // Retrieve the getRSSFeed function from the processor.
+ // We'll only call the function if we need to, to save db calls.
+ // lastPostAt may be a zero time if account has never posted.
+ getRSSFeed, lastPostAt, errWithCode := m.processor.Account().GetRSSFeedForUsername(c.Request.Context(), username)
+ if errWithCode != nil {
+ apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
+ return
+ }
+
+ var (
+ rssFeed string // Stringified rss feed.
+
+ cacheKey = c.Request.URL.Path
+ cacheEntry, wasCached = m.eTagCache.Get(cacheKey)
+ )
+
+ if !wasCached || unixAfter(lastPostAt, cacheEntry.lastModified) {
+ // We either have no ETag cache entry for this account's feed,
+ // or we have an expired cache entry (account has posted since
+ // the cache entry was last generated).
+ //
+ // As such, we need to generate a new ETag, and for that we need
+ // the string representation of the RSS feed.
+ rssFeed, errWithCode = getRSSFeed()
if errWithCode != nil {
apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
return
@@ -122,29 +84,73 @@ func (m *Module) rssFeedGETHandler(c *gin.Context) {
return
}
- cacheEntry.lastModified = accountLastPostedPublic
- cacheEntry.eTag = eTag
+ // We never want lastModified to be zero, so if account
+ // has never actually posted anything, just use Now as
+ // the lastModified time instead for cache control.
+ var lastModified time.Time
+ if lastPostAt.IsZero() {
+ lastModified = time.Now()
+ } else {
+ lastModified = lastPostAt
+ }
+
+ // Store the new cache entry.
+ cacheEntry = eTagCacheEntry{
+ eTag: eTag,
+ lastModified: lastModified,
+ }
m.eTagCache.Set(cacheKey, cacheEntry)
}
+ // Set 'ETag' and 'Last-Modified' headers no matter what;
+ // even if we return 304 in the next checks, caller may
+ // want to cache these header values.
c.Header(eTagHeader, cacheEntry.eTag)
- c.Header(lastModifiedHeader, accountLastPostedPublic.Format(http.TimeFormat))
+ c.Header(lastModifiedHeader, cacheEntry.lastModified.Format(http.TimeFormat))
+ // Instruct caller to validate the response with us before
+ // each reuse, so that the 'ETag' and 'Last-Modified' headers
+ // actually take effect.
+ //
+ // "The no-cache response directive indicates that the response
+ // can be stored in caches, but the response must be validated
+ // with the origin server before each reuse, even when the cache
+ // is disconnected from the origin server."
+ //
+ // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
+ c.Header(cacheControlHeader, cacheControlNoCache)
+
+ // Check if caller submitted an ETag via 'If-None-Match'.
+ // If they did + it matches what we have, that means they've
+ // already seen the latest version of this feed, so just bail.
+ ifNoneMatch := c.Request.Header.Get(ifNoneMatchHeader)
if ifNoneMatch == cacheEntry.eTag {
c.AbortWithStatus(http.StatusNotModified)
return
}
- lmUnix := cacheEntry.lastModified.Unix()
- imsUnix := ifModifiedSince.Unix()
- if lmUnix <= imsUnix {
+ // Check if the caller submitted a time via 'If-Modified-Since'.
+ // If they did, and our cached ETag entry is not newer than the
+ // given time, this means the caller has already seen the latest
+ // version of this feed, so just bail.
+ ifModifiedSince := extractIfModifiedSince(c.Request)
+ if !ifModifiedSince.IsZero() &&
+ !unixAfter(cacheEntry.lastModified, ifModifiedSince) {
c.AbortWithStatus(http.StatusNotModified)
return
}
+ // At this point we know that the client wants the newest
+ // representation of the RSS feed, either because they didn't
+ // submit any 'If-None-Match' / 'If-Modified-Since' cache headers,
+ // or because they did but the account has posted more recently
+ // than the values of the submitted headers would suggest.
+ //
+ // If we had a cache hit earlier, we may not have called the
+ // getRSSFeed function yet; if that's the case then do call it
+ // now because we definitely need it.
if rssFeed == "" {
- // we had a cache entry already so we didn't call to get the rss feed yet
- rssFeed, errWithCode = getRssFeed()
+ rssFeed, errWithCode = getRSSFeed()
if errWithCode != nil {
apiutil.WebErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
return
@@ -153,3 +159,41 @@ func (m *Module) rssFeedGETHandler(c *gin.Context) {
c.Data(http.StatusOK, appRSSUTF8, []byte(rssFeed))
}
+
+// unixAfter returns true if the unix value of t1
+// is greater than (ie., after) the unix value of t2.
+func unixAfter(t1 time.Time, t2 time.Time) bool {
+ if t1.IsZero() {
+ // if t1 is zero then it cannot
+ // possibly be greater than t2.
+ return false
+ }
+
+ if t2.IsZero() {
+ // t1 is not zero but t2 is,
+ // so t1 is necessarily greater.
+ return true
+ }
+
+ return t1.Unix() > t2.Unix()
+}
+
+// extractIfModifiedSince parses a time.Time from the
+// 'If-Modified-Since' header of the given request.
+//
+// If no time was provided, or the provided time was
+// not parseable, it will return a zero time.
+func extractIfModifiedSince(r *http.Request) time.Time {
+ imsStr := r.Header.Get(ifModifiedSinceHeader)
+ if imsStr == "" {
+ return time.Time{} // Nothing set.
+ }
+
+ ifModifiedSince, err := http.ParseTime(imsStr)
+ if err != nil {
+ log.Errorf(r.Context(), "couldn't parse %s value '%s' as time: %q", ifModifiedSinceHeader, imsStr, err)
+ return time.Time{}
+ }
+
+ return ifModifiedSince
+}