From 07d27709957248008c61d6b8d553e3d2eb14d154 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:52:42 +0100 Subject: [PATCH] [feature] Change `instance-stats-randomize` to `instance-stats-mode` with multiple options; implement nodeinfo 2.1 (#3734) * [feature] Change `instance-stats-randomize` to `instance-stats-mode` with multiple options; implement nodeinfo 2.1 * swaggalaggadingdong --- docs/admin/robots.md | 4 ++ docs/api/swagger.yaml | 31 ++++++++++++- docs/configuration/instance.md | 38 ++++++++++++---- example/config.yaml | 38 ++++++++++++---- internal/api/client/instance/instanceget.go | 25 ++++++++++- internal/api/model/well-known.go | 15 +++++-- internal/api/nodeinfo.go | 4 +- internal/api/nodeinfo/nodeinfo.go | 11 +++-- internal/api/nodeinfo/nodeinfoget.go | 32 +++++++++++-- internal/config/config.go | 2 +- internal/config/const.go | 20 +++++++-- internal/config/flags.go | 2 +- internal/config/helpers.gen.go | 24 +++++----- internal/config/validate.go | 13 ++++++ internal/processing/fedi/wellknown.go | 50 ++++++++++++++++----- internal/typeutils/internaltofrontend.go | 12 ++--- internal/web/robots.go | 35 +++++++++++---- test/envparsing.sh | 4 +- 18 files changed, 283 insertions(+), 77 deletions(-) diff --git a/docs/admin/robots.md b/docs/admin/robots.md index b9e0468ce..3de4fe079 100644 --- a/docs/admin/robots.md +++ b/docs/admin/robots.md @@ -2,6 +2,10 @@ GoToSocial serves a `robots.txt` file on the host domain. This file contains rules that attempt to block known AI scrapers, as well as some other indexers. It also includes some rules to ensure things like API endpoints aren't indexed by search engines since there really isn't any point to them. +## Allow/disallow stats collection + +You can allow or disallow crawlers from collecting stats about your instance from the `/nodeinfo/2.0` and `/nodeinfo/2.1` endpoints by changing the setting `instance-stats-mode`, which modifies the `robots.txt` file. 
See [instance configuration](../configuration/instance.md) for more details. + ## AI scrapers The AI scrapers come from a [community maintained repository][airobots]. It's manually kept in sync for the time being. If you know of any missing robots, please send them a PR! diff --git a/docs/api/swagger.yaml b/docs/api/swagger.yaml index 65d332227..ed44a5561 100644 --- a/docs/api/swagger.yaml +++ b/docs/api/swagger.yaml @@ -77,10 +77,20 @@ definitions: x-go-package: github.com/superseriousbusiness/gotosocial/internal/api/model NodeInfoSoftware: properties: + homepage: + description: Homepage for the software. Omitted in version 2.0. + example: https://docs.gotosocial.org + type: string + x-go-name: Homepage name: example: gotosocial type: string x-go-name: Name + repository: + description: Repository for the software. Omitted in version 2.0. + example: https://codeberg.org/superseriousbusiness/gotosocial + type: string + x-go-name: Repository version: example: 0.1.2 1234567 type: string @@ -90,6 +100,10 @@ definitions: x-go-package: github.com/superseriousbusiness/gotosocial/internal/api/model NodeInfoUsage: properties: + localComments: + format: int64 + type: integer + x-go-name: LocalComments localPosts: format: int64 type: integer @@ -101,6 +115,14 @@ definitions: x-go-package: github.com/superseriousbusiness/gotosocial/internal/api/model NodeInfoUsers: properties: + activeHalfYear: + format: int64 + type: integer + x-go-name: ActiveHalfYear + activeMonth: + format: int64 + type: integer + x-go-name: ActiveMonth total: format: int64 type: integer @@ -12504,12 +12526,19 @@ paths: summary: Returns code 200 if GoToSocial is "live", ie., able to respond to HTTP requests. tags: - health - /nodeinfo/2.0: + /nodeinfo/{schema_version}: get: description: 'See: https://nodeinfo.diaspora.software/schema.html' operationId: nodeInfoGet + parameters: + - description: Schema version of nodeinfo to request. 2.0 and 2.1 are currently supported. 
+ in: path + name: schema_version + required: true + type: string produces: - application/json; profile="http://nodeinfo.diaspora.software/ns/schema/2.0#" + - application/json; profile="http://nodeinfo.diaspora.software/ns/schema/2.1#" responses: "200": description: "" diff --git a/docs/configuration/instance.md b/docs/configuration/instance.md index fdaf324cf..bffec8f70 100644 --- a/docs/configuration/instance.md +++ b/docs/configuration/instance.md @@ -139,14 +139,36 @@ instance-subscriptions-process-from: "23:00" # Default: "24h" (once per day). instance-subscriptions-process-every: "24h" -# Bool. Set this to true to randomize stats served at -# the /api/v1|v2/instance and /nodeinfo/2.0 endpoints. +# String. Allows you to customize if and how stats are served to +# crawlers at the /api/v1|v2/instance and /nodeinfo endpoints. # -# This can be useful when you don't want bots to obtain -# reliable information about the amount of users and -# statuses on your instance. +# Note that no matter what you set below, the /api/v1|v2/instance +# endpoints will not be allowed by robots.txt, as these are client +# API endpoints. # -# Options: [true, false] -# Default: false -instance-stats-randomize: false +# "" / empty string (default mode): Serve accurate stats at instance +# and nodeinfo endpoints, and DISALLOW crawlers from crawling +# those endpoints in robots.txt. This mode is equivalent to politely +# asking crawlers not to crawl, but there's no guarantee they will obey, +# as unfortunately many crawlers don't even check robots.txt. +# +# "zero": Serve zeroed-out stats at instance and nodeinfo endpoints, +# and DISALLOW crawlers from crawling those endpoints in robots.txt. +# This mode prevents even ill-behaved crawlers from gathering stats +# about your instance, as all gathered values will be 0. This is the +# safest way of preserving your instance's privacy in terms of stats. 
+# +# "serve": Serve accurate stats at instance and nodeinfo endpoints, +# and ALLOW crawlers to crawl those endpoints. This mode is useful +# if you want to contribute to fediverse statistics collection projects. +# +# "baffle": Serve randomized, preposterous stats at instance and nodeinfo +# endpoints, and DISALLOW crawlers from crawling those endpoints in robots.txt. +# This mode can be useful to annoy crawlers that don't respect robots.txt. +# Warning that this may draw the ire of crawler implementers who don't +# respect robots.txt, and may therefore put a target on your instance. +# +# Options: ["", "zero", "serve", "baffle"] +# Default: "" +instance-stats-mode: "" ``` diff --git a/example/config.yaml b/example/config.yaml index 10d7799c6..60d56bafc 100644 --- a/example/config.yaml +++ b/example/config.yaml @@ -425,16 +425,38 @@ instance-subscriptions-process-from: "23:00" # Default: "24h" (once per day). instance-subscriptions-process-every: "24h" -# Bool. Set this to true to randomize stats served at -# the /api/v1|v2/instance and /nodeinfo/2.0 endpoints. +# String. Allows you to customize if and how stats are served to +# crawlers at the /api/v1|v2/instance and /nodeinfo endpoints. # -# This can be useful when you don't want bots to obtain -# reliable information about the amount of users and -# statuses on your instance. +# Note that no matter what you set below, the /api/v1|v2/instance +# endpoints will not be allowed by robots.txt, as these are client +# API endpoints. # -# Options: [true, false] -# Default: false -instance-stats-randomize: false +# "" / empty string (default mode): Serve accurate stats at instance +# and nodeinfo endpoints, and DISALLOW crawlers from crawling +# those endpoints in robots.txt. This mode is equivalent to politely +# asking crawlers not to crawl, but there's no guarantee they will obey, +# as unfortunately many crawlers don't even check robots.txt. 
+# +# "zero": Serve zeroed-out stats at instance and nodeinfo endpoints, +# and DISALLOW crawlers from crawling those endpoints in robots.txt. +# This mode prevents even ill-behaved crawlers from gathering stats +# about your instance, as all gathered values will be 0. This is the +# safest way of preserving your instance's privacy in terms of stats. +# +# "serve": Serve accurate stats at instance and nodeinfo endpoints, +# and ALLOW crawlers to crawl those endpoints. This mode is useful +# if you want to contribute to fediverse statistics collection projects. +# +# "baffle": Serve randomized, preposterous stats at instance and nodeinfo +# endpoints, and DISALLOW crawlers from crawling those endpoints in robots.txt. +# This mode can be useful to annoy crawlers that don't respect robots.txt. +# Warning that this may draw the ire of crawler implementers who don't +# respect robots.txt, and may therefore put a target on your instance. +# +# Options: ["", "zero", "serve", "baffle"] +# Default: "" +instance-stats-mode: "" ########################### ##### ACCOUNTS CONFIG ##### diff --git a/internal/api/client/instance/instanceget.go b/internal/api/client/instance/instanceget.go index d7a688b43..3ca69d93b 100644 --- a/internal/api/client/instance/instanceget.go +++ b/internal/api/client/instance/instanceget.go @@ -60,10 +60,21 @@ func (m *Module) InstanceInformationGETHandlerV1(c *gin.Context) { return } - if config.GetInstanceStatsRandomize() { + switch config.GetInstanceStatsMode() { + + case config.InstanceStatsModeBaffle: // Replace actual stats with cached randomized ones. instance.Stats["user_count"] = util.Ptr(int(instance.RandomStats.TotalUsers)) instance.Stats["status_count"] = util.Ptr(int(instance.RandomStats.Statuses)) + + case config.InstanceStatsModeZero: + // Replace actual stats with zero. + instance.Stats["user_count"] = new(int) + instance.Stats["status_count"] = new(int) + + default: + // serve or default. + // Leave stats alone. 
} apiutil.JSON(c, http.StatusOK, instance) @@ -101,9 +112,19 @@ func (m *Module) InstanceInformationGETHandlerV2(c *gin.Context) { return } - if config.GetInstanceStatsRandomize() { + switch config.GetInstanceStatsMode() { + + case config.InstanceStatsModeBaffle: // Replace actual stats with cached randomized ones. instance.Usage.Users.ActiveMonth = int(instance.RandomStats.MonthlyActiveUsers) + + case config.InstanceStatsModeZero: + // Replace actual stats with zero. + instance.Usage.Users.ActiveMonth = 0 + + default: + // serve or default. + // Leave stats alone. } apiutil.JSON(c, http.StatusOK, instance) diff --git a/internal/api/model/well-known.go b/internal/api/model/well-known.go index 54d9912c8..d9948f951 100644 --- a/internal/api/model/well-known.go +++ b/internal/api/model/well-known.go @@ -70,6 +70,12 @@ type NodeInfoSoftware struct { Name string `json:"name"` // example: 0.1.2 1234567 Version string `json:"version"` + // Repository for the software. Omitted in version 2.0. + // example: https://codeberg.org/superseriousbusiness/gotosocial + Repository string `json:"repository,omitempty"` + // Homepage for the software. Omitted in version 2.0. + // example: https://docs.gotosocial.org + Homepage string `json:"homepage,omitempty"` } // NodeInfoServices represents inbound and outbound services that this node offers connections to. @@ -80,13 +86,16 @@ type NodeInfoServices struct { // NodeInfoUsage represents usage information about this server, such as number of users. type NodeInfoUsage struct { - Users NodeInfoUsers `json:"users"` - LocalPosts int `json:"localPosts"` + Users NodeInfoUsers `json:"users"` + LocalPosts int `json:"localPosts,omitempty"` + LocalComments int `json:"localComments,omitempty"` } // NodeInfoUsers represents aggregate information about the users on the server. 
type NodeInfoUsers struct { - Total int `json:"total"` + Total int `json:"total"` + ActiveHalfYear int `json:"activeHalfYear,omitempty"` + ActiveMonth int `json:"activeMonth,omitempty"` } // HostMeta represents a hostmeta document. diff --git a/internal/api/nodeinfo.go b/internal/api/nodeinfo.go index fb7918edc..29942aba4 100644 --- a/internal/api/nodeinfo.go +++ b/internal/api/nodeinfo.go @@ -36,9 +36,9 @@ func (w *NodeInfo) Route(r *router.Router, m ...gin.HandlerFunc) { // attach middlewares appropriate for this group nodeInfoGroup.Use(m...) nodeInfoGroup.Use( - // Allow public cache for 2 minutes. + // Allow public cache for 24 hours. middleware.CacheControl(middleware.CacheControlConfig{ - Directives: []string{"public", "max-age=120"}, + Directives: []string{"public", "max-age=86400"}, Vary: []string{"Accept-Encoding"}, }), ) diff --git a/internal/api/nodeinfo/nodeinfo.go b/internal/api/nodeinfo/nodeinfo.go index bf334b5e2..96adbc956 100644 --- a/internal/api/nodeinfo/nodeinfo.go +++ b/internal/api/nodeinfo/nodeinfo.go @@ -25,9 +25,12 @@ ) const ( - NodeInfo2Version = "2.0" - NodeInfo2Path = "/" + NodeInfo2Version - NodeInfo2ContentType = "application/json; profile=\"http://nodeinfo.diaspora.software/ns/schema/" + NodeInfo2Version + "#\"" + NodeInfo20 = "2.0" + NodeInfo20ContentType = "application/json; profile=\"http://nodeinfo.diaspora.software/ns/schema/" + NodeInfo20 + "#\"" + NodeInfo21 = "2.1" + NodeInfo21ContentType = "application/json; profile=\"http://nodeinfo.diaspora.software/ns/schema/" + NodeInfo21 + "#\"" + NodeInfoSchema = "schema" + NodeInfoPath = "/:" + NodeInfoSchema ) type Module struct { @@ -41,5 +44,5 @@ func New(processor *processing.Processor) *Module { } func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) { - attachHandler(http.MethodGet, NodeInfo2Path, m.NodeInfo2GETHandler) + attachHandler(http.MethodGet, NodeInfoPath, m.NodeInfo2GETHandler) } diff --git 
a/internal/api/nodeinfo/nodeinfoget.go b/internal/api/nodeinfo/nodeinfoget.go index 368a5503d..28a60cff9 100644 --- a/internal/api/nodeinfo/nodeinfoget.go +++ b/internal/api/nodeinfo/nodeinfoget.go @@ -18,6 +18,7 @@ package nodeinfo import ( + "errors" "net/http" "github.com/gin-gonic/gin" @@ -25,7 +26,7 @@ "github.com/superseriousbusiness/gotosocial/internal/gtserror" ) -// NodeInfo2GETHandler swagger:operation GET /nodeinfo/2.0 nodeInfoGet +// NodeInfo2GETHandler swagger:operation GET /nodeinfo/{schema_version} nodeInfoGet // // Returns a compliant nodeinfo response to node info queries. // @@ -35,8 +36,17 @@ // tags: // - nodeinfo // +// parameters: +// - +// name: schema_version +// type: string +// description: Schema version of nodeinfo to request. 2.0 and 2.1 are currently supported. +// in: path +// required: true +// // produces: // - application/json; profile="http://nodeinfo.diaspora.software/ns/schema/2.0#" +// - application/json; profile="http://nodeinfo.diaspora.software/ns/schema/2.1#" // // responses: // '200': @@ -48,7 +58,23 @@ func (m *Module) NodeInfo2GETHandler(c *gin.Context) { return } - nodeInfo, errWithCode := m.processor.Fedi().NodeInfoGet(c.Request.Context()) + var ( + contentType string + schemaVersion = c.Param(NodeInfoSchema) + ) + + switch schemaVersion { + case NodeInfo20: + contentType = NodeInfo20ContentType + case NodeInfo21: + contentType = NodeInfo21ContentType + default: + const errText = "only nodeinfo 2.0 and 2.1 are supported" + apiutil.ErrorHandler(c, gtserror.NewErrorNotFound(errors.New(errText), errText), m.processor.InstanceGetV1) + return + } + + nodeInfo, errWithCode := m.processor.Fedi().NodeInfoGet(c.Request.Context(), schemaVersion) if errWithCode != nil { apiutil.ErrorHandler(c, errWithCode, m.processor.InstanceGetV1) return @@ -59,7 +85,7 @@ func (m *Module) NodeInfo2GETHandler(c *gin.Context) { c.Writer, c.Request, http.StatusOK, - NodeInfo2ContentType, + contentType, nodeInfo, ) } diff --git 
a/internal/config/config.go b/internal/config/config.go index 807d686d5..5c59c47cc 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -90,7 +90,7 @@ type Configuration struct { InstanceLanguages language.Languages `name:"instance-languages" usage:"BCP47 language tags for the instance. Used to indicate the preferred languages of instance residents (in order from most-preferred to least-preferred)."` InstanceSubscriptionsProcessFrom string `name:"instance-subscriptions-process-from" usage:"Time of day from which to start running instance subscriptions processing jobs. Should be in the format 'hh:mm:ss', eg., '15:04:05'."` InstanceSubscriptionsProcessEvery time.Duration `name:"instance-subscriptions-process-every" usage:"Period to elapse between instance subscriptions processing jobs, starting from instance-subscriptions-process-from."` - InstanceStatsRandomize bool `name:"instance-stats-randomize" usage:"Set to true to randomize the stats served at api/v1/instance and api/v2/instance endpoints. Home page stats remain unchanged."` + InstanceStatsMode string `name:"instance-stats-mode" usage:"Allows you to customize the way stats are served to crawlers: one of '', 'serve', 'zero', 'baffle'. Home page stats remain unchanged."` AccountsRegistrationOpen bool `name:"accounts-registration-open" usage:"Allow anyone to submit an account signup request. If false, server will be invite-only."` AccountsReasonRequired bool `name:"accounts-reason-required" usage:"Do new account signups require a reason to be submitted on registration?"` diff --git a/internal/config/const.go b/internal/config/const.go index 48087c4ce..c8e7a9f9d 100644 --- a/internal/config/const.go +++ b/internal/config/const.go @@ -17,16 +17,28 @@ package config +// Instance federation mode determines how this +// instance federates with others (if at all). const ( - // Instance federation mode determines how this - // instance federates with others (if at all). 
InstanceFederationModeBlocklist = "blocklist" InstanceFederationModeAllowlist = "allowlist" InstanceFederationModeDefault = InstanceFederationModeBlocklist +) - // Request header filter mode determines how - // this instance will perform request filtering. +// Request header filter mode determines how +// this instance will perform request filtering. +const ( RequestHeaderFilterModeAllow = "allow" RequestHeaderFilterModeBlock = "block" RequestHeaderFilterModeDisabled = "" ) + +// Instance stats mode determines if and how +// stats about the instance are served at +// nodeinfo and api/v1|v2/instance endpoints. +const ( + InstanceStatsModeDefault = "" + InstanceStatsModeServe = "serve" + InstanceStatsModeZero = "zero" + InstanceStatsModeBaffle = "baffle" +) diff --git a/internal/config/flags.go b/internal/config/flags.go index b0b530d0b..d67085d6d 100644 --- a/internal/config/flags.go +++ b/internal/config/flags.go @@ -92,7 +92,7 @@ func (s *ConfigState) AddServerFlags(cmd *cobra.Command) { cmd.Flags().StringSlice(InstanceLanguagesFlag(), cfg.InstanceLanguages.TagStrs(), fieldtag("InstanceLanguages", "usage")) cmd.Flags().String(InstanceSubscriptionsProcessFromFlag(), cfg.InstanceSubscriptionsProcessFrom, fieldtag("InstanceSubscriptionsProcessFrom", "usage")) cmd.Flags().Duration(InstanceSubscriptionsProcessEveryFlag(), cfg.InstanceSubscriptionsProcessEvery, fieldtag("InstanceSubscriptionsProcessEvery", "usage")) - cmd.Flags().Bool(InstanceStatsRandomizeFlag(), cfg.InstanceStatsRandomize, fieldtag("InstanceStatsRandomize", "usage")) + cmd.Flags().String(InstanceStatsModeFlag(), cfg.InstanceStatsMode, fieldtag("InstanceStatsMode", "usage")) // Accounts cmd.Flags().Bool(AccountsRegistrationOpenFlag(), cfg.AccountsRegistrationOpen, fieldtag("AccountsRegistrationOpen", "usage")) diff --git a/internal/config/helpers.gen.go b/internal/config/helpers.gen.go index 469c46a7a..54d1b62d9 100644 --- a/internal/config/helpers.gen.go +++ b/internal/config/helpers.gen.go @@ 
-1057,30 +1057,30 @@ func SetInstanceSubscriptionsProcessEvery(v time.Duration) { global.SetInstanceSubscriptionsProcessEvery(v) } -// GetInstanceStatsRandomize safely fetches the Configuration value for state's 'InstanceStatsRandomize' field -func (st *ConfigState) GetInstanceStatsRandomize() (v bool) { +// GetInstanceStatsMode safely fetches the Configuration value for state's 'InstanceStatsMode' field +func (st *ConfigState) GetInstanceStatsMode() (v string) { st.mutex.RLock() - v = st.config.InstanceStatsRandomize + v = st.config.InstanceStatsMode st.mutex.RUnlock() return } -// SetInstanceStatsRandomize safely sets the Configuration value for state's 'InstanceStatsRandomize' field -func (st *ConfigState) SetInstanceStatsRandomize(v bool) { +// SetInstanceStatsMode safely sets the Configuration value for state's 'InstanceStatsMode' field +func (st *ConfigState) SetInstanceStatsMode(v string) { st.mutex.Lock() defer st.mutex.Unlock() - st.config.InstanceStatsRandomize = v + st.config.InstanceStatsMode = v st.reloadToViper() } -// InstanceStatsRandomizeFlag returns the flag name for the 'InstanceStatsRandomize' field -func InstanceStatsRandomizeFlag() string { return "instance-stats-randomize" } +// InstanceStatsModeFlag returns the flag name for the 'InstanceStatsMode' field +func InstanceStatsModeFlag() string { return "instance-stats-mode" } -// GetInstanceStatsRandomize safely fetches the value for global configuration 'InstanceStatsRandomize' field -func GetInstanceStatsRandomize() bool { return global.GetInstanceStatsRandomize() } +// GetInstanceStatsMode safely fetches the value for global configuration 'InstanceStatsMode' field +func GetInstanceStatsMode() string { return global.GetInstanceStatsMode() } -// SetInstanceStatsRandomize safely sets the value for global configuration 'InstanceStatsRandomize' field -func SetInstanceStatsRandomize(v bool) { global.SetInstanceStatsRandomize(v) } +// SetInstanceStatsMode safely sets the value for global 
configuration 'InstanceStatsMode' field +func SetInstanceStatsMode(v string) { global.SetInstanceStatsMode(v) } // GetAccountsRegistrationOpen safely fetches the Configuration value for state's 'AccountsRegistrationOpen' field func (st *ConfigState) GetAccountsRegistrationOpen() (v bool) { diff --git a/internal/config/validate.go b/internal/config/validate.go index c8ebd4f2d..a4ed08106 100644 --- a/internal/config/validate.go +++ b/internal/config/validate.go @@ -115,6 +115,19 @@ func Validate() error { SetInstanceLanguages(parsedLangs) } + // `instance-stats-mode` should be + // "", "zero", "serve", or "baffle" + switch statsMode := GetInstanceStatsMode(); statsMode { + case InstanceStatsModeDefault, InstanceStatsModeZero, InstanceStatsModeServe, InstanceStatsModeBaffle: + // No problem. + + default: + errf( + "%s must be set to empty string, zero, serve, or baffle, provided value was %s", + InstanceStatsModeFlag(), statsMode, + ) + } + // `web-assets-base-dir`. webAssetsBaseDir := GetWebAssetBaseDir() if webAssetsBaseDir == "" { diff --git a/internal/processing/fedi/wellknown.go b/internal/processing/fedi/wellknown.go index ac92370c8..42a8e38e4 100644 --- a/internal/processing/fedi/wellknown.go +++ b/internal/processing/fedi/wellknown.go @@ -31,9 +31,11 @@ hostMetaRel = "lrdd" hostMetaType = "application/xrd+xml" hostMetaTemplate = ".well-known/webfinger?resource={uri}" - nodeInfoVersion = "2.0" nodeInfoSoftwareName = "gotosocial" - nodeInfoRel = "http://nodeinfo.diaspora.software/ns/schema/" + nodeInfoVersion + nodeInfo20Rel = "http://nodeinfo.diaspora.software/ns/schema/2.0" + nodeInfo21Rel = "http://nodeinfo.diaspora.software/ns/schema/2.1" + nodeInfoRepo = "https://github.com/superseriousbusiness/gotosocial" + nodeInfoHomepage = "https://docs.gotosocial.org" webfingerProfilePage = "http://webfinger.net/rel/profile-page" webFingerProfilePageContentType = "text/html" webfingerSelf = "self" @@ -56,27 +58,43 @@ func (p *Processor) NodeInfoRelGet(ctx 
context.Context) (*apimodel.WellKnownResp return &apimodel.WellKnownResponse{ Links: []apimodel.Link{ { - Rel: nodeInfoRel, - Href: fmt.Sprintf("%s://%s/nodeinfo/%s", protocol, host, nodeInfoVersion), + Rel: nodeInfo20Rel, + Href: fmt.Sprintf("%s://%s/nodeinfo/2.0", protocol, host), + }, + { + Rel: nodeInfo21Rel, + Href: fmt.Sprintf("%s://%s/nodeinfo/2.1", protocol, host), }, }, }, nil } -// NodeInfoGet returns a node info struct in response to a node info request. -func (p *Processor) NodeInfoGet(ctx context.Context) (*apimodel.Nodeinfo, gtserror.WithCode) { +// NodeInfoGet returns a node info struct in response to a 2.0 or 2.1 node info request. +func (p *Processor) NodeInfoGet(ctx context.Context, schemaVersion string) (*apimodel.Nodeinfo, gtserror.WithCode) { + const () + var ( userCount int postCount int + mau int err error ) - if config.GetInstanceStatsRandomize() { + switch config.GetInstanceStatsMode() { + + case config.InstanceStatsModeBaffle: // Use randomized stats. stats := p.converter.RandomStats() userCount = int(stats.TotalUsers) postCount = int(stats.Statuses) - } else { + mau = int(stats.MonthlyActiveUsers) + + case config.InstanceStatsModeZero: + // Use zeroed stats + // (don't count anything). + + default: + // Mode is either "serve" or "default". // Count actual stats. 
host := config.GetHost() @@ -91,8 +109,8 @@ func (p *Processor) NodeInfoGet(ctx context.Context) (*apimodel.Nodeinfo, gtserr } } - return &apimodel.Nodeinfo{ - Version: nodeInfoVersion, + nodeInfo := &apimodel.Nodeinfo{ + Version: schemaVersion, Software: apimodel.NodeInfoSoftware{ Name: nodeInfoSoftwareName, Version: config.GetSoftwareVersion(), @@ -105,12 +123,20 @@ func (p *Processor) NodeInfoGet(ctx context.Context) (*apimodel.Nodeinfo, gtserr OpenRegistrations: config.GetAccountsRegistrationOpen(), Usage: apimodel.NodeInfoUsage{ Users: apimodel.NodeInfoUsers{ - Total: userCount, + Total: userCount, + ActiveMonth: mau, }, LocalPosts: postCount, }, Metadata: nodeInfoMetadata, - }, nil + } + + if schemaVersion == "2.1" { + nodeInfo.Software.Repository = nodeInfoRepo + nodeInfo.Software.Homepage = nodeInfoHomepage + } + + return nodeInfo, nil } // HostMetaGet returns a host-meta struct in response to a host-meta request. diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go index d966c054c..8375a8c3a 100644 --- a/internal/typeutils/internaltofrontend.go +++ b/internal/typeutils/internaltofrontend.go @@ -1745,9 +1745,9 @@ func (c *Converter) InstanceToAPIV1Instance(ctx context.Context, i *gtsmodel.Ins stats["domain_count"] = util.Ptr(domainCount) instance.Stats = stats - if config.GetInstanceStatsRandomize() { - // Whack some random stats on the instance - // to be injected by API handlers. + if config.GetInstanceStatsMode() == config.InstanceStatsModeBaffle { + // Whack random stats on the instance to be used + // by handlers in internal/api/client/instance. instance.RandomStats = c.RandomStats() } @@ -1827,9 +1827,9 @@ func (c *Converter) InstanceToAPIV2Instance(ctx context.Context, i *gtsmodel.Ins instance.Debug = util.Ptr(true) } - if config.GetInstanceStatsRandomize() { - // Whack some random stats on the instance - // to be injected by API handlers. 
+ if config.GetInstanceStatsMode() == config.InstanceStatsModeBaffle { + // Whack random stats on the instance to be used + // by handlers in internal/api/client/instance. instance.RandomStats = c.RandomStats() } diff --git a/internal/web/robots.go b/internal/web/robots.go index ed665db9d..524550642 100644 --- a/internal/web/robots.go +++ b/internal/web/robots.go @@ -21,6 +21,7 @@ "net/http" "github.com/gin-gonic/gin" + "github.com/superseriousbusiness/gotosocial/internal/config" ) const ( @@ -90,8 +91,8 @@ # Well-known.dev crawler. Indexes stuff under /.well-known. # https://well-known.dev/about/ -User-agent: WellKnownBot -Disallow: / +User-agent: WellKnownBot +Disallow: / # Rules for everything else. User-agent: * @@ -108,10 +109,6 @@ Disallow: /account_disabled Disallow: /signup -# Well-known endpoints. -Disallow: /.well-known/ -Disallow: /nodeinfo/ - # Fileserver/media. Disallow: /fileserver/ @@ -125,7 +122,17 @@ Disallow: /settings/ # Domain blocklist. -Disallow: /about/suspended` +Disallow: /about/suspended + +# Webfinger endpoint. +Disallow: /.well-known/webfinger +` + + robotsTxtNoNodeInfo = robotsTxt + ` +# Disallow nodeinfo +Disallow: /.well-known/nodeinfo +Disallow: /nodeinfo/ +` ) // robotsGETHandler returns a decent robots.txt that prevents crawling @@ -134,5 +141,17 @@ // More granular robots meta tags are then applied for web pages // depending on user preferences (see internal/web). func (m *Module) robotsGETHandler(c *gin.Context) { - c.String(http.StatusOK, robotsTxt) + // Allow caching for 24 hrs. + // https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4 + c.Header("Cache-Control", "public, max-age=86400") + + if config.GetInstanceStatsMode() == config.InstanceStatsModeServe { + // Serve robots.txt as-is + // without forbidding nodeinfo. + c.String(http.StatusOK, robotsTxt) + return + } + + // Disallow scraping nodeinfo. 
+ c.String(http.StatusOK, robotsTxtNoNodeInfo) } diff --git a/test/envparsing.sh b/test/envparsing.sh index 565ecb1af..904dc8764 100755 --- a/test/envparsing.sh +++ b/test/envparsing.sh @@ -118,7 +118,7 @@ EXPECT=$(cat << "EOF" "nl", "en-GB" ], - "instance-stats-randomize": true, + "instance-stats-mode": "baffle", "instance-subscriptions-process-every": 86400000000000, "instance-subscriptions-process-from": "23:00", "landing-page-user": "admin", @@ -249,7 +249,7 @@ GTS_INSTANCE_FEDERATION_SPAM_FILTER=true \ GTS_INSTANCE_DELIVER_TO_SHARED_INBOXES=false \ GTS_INSTANCE_INJECT_MASTODON_VERSION=true \ GTS_INSTANCE_LANGUAGES="nl,en-gb" \ -GTS_INSTANCE_STATS_RANDOMIZE=true \ +GTS_INSTANCE_STATS_MODE="baffle" \ GTS_ACCOUNTS_ALLOW_CUSTOM_CSS=true \ GTS_ACCOUNTS_CUSTOM_CSS_LENGTH=5000 \ GTS_ACCOUNTS_REGISTRATION_OPEN=true \