mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-02-10 06:40:17 +00:00
[feature] Use X-Robots-Tag
headers to instruct scrapers/crawlers (#3737)
* [feature] Use `X-Robots-Tag` headers to instruct scrapers/crawlers * use switch for RobotsHeaders
This commit is contained in:
parent
bfb81f5bac
commit
baed591a1d
|
@ -417,7 +417,8 @@ func(context.Context, time.Time) {
|
||||||
return fmt.Errorf("error creating main router: %s", err)
|
return fmt.Errorf("error creating main router: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start preparing middleware stack.
|
// Start preparing global middleware
|
||||||
|
// stack (used for every request).
|
||||||
middlewares := make([]gin.HandlerFunc, 1)
|
middlewares := make([]gin.HandlerFunc, 1)
|
||||||
|
|
||||||
// RequestID middleware must run before tracing!
|
// RequestID middleware must run before tracing!
|
||||||
|
@ -499,13 +500,14 @@ func(context.Context, time.Time) {
|
||||||
metricsModule = api.NewMetrics() // Metrics endpoints
|
metricsModule = api.NewMetrics() // Metrics endpoints
|
||||||
healthModule = api.NewHealth(dbService.Ready) // Health check endpoints
|
healthModule = api.NewHealth(dbService.Ready) // Health check endpoints
|
||||||
fileserverModule = api.NewFileserver(process) // fileserver endpoints
|
fileserverModule = api.NewFileserver(process) // fileserver endpoints
|
||||||
|
robotsModule = api.NewRobots() // robots.txt endpoint
|
||||||
wellKnownModule = api.NewWellKnown(process) // .well-known endpoints
|
wellKnownModule = api.NewWellKnown(process) // .well-known endpoints
|
||||||
nodeInfoModule = api.NewNodeInfo(process) // nodeinfo endpoint
|
nodeInfoModule = api.NewNodeInfo(process) // nodeinfo endpoint
|
||||||
activityPubModule = api.NewActivityPub(dbService, process) // ActivityPub endpoints
|
activityPubModule = api.NewActivityPub(dbService, process) // ActivityPub endpoints
|
||||||
webModule = web.New(dbService, process) // web pages + user profiles + settings panels etc
|
webModule = web.New(dbService, process) // web pages + user profiles + settings panels etc
|
||||||
)
|
)
|
||||||
|
|
||||||
// create required middleware
|
// Create per-route / per-grouping middlewares.
|
||||||
// rate limiting
|
// rate limiting
|
||||||
rlLimit := config.GetAdvancedRateLimitRequests()
|
rlLimit := config.GetAdvancedRateLimitRequests()
|
||||||
clLimit := middleware.RateLimit(rlLimit, config.GetAdvancedRateLimitExceptionsParsed()) // client api
|
clLimit := middleware.RateLimit(rlLimit, config.GetAdvancedRateLimitExceptionsParsed()) // client api
|
||||||
|
@ -518,10 +520,25 @@ func(context.Context, time.Time) {
|
||||||
retryAfter := config.GetAdvancedThrottlingRetryAfter()
|
retryAfter := config.GetAdvancedThrottlingRetryAfter()
|
||||||
clThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // client api
|
clThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // client api
|
||||||
s2sThrottle := middleware.Throttle(cpuMultiplier, retryAfter)
|
s2sThrottle := middleware.Throttle(cpuMultiplier, retryAfter)
|
||||||
|
|
||||||
// server-to-server (AP)
|
// server-to-server (AP)
|
||||||
fsThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // fileserver / web templates / emojis
|
fsThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // fileserver / web templates / emojis
|
||||||
pkThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // throttle public key endpoint separately
|
pkThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // throttle public key endpoint separately
|
||||||
|
|
||||||
|
// Robots http headers (x-robots-tag).
|
||||||
|
//
|
||||||
|
// robotsDisallowAll is used for client API + S2S endpoints
|
||||||
|
// that definitely should never be indexed by crawlers.
|
||||||
|
//
|
||||||
|
// robotsDisallowAIOnly is used for utility endpoints,
|
||||||
|
// fileserver, and for web endpoints that set their own
|
||||||
|
// additional robots directives in HTML meta tags.
|
||||||
|
//
|
||||||
|
// Other endpoints like .well-known and nodeinfo handle
|
||||||
|
// robots headers themselves based on configuration.
|
||||||
|
robotsDisallowAll := middleware.RobotsHeaders("")
|
||||||
|
robotsDisallowAIOnly := middleware.RobotsHeaders("aiOnly")
|
||||||
|
|
||||||
// Gzip middleware is applied to all endpoints except
|
// Gzip middleware is applied to all endpoints except
|
||||||
// fileserver (compression too expensive for those),
|
// fileserver (compression too expensive for those),
|
||||||
// health (which really doesn't need compression), and
|
// health (which really doesn't need compression), and
|
||||||
|
@ -531,17 +548,18 @@ func(context.Context, time.Time) {
|
||||||
|
|
||||||
// these should be routed in order;
|
// these should be routed in order;
|
||||||
// apply throttling *after* rate limiting
|
// apply throttling *after* rate limiting
|
||||||
authModule.Route(route, clLimit, clThrottle, gzip)
|
authModule.Route(route, clLimit, clThrottle, robotsDisallowAll, gzip)
|
||||||
clientModule.Route(route, clLimit, clThrottle, gzip)
|
clientModule.Route(route, clLimit, clThrottle, robotsDisallowAll, gzip)
|
||||||
metricsModule.Route(route, clLimit, clThrottle)
|
metricsModule.Route(route, clLimit, clThrottle, robotsDisallowAIOnly)
|
||||||
healthModule.Route(route, clLimit, clThrottle)
|
healthModule.Route(route, clLimit, clThrottle, robotsDisallowAIOnly)
|
||||||
fileserverModule.Route(route, fsMainLimit, fsThrottle)
|
fileserverModule.Route(route, fsMainLimit, fsThrottle, robotsDisallowAIOnly)
|
||||||
fileserverModule.RouteEmojis(route, instanceAccount.ID, fsEmojiLimit, fsThrottle)
|
fileserverModule.RouteEmojis(route, instanceAccount.ID, fsEmojiLimit, fsThrottle, robotsDisallowAIOnly)
|
||||||
|
robotsModule.Route(route, fsMainLimit, fsThrottle, robotsDisallowAIOnly, gzip)
|
||||||
wellKnownModule.Route(route, gzip, s2sLimit, s2sThrottle)
|
wellKnownModule.Route(route, gzip, s2sLimit, s2sThrottle)
|
||||||
nodeInfoModule.Route(route, s2sLimit, s2sThrottle, gzip)
|
nodeInfoModule.Route(route, s2sLimit, s2sThrottle, gzip)
|
||||||
activityPubModule.Route(route, s2sLimit, s2sThrottle, gzip)
|
activityPubModule.Route(route, s2sLimit, s2sThrottle, robotsDisallowAll, gzip)
|
||||||
activityPubModule.RoutePublicKey(route, s2sLimit, pkThrottle, gzip)
|
activityPubModule.RoutePublicKey(route, s2sLimit, pkThrottle, robotsDisallowAll, gzip)
|
||||||
webModule.Route(route, fsMainLimit, fsThrottle, gzip)
|
webModule.Route(route, fsMainLimit, fsThrottle, robotsDisallowAIOnly, gzip)
|
||||||
|
|
||||||
// Finally start the main http server!
|
// Finally start the main http server!
|
||||||
if err := route.Start(); err != nil {
|
if err := route.Start(); err != nil {
|
||||||
|
|
|
@ -284,6 +284,7 @@
|
||||||
metricsModule = api.NewMetrics() // Metrics endpoints
|
metricsModule = api.NewMetrics() // Metrics endpoints
|
||||||
healthModule = api.NewHealth(state.DB.Ready) // Health check endpoints
|
healthModule = api.NewHealth(state.DB.Ready) // Health check endpoints
|
||||||
fileserverModule = api.NewFileserver(processor) // fileserver endpoints
|
fileserverModule = api.NewFileserver(processor) // fileserver endpoints
|
||||||
|
robotsModule = api.NewRobots() // robots.txt endpoint
|
||||||
wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints
|
wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints
|
||||||
nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint
|
nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint
|
||||||
activityPubModule = api.NewActivityPub(state.DB, processor) // ActivityPub endpoints
|
activityPubModule = api.NewActivityPub(state.DB, processor) // ActivityPub endpoints
|
||||||
|
@ -297,6 +298,7 @@
|
||||||
healthModule.Route(route)
|
healthModule.Route(route)
|
||||||
fileserverModule.Route(route)
|
fileserverModule.Route(route)
|
||||||
fileserverModule.RouteEmojis(route, instanceAccount.ID)
|
fileserverModule.RouteEmojis(route, instanceAccount.ID)
|
||||||
|
robotsModule.Route(route)
|
||||||
wellKnownModule.Route(route)
|
wellKnownModule.Route(route)
|
||||||
nodeInfoModule.Route(route)
|
nodeInfoModule.Route(route)
|
||||||
activityPubModule.Route(route)
|
activityPubModule.Route(route)
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
import (
|
import (
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/api/nodeinfo"
|
"github.com/superseriousbusiness/gotosocial/internal/api/nodeinfo"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/router"
|
"github.com/superseriousbusiness/gotosocial/internal/router"
|
||||||
|
@ -43,6 +44,16 @@ func (w *NodeInfo) Route(r *router.Router, m ...gin.HandlerFunc) {
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// If instance is configured to serve instance stats
|
||||||
|
// faithfully at nodeinfo, we should allow robots to
|
||||||
|
// crawl nodeinfo endpoints in a limited capacity.
|
||||||
|
// In all other cases, disallow everything.
|
||||||
|
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
||||||
|
nodeInfoGroup.Use(middleware.RobotsHeaders("allowSome"))
|
||||||
|
} else {
|
||||||
|
nodeInfoGroup.Use(middleware.RobotsHeaders(""))
|
||||||
|
}
|
||||||
|
|
||||||
w.nodeInfo.Route(nodeInfoGroup.Handle)
|
w.nodeInfo.Route(nodeInfoGroup.Handle)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
52
internal/api/robots.go
Normal file
52
internal/api/robots.go
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
// GoToSocial
|
||||||
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||||
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/api/robots"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/router"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Robots struct {
|
||||||
|
robots *robots.Module
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rb *Robots) Route(r *router.Router, m ...gin.HandlerFunc) {
|
||||||
|
// Create a group so we can attach middlewares.
|
||||||
|
robotsGroup := r.AttachGroup("robots.txt")
|
||||||
|
|
||||||
|
// Use passed-in middlewares.
|
||||||
|
robotsGroup.Use(m...)
|
||||||
|
|
||||||
|
// Allow caching for 24 hrs.
|
||||||
|
// https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4
|
||||||
|
robotsGroup.Use(
|
||||||
|
middleware.CacheControl(middleware.CacheControlConfig{
|
||||||
|
Directives: []string{"public", "max-age=86400"},
|
||||||
|
Vary: []string{"Accept-Encoding"},
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
|
||||||
|
rb.robots.Route(robotsGroup.Handle)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewRobots() *Robots {
|
||||||
|
return &Robots{}
|
||||||
|
}
|
57
internal/api/robots/robots.go
Normal file
57
internal/api/robots/robots.go
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
// GoToSocial
|
||||||
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||||
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
package robots
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Module serves the robots.txt endpoint.
// It has no fields and so carries no state.
type Module struct{}

// New returns a pointer to a new, empty Module.
func New() *Module {
	return new(Module)
}
|
||||||
|
|
||||||
|
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||||
|
// Serve different robots.txt file depending on instance
|
||||||
|
// stats mode: Don't disallow scraping nodeinfo if admin
|
||||||
|
// has opted in to serving accurate stats there. In all
|
||||||
|
// other cases, disallow scraping nodeinfo.
|
||||||
|
var handler gin.HandlerFunc
|
||||||
|
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
||||||
|
handler = m.robotsGETHandler
|
||||||
|
} else {
|
||||||
|
handler = m.robotsGETHandlerDisallowNodeInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attach handler at empty path as this
|
||||||
|
// is already grouped under /robots.txt.
|
||||||
|
attachHandler(http.MethodGet, "", handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Module) robotsGETHandler(c *gin.Context) {
|
||||||
|
c.String(http.StatusOK, apiutil.RobotsTxt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Module) robotsGETHandlerDisallowNodeInfo(c *gin.Context) {
|
||||||
|
c.String(http.StatusOK, apiutil.RobotsTxtDisallowNodeInfo)
|
||||||
|
}
|
|
@ -15,19 +15,17 @@
|
||||||
// You should have received a copy of the GNU Affero General Public License
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
package web
|
package util
|
||||||
|
|
||||||
import (
|
|
||||||
"net/http"
|
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
// See:
|
||||||
|
//
|
||||||
|
// - https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
|
||||||
|
// - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag
|
||||||
|
// - https://www.rfc-editor.org/rfc/rfc9309.html
|
||||||
const (
|
const (
|
||||||
robotsPath = "/robots.txt"
|
RobotsDirectivesDisallow = "noindex, nofollow"
|
||||||
robotsMetaAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard" // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
|
RobotsDirectivesAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard"
|
||||||
robotsTxt = `# GoToSocial robots.txt -- to edit, see internal/web/robots.go
|
RobotsTxt = `# GoToSocial robots.txt -- to edit, see internal/api/util/robots.go
|
||||||
# More info @ https://developers.google.com/search/docs/crawling-indexing/robots/intro
|
# More info @ https://developers.google.com/search/docs/crawling-indexing/robots/intro
|
||||||
|
|
||||||
# AI scrapers and the like.
|
# AI scrapers and the like.
|
||||||
|
@ -127,31 +125,9 @@
|
||||||
# Webfinger endpoint.
|
# Webfinger endpoint.
|
||||||
Disallow: /.well-known/webfinger
|
Disallow: /.well-known/webfinger
|
||||||
`
|
`
|
||||||
|
RobotsTxtDisallowNodeInfo = RobotsTxt + `
|
||||||
robotsTxtNoNodeInfo = robotsTxt + `
|
|
||||||
# Disallow nodeinfo
|
# Disallow nodeinfo
|
||||||
Disallow: /.well-known/nodeinfo
|
Disallow: /.well-known/nodeinfo
|
||||||
Disallow: /nodeinfo/
|
Disallow: /nodeinfo/
|
||||||
`
|
`
|
||||||
)
|
)
|
||||||
|
|
||||||
// robotsGETHandler returns a decent robots.txt that prevents crawling
|
|
||||||
// the api, auth pages, settings pages, etc.
|
|
||||||
//
|
|
||||||
// More granular robots meta tags are then applied for web pages
|
|
||||||
// depending on user preferences (see internal/web).
|
|
||||||
func (m *Module) robotsGETHandler(c *gin.Context) {
|
|
||||||
// Allow caching for 24 hrs.
|
|
||||||
// https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4
|
|
||||||
c.Header("Cache-Control", "public, max-age=86400")
|
|
||||||
|
|
||||||
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
|
||||||
// Serve robots.txt as-is
|
|
||||||
// without forbidding nodeinfo.
|
|
||||||
c.String(http.StatusOK, robotsTxt)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Disallow scraping nodeinfo.
|
|
||||||
c.String(http.StatusOK, robotsTxtNoNodeInfo)
|
|
||||||
}
|
|
|
@ -21,6 +21,7 @@
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -40,5 +41,6 @@ func New(processor *processing.Processor) *Module {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||||
attachHandler(http.MethodGet, HostMetaPath, m.HostMetaGETHandler)
|
// Attach handler, injecting robots http header middleware to disallow all.
|
||||||
|
attachHandler(http.MethodGet, HostMetaPath, middleware.RobotsHeaders(""), m.HostMetaGETHandler)
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,10 @@
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -42,5 +46,57 @@ func New(processor *processing.Processor) *Module {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||||
attachHandler(http.MethodGet, NodeInfoWellKnownPath, m.NodeInfoWellKnownGETHandler)
|
// If instance is configured to serve instance stats
|
||||||
|
// faithfully at nodeinfo, we should allow robots to
|
||||||
|
// crawl nodeinfo endpoints in a limited capacity.
|
||||||
|
// In all other cases, disallow everything.
|
||||||
|
var robots gin.HandlerFunc
|
||||||
|
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
||||||
|
robots = middleware.RobotsHeaders("allowSome")
|
||||||
|
} else {
|
||||||
|
robots = middleware.RobotsHeaders("")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attach handler, injecting robots http header middleware.
|
||||||
|
attachHandler(http.MethodGet, NodeInfoWellKnownPath, robots, m.NodeInfoWellKnownGETHandler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NodeInfoWellKnownGETHandler swagger:operation GET /.well-known/nodeinfo nodeInfoWellKnownGet
|
||||||
|
//
|
||||||
|
// Returns a well-known response which redirects callers to `/nodeinfo/2.0`.
|
||||||
|
//
|
||||||
|
// eg. `{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"http://example.org/nodeinfo/2.0"}]}`
|
||||||
|
// See: https://nodeinfo.diaspora.software/protocol.html
|
||||||
|
//
|
||||||
|
// ---
|
||||||
|
// tags:
|
||||||
|
// - .well-known
|
||||||
|
//
|
||||||
|
// produces:
|
||||||
|
// - application/json
|
||||||
|
//
|
||||||
|
// responses:
|
||||||
|
// '200':
|
||||||
|
// schema:
|
||||||
|
// "$ref": "#/definitions/wellKnownResponse"
|
||||||
|
func (m *Module) NodeInfoWellKnownGETHandler(c *gin.Context) {
|
||||||
|
if _, err := apiutil.NegotiateAccept(c, apiutil.JSONAcceptHeaders...); err != nil {
|
||||||
|
apiutil.ErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGetV1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, errWithCode := m.processor.Fedi().NodeInfoRelGet(c.Request.Context())
|
||||||
|
if errWithCode != nil {
|
||||||
|
apiutil.ErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encode JSON HTTP response.
|
||||||
|
apiutil.EncodeJSONResponse(
|
||||||
|
c.Writer,
|
||||||
|
c.Request,
|
||||||
|
http.StatusOK,
|
||||||
|
apiutil.AppJSON,
|
||||||
|
resp,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,66 +0,0 @@
|
||||||
// GoToSocial
|
|
||||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
||||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
//
|
|
||||||
// This program is free software: you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU Affero General Public License as published by
|
|
||||||
// the Free Software Foundation, either version 3 of the License, or
|
|
||||||
// (at your option) any later version.
|
|
||||||
//
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU Affero General Public License for more details.
|
|
||||||
//
|
|
||||||
// You should have received a copy of the GNU Affero General Public License
|
|
||||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
package nodeinfo
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/http"
|
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
|
||||||
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
|
||||||
)
|
|
||||||
|
|
||||||
// NodeInfoWellKnownGETHandler swagger:operation GET /.well-known/nodeinfo nodeInfoWellKnownGet
|
|
||||||
//
|
|
||||||
// Returns a well-known response which redirects callers to `/nodeinfo/2.0`.
|
|
||||||
//
|
|
||||||
// eg. `{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"http://example.org/nodeinfo/2.0"}]}`
|
|
||||||
// See: https://nodeinfo.diaspora.software/protocol.html
|
|
||||||
//
|
|
||||||
// ---
|
|
||||||
// tags:
|
|
||||||
// - .well-known
|
|
||||||
//
|
|
||||||
// produces:
|
|
||||||
// - application/json
|
|
||||||
//
|
|
||||||
// responses:
|
|
||||||
// '200':
|
|
||||||
// schema:
|
|
||||||
// "$ref": "#/definitions/wellKnownResponse"
|
|
||||||
func (m *Module) NodeInfoWellKnownGETHandler(c *gin.Context) {
|
|
||||||
if _, err := apiutil.NegotiateAccept(c, apiutil.JSONAcceptHeaders...); err != nil {
|
|
||||||
apiutil.ErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGetV1)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
resp, errWithCode := m.processor.Fedi().NodeInfoRelGet(c.Request.Context())
|
|
||||||
if errWithCode != nil {
|
|
||||||
apiutil.ErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode JSON HTTP response.
|
|
||||||
apiutil.EncodeJSONResponse(
|
|
||||||
c.Writer,
|
|
||||||
c.Request,
|
|
||||||
http.StatusOK,
|
|
||||||
apiutil.AppJSON,
|
|
||||||
resp,
|
|
||||||
)
|
|
||||||
}
|
|
|
@ -21,6 +21,7 @@
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -41,5 +42,6 @@ func New(processor *processing.Processor) *Module {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||||
attachHandler(http.MethodGet, WebfingerBasePath, m.WebfingerGETRequest)
|
// Attach handler, injecting robots http header middleware to disallow all.
|
||||||
|
attachHandler(http.MethodGet, WebfingerBasePath, middleware.RobotsHeaders(""), m.WebfingerGETRequest)
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,12 +44,5 @@ func ExtraHeaders() gin.HandlerFunc {
|
||||||
//
|
//
|
||||||
// See: https://github.com/patcg-individual-drafts/topics
|
// See: https://github.com/patcg-individual-drafts/topics
|
||||||
c.Header("Permissions-Policy", "browsing-topics=()")
|
c.Header("Permissions-Policy", "browsing-topics=()")
|
||||||
|
|
||||||
// Some AI scrapers respect the following tags to opt-out
|
|
||||||
// of their crawling and datasets.
|
|
||||||
c.Header("X-Robots-Tag", "noimageai")
|
|
||||||
// c.Header calls .Set(), but we want to emit the header
|
|
||||||
// twice, not override it.
|
|
||||||
c.Writer.Header().Add("X-Robots-Tag", "noai")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
67
internal/middleware/robots.go
Normal file
67
internal/middleware/robots.go
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
// GoToSocial
|
||||||
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||||
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
package middleware
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RobotsHeaders adds robots directives to the X-Robots-Tag HTTP header.
|
||||||
|
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag
|
||||||
|
//
|
||||||
|
// If mode == "aiOnly" then only the noai and noimageai values will be set,
|
||||||
|
// and other headers will be left alone (for route groups / handlers to set).
|
||||||
|
//
|
||||||
|
// If mode == "allowSome" then noai, noimageai, and some indexing will be set.
|
||||||
|
//
|
||||||
|
// If mode == "" then noai, noimageai, noindex, and nofollow will be set
|
||||||
|
// (ie., as restrictive as possible).
|
||||||
|
func RobotsHeaders(mode string) gin.HandlerFunc {
|
||||||
|
const (
|
||||||
|
key = "X-Robots-Tag"
|
||||||
|
// Some AI scrapers respect the following tags
|
||||||
|
// to opt-out of their crawling and datasets.
|
||||||
|
// We add them regardless of allowSome.
|
||||||
|
noai = "noai, noimageai"
|
||||||
|
)
|
||||||
|
|
||||||
|
switch mode {
|
||||||
|
|
||||||
|
// Just set ai headers and
|
||||||
|
// leave the other headers be.
|
||||||
|
case "aiOnly":
|
||||||
|
return func(c *gin.Context) {
|
||||||
|
c.Writer.Header().Set(key, noai)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allow some limited indexing.
|
||||||
|
case "allowSome":
|
||||||
|
return func(c *gin.Context) {
|
||||||
|
c.Writer.Header().Set(key, apiutil.RobotsDirectivesAllowSome)
|
||||||
|
c.Writer.Header().Add(key, noai)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disallow indexing via noindex, nofollow.
|
||||||
|
default:
|
||||||
|
return func(c *gin.Context) {
|
||||||
|
c.Writer.Header().Set(key, apiutil.RobotsDirectivesDisallow)
|
||||||
|
c.Writer.Header().Add(key, noai)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -103,7 +103,7 @@ func (m *Module) profileGETHandler(c *gin.Context) {
|
||||||
// index if account is discoverable.
|
// index if account is discoverable.
|
||||||
var robotsMeta string
|
var robotsMeta string
|
||||||
if targetAccount.Discoverable {
|
if targetAccount.Discoverable {
|
||||||
robotsMeta = robotsMetaAllowSome
|
robotsMeta = apiutil.RobotsDirectivesAllowSome
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need to change our response slightly if the
|
// We need to change our response slightly if the
|
||||||
|
|
|
@ -95,8 +95,6 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) {
|
||||||
// Route static assets.
|
// Route static assets.
|
||||||
routeAssets(m, r, mi...)
|
routeAssets(m, r, mi...)
|
||||||
|
|
||||||
// Route all other endpoints + handlers.
|
|
||||||
//
|
|
||||||
// Handlers that serve profiles and statuses should use
|
// Handlers that serve profiles and statuses should use
|
||||||
// the SignatureCheck middleware, so that requests with
|
// the SignatureCheck middleware, so that requests with
|
||||||
// content-type application/activity+json can be served
|
// content-type application/activity+json can be served
|
||||||
|
@ -108,24 +106,25 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) {
|
||||||
profileGroup.Handle(http.MethodGet, "", m.profileGETHandler) // use empty path here since it's the base of the group
|
profileGroup.Handle(http.MethodGet, "", m.profileGETHandler) // use empty path here since it's the base of the group
|
||||||
profileGroup.Handle(http.MethodGet, statusPath, m.threadGETHandler)
|
profileGroup.Handle(http.MethodGet, statusPath, m.threadGETHandler)
|
||||||
|
|
||||||
// Individual web handlers requiring no specific middlewares.
|
// Group for all other web handlers.
|
||||||
r.AttachHandler(http.MethodGet, "/", m.indexHandler) // front-page
|
everythingElseGroup := r.AttachGroup("")
|
||||||
r.AttachHandler(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler)
|
everythingElseGroup.Use(mi...)
|
||||||
r.AttachHandler(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler)
|
everythingElseGroup.Handle(http.MethodGet, "/", m.indexHandler) // front-page
|
||||||
r.AttachHandler(http.MethodGet, customCSSPath, m.customCSSGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler)
|
||||||
r.AttachHandler(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler)
|
||||||
r.AttachHandler(http.MethodGet, rssFeedPath, m.rssFeedGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, customCSSPath, m.customCSSGETHandler)
|
||||||
r.AttachHandler(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler)
|
||||||
r.AttachHandler(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler)
|
everythingElseGroup.Handle(http.MethodGet, rssFeedPath, m.rssFeedGETHandler)
|
||||||
r.AttachHandler(http.MethodGet, robotsPath, m.robotsGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler)
|
||||||
r.AttachHandler(http.MethodGet, aboutPath, m.aboutGETHandler)
|
everythingElseGroup.Handle(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler)
|
||||||
r.AttachHandler(http.MethodGet, loginPath, m.loginGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, aboutPath, m.aboutGETHandler)
|
||||||
r.AttachHandler(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, loginPath, m.loginGETHandler)
|
||||||
r.AttachHandler(http.MethodGet, tagsPath, m.tagGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler)
|
||||||
r.AttachHandler(http.MethodGet, signupPath, m.signupGETHandler)
|
everythingElseGroup.Handle(http.MethodGet, tagsPath, m.tagGETHandler)
|
||||||
r.AttachHandler(http.MethodPost, signupPath, m.signupPOSTHandler)
|
everythingElseGroup.Handle(http.MethodGet, signupPath, m.signupGETHandler)
|
||||||
|
everythingElseGroup.Handle(http.MethodPost, signupPath, m.signupPOSTHandler)
|
||||||
|
|
||||||
// Redirects from old endpoints to for back compat.
|
// Redirects from old endpoints for back compat.
|
||||||
r.AttachHandler(http.MethodGet, "/auth/edit", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
|
r.AttachHandler(http.MethodGet, "/auth/edit", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
|
||||||
r.AttachHandler(http.MethodGet, "/user", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
|
r.AttachHandler(http.MethodGet, "/user", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
|
||||||
r.AttachHandler(http.MethodGet, "/admin", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, adminPanelPath) })
|
r.AttachHandler(http.MethodGet, "/admin", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, adminPanelPath) })
|
||||||
|
|
|
@ -47,7 +47,7 @@ image/webp
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<meta name="robots" content="{{- if .robotsMeta -}}{{- .robotsMeta -}}{{- else -}}noindex, nofollow{{- end -}}">
|
<meta name="robots" content="{{- if .robotsMeta -}}{{- .robotsMeta -}}{{- else -}}noindex, nofollow, noai, noimageai{{- end -}}">
|
||||||
{{- if .ogMeta }}
|
{{- if .ogMeta }}
|
||||||
{{- include "page_ogmeta.tmpl" . | indent 2 }}
|
{{- include "page_ogmeta.tmpl" . | indent 2 }}
|
||||||
{{- else }}
|
{{- else }}
|
||||||
|
|
Loading…
Reference in a new issue