mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-02-09 22:30:16 +00:00
[feature] Use X-Robots-Tag
headers to instruct scrapers/crawlers (#3737)
* [feature] Use `X-Robots-Tag` headers to instruct scrapers/crawlers * use switch for RobotsHeaders
This commit is contained in:
parent
bfb81f5bac
commit
baed591a1d
|
@ -417,7 +417,8 @@ func(context.Context, time.Time) {
|
|||
return fmt.Errorf("error creating main router: %s", err)
|
||||
}
|
||||
|
||||
// Start preparing middleware stack.
|
||||
// Start preparing global middleware
|
||||
// stack (used for every request).
|
||||
middlewares := make([]gin.HandlerFunc, 1)
|
||||
|
||||
// RequestID middleware must run before tracing!
|
||||
|
@ -499,13 +500,14 @@ func(context.Context, time.Time) {
|
|||
metricsModule = api.NewMetrics() // Metrics endpoints
|
||||
healthModule = api.NewHealth(dbService.Ready) // Health check endpoints
|
||||
fileserverModule = api.NewFileserver(process) // fileserver endpoints
|
||||
robotsModule = api.NewRobots() // robots.txt endpoint
|
||||
wellKnownModule = api.NewWellKnown(process) // .well-known endpoints
|
||||
nodeInfoModule = api.NewNodeInfo(process) // nodeinfo endpoint
|
||||
activityPubModule = api.NewActivityPub(dbService, process) // ActivityPub endpoints
|
||||
webModule = web.New(dbService, process) // web pages + user profiles + settings panels etc
|
||||
)
|
||||
|
||||
// create required middleware
|
||||
// Create per-route / per-grouping middlewares.
|
||||
// rate limiting
|
||||
rlLimit := config.GetAdvancedRateLimitRequests()
|
||||
clLimit := middleware.RateLimit(rlLimit, config.GetAdvancedRateLimitExceptionsParsed()) // client api
|
||||
|
@ -518,10 +520,25 @@ func(context.Context, time.Time) {
|
|||
retryAfter := config.GetAdvancedThrottlingRetryAfter()
|
||||
clThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // client api
|
||||
s2sThrottle := middleware.Throttle(cpuMultiplier, retryAfter)
|
||||
|
||||
// server-to-server (AP)
|
||||
fsThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // fileserver / web templates / emojis
|
||||
pkThrottle := middleware.Throttle(cpuMultiplier, retryAfter) // throttle public key endpoint separately
|
||||
|
||||
// Robots http headers (x-robots-tag).
|
||||
//
|
||||
// robotsDisallowAll is used for client API + S2S endpoints
|
||||
// that definitely should never be indexed by crawlers.
|
||||
//
|
||||
// robotsDisallowAIOnly is used for utility endpoints,
|
||||
// fileserver, and for web endpoints that set their own
|
||||
// additional robots directives in HTML meta tags.
|
||||
//
|
||||
// Other endpoints like .well-known and nodeinfo handle
|
||||
// robots headers themselves based on configuration.
|
||||
robotsDisallowAll := middleware.RobotsHeaders("")
|
||||
robotsDisallowAIOnly := middleware.RobotsHeaders("aiOnly")
|
||||
|
||||
// Gzip middleware is applied to all endpoints except
|
||||
// fileserver (compression too expensive for those),
|
||||
// health (which really doesn't need compression), and
|
||||
|
@ -531,17 +548,18 @@ func(context.Context, time.Time) {
|
|||
|
||||
// these should be routed in order;
|
||||
// apply throttling *after* rate limiting
|
||||
authModule.Route(route, clLimit, clThrottle, gzip)
|
||||
clientModule.Route(route, clLimit, clThrottle, gzip)
|
||||
metricsModule.Route(route, clLimit, clThrottle)
|
||||
healthModule.Route(route, clLimit, clThrottle)
|
||||
fileserverModule.Route(route, fsMainLimit, fsThrottle)
|
||||
fileserverModule.RouteEmojis(route, instanceAccount.ID, fsEmojiLimit, fsThrottle)
|
||||
authModule.Route(route, clLimit, clThrottle, robotsDisallowAll, gzip)
|
||||
clientModule.Route(route, clLimit, clThrottle, robotsDisallowAll, gzip)
|
||||
metricsModule.Route(route, clLimit, clThrottle, robotsDisallowAIOnly)
|
||||
healthModule.Route(route, clLimit, clThrottle, robotsDisallowAIOnly)
|
||||
fileserverModule.Route(route, fsMainLimit, fsThrottle, robotsDisallowAIOnly)
|
||||
fileserverModule.RouteEmojis(route, instanceAccount.ID, fsEmojiLimit, fsThrottle, robotsDisallowAIOnly)
|
||||
robotsModule.Route(route, fsMainLimit, fsThrottle, robotsDisallowAIOnly, gzip)
|
||||
wellKnownModule.Route(route, gzip, s2sLimit, s2sThrottle)
|
||||
nodeInfoModule.Route(route, s2sLimit, s2sThrottle, gzip)
|
||||
activityPubModule.Route(route, s2sLimit, s2sThrottle, gzip)
|
||||
activityPubModule.RoutePublicKey(route, s2sLimit, pkThrottle, gzip)
|
||||
webModule.Route(route, fsMainLimit, fsThrottle, gzip)
|
||||
activityPubModule.Route(route, s2sLimit, s2sThrottle, robotsDisallowAll, gzip)
|
||||
activityPubModule.RoutePublicKey(route, s2sLimit, pkThrottle, robotsDisallowAll, gzip)
|
||||
webModule.Route(route, fsMainLimit, fsThrottle, robotsDisallowAIOnly, gzip)
|
||||
|
||||
// Finally start the main http server!
|
||||
if err := route.Start(); err != nil {
|
||||
|
|
|
@ -284,6 +284,7 @@
|
|||
metricsModule = api.NewMetrics() // Metrics endpoints
|
||||
healthModule = api.NewHealth(state.DB.Ready) // Health check endpoints
|
||||
fileserverModule = api.NewFileserver(processor) // fileserver endpoints
|
||||
robotsModule = api.NewRobots() // robots.txt endpoint
|
||||
wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints
|
||||
nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint
|
||||
activityPubModule = api.NewActivityPub(state.DB, processor) // ActivityPub endpoints
|
||||
|
@ -297,6 +298,7 @@
|
|||
healthModule.Route(route)
|
||||
fileserverModule.Route(route)
|
||||
fileserverModule.RouteEmojis(route, instanceAccount.ID)
|
||||
robotsModule.Route(route)
|
||||
wellKnownModule.Route(route)
|
||||
nodeInfoModule.Route(route)
|
||||
activityPubModule.Route(route)
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/api/nodeinfo"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/router"
|
||||
|
@ -43,6 +44,16 @@ func (w *NodeInfo) Route(r *router.Router, m ...gin.HandlerFunc) {
|
|||
}),
|
||||
)
|
||||
|
||||
// If instance is configured to serve instance stats
|
||||
// faithfully at nodeinfo, we should allow robots to
|
||||
// crawl nodeinfo endpoints in a limited capacity.
|
||||
// In all other cases, disallow everything.
|
||||
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
||||
nodeInfoGroup.Use(middleware.RobotsHeaders("allowSome"))
|
||||
} else {
|
||||
nodeInfoGroup.Use(middleware.RobotsHeaders(""))
|
||||
}
|
||||
|
||||
w.nodeInfo.Route(nodeInfoGroup.Handle)
|
||||
}
|
||||
|
||||
|
|
52
internal/api/robots.go
Normal file
52
internal/api/robots.go
Normal file
|
@ -0,0 +1,52 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/api/robots"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/router"
|
||||
)
|
||||
|
||||
type Robots struct {
|
||||
robots *robots.Module
|
||||
}
|
||||
|
||||
func (rb *Robots) Route(r *router.Router, m ...gin.HandlerFunc) {
|
||||
// Create a group so we can attach middlewares.
|
||||
robotsGroup := r.AttachGroup("robots.txt")
|
||||
|
||||
// Use passed-in middlewares.
|
||||
robotsGroup.Use(m...)
|
||||
|
||||
// Allow caching for 24 hrs.
|
||||
// https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4
|
||||
robotsGroup.Use(
|
||||
middleware.CacheControl(middleware.CacheControlConfig{
|
||||
Directives: []string{"public", "max-age=86400"},
|
||||
Vary: []string{"Accept-Encoding"},
|
||||
}),
|
||||
)
|
||||
|
||||
rb.robots.Route(robotsGroup.Handle)
|
||||
}
|
||||
|
||||
func NewRobots() *Robots {
|
||||
return &Robots{}
|
||||
}
|
57
internal/api/robots/robots.go
Normal file
57
internal/api/robots/robots.go
Normal file
|
@ -0,0 +1,57 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package robots
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
)
|
||||
|
||||
// Module registers the robots.txt GET handler. It carries no state.
type Module struct{}
|
||||
|
||||
func New() *Module {
|
||||
return &Module{}
|
||||
}
|
||||
|
||||
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||
// Serve different robots.txt file depending on instance
|
||||
// stats mode: Don't disallow scraping nodeinfo if admin
|
||||
// has opted in to serving accurate stats there. In all
|
||||
// other cases, disallow scraping nodeinfo.
|
||||
var handler gin.HandlerFunc
|
||||
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
||||
handler = m.robotsGETHandler
|
||||
} else {
|
||||
handler = m.robotsGETHandlerDisallowNodeInfo
|
||||
}
|
||||
|
||||
// Attach handler at empty path as this
|
||||
// is already grouped under /robots.txt.
|
||||
attachHandler(http.MethodGet, "", handler)
|
||||
}
|
||||
|
||||
func (m *Module) robotsGETHandler(c *gin.Context) {
|
||||
c.String(http.StatusOK, apiutil.RobotsTxt)
|
||||
}
|
||||
|
||||
func (m *Module) robotsGETHandlerDisallowNodeInfo(c *gin.Context) {
|
||||
c.String(http.StatusOK, apiutil.RobotsTxtDisallowNodeInfo)
|
||||
}
|
|
@ -15,19 +15,17 @@
|
|||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package web
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
)
|
||||
package util
|
||||
|
||||
// See:
|
||||
//
|
||||
// - https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
|
||||
// - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag
|
||||
// - https://www.rfc-editor.org/rfc/rfc9309.html
|
||||
const (
|
||||
robotsPath = "/robots.txt"
|
||||
robotsMetaAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard" // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
|
||||
robotsTxt = `# GoToSocial robots.txt -- to edit, see internal/web/robots.go
|
||||
RobotsDirectivesDisallow = "noindex, nofollow"
|
||||
RobotsDirectivesAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard"
|
||||
RobotsTxt = `# GoToSocial robots.txt -- to edit, see internal/api/util/robots.go
|
||||
# More info @ https://developers.google.com/search/docs/crawling-indexing/robots/intro
|
||||
|
||||
# AI scrapers and the like.
|
||||
|
@ -127,31 +125,9 @@
|
|||
# Webfinger endpoint.
|
||||
Disallow: /.well-known/webfinger
|
||||
`
|
||||
|
||||
robotsTxtNoNodeInfo = robotsTxt + `
|
||||
RobotsTxtDisallowNodeInfo = RobotsTxt + `
|
||||
# Disallow nodeinfo
|
||||
Disallow: /.well-known/nodeinfo
|
||||
Disallow: /nodeinfo/
|
||||
`
|
||||
)
|
||||
|
||||
// robotsGETHandler returns a decent robots.txt that prevents crawling
|
||||
// the api, auth pages, settings pages, etc.
|
||||
//
|
||||
// More granular robots meta tags are then applied for web pages
|
||||
// depending on user preferences (see internal/web).
|
||||
func (m *Module) robotsGETHandler(c *gin.Context) {
|
||||
// Allow caching for 24 hrs.
|
||||
// https://www.rfc-editor.org/rfc/rfc9309.html#section-2.4
|
||||
c.Header("Cache-Control", "public, max-age=86400")
|
||||
|
||||
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
||||
// Serve robots.txt as-is
|
||||
// without forbidding nodeinfo.
|
||||
c.String(http.StatusOK, robotsTxt)
|
||||
return
|
||||
}
|
||||
|
||||
// Disallow scraping nodeinfo.
|
||||
c.String(http.StatusOK, robotsTxtNoNodeInfo)
|
||||
}
|
|
@ -21,6 +21,7 @@
|
|||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||
)
|
||||
|
||||
|
@ -40,5 +41,6 @@ func New(processor *processing.Processor) *Module {
|
|||
}
|
||||
|
||||
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||
attachHandler(http.MethodGet, HostMetaPath, m.HostMetaGETHandler)
|
||||
// Attach handler, injecting robots http header middleware to disallow all.
|
||||
attachHandler(http.MethodGet, HostMetaPath, middleware.RobotsHeaders(""), m.HostMetaGETHandler)
|
||||
}
|
||||
|
|
|
@ -21,6 +21,10 @@
|
|||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||
)
|
||||
|
||||
|
@ -42,5 +46,57 @@ func New(processor *processing.Processor) *Module {
|
|||
}
|
||||
|
||||
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||
attachHandler(http.MethodGet, NodeInfoWellKnownPath, m.NodeInfoWellKnownGETHandler)
|
||||
// If instance is configured to serve instance stats
|
||||
// faithfully at nodeinfo, we should allow robots to
|
||||
// crawl nodeinfo endpoints in a limited capacity.
|
||||
// In all other cases, disallow everything.
|
||||
var robots gin.HandlerFunc
|
||||
if config.GetInstanceStatsMode() == config.InstanceStatsModeServe {
|
||||
robots = middleware.RobotsHeaders("allowSome")
|
||||
} else {
|
||||
robots = middleware.RobotsHeaders("")
|
||||
}
|
||||
|
||||
// Attach handler, injecting robots http header middleware.
|
||||
attachHandler(http.MethodGet, NodeInfoWellKnownPath, robots, m.NodeInfoWellKnownGETHandler)
|
||||
}
|
||||
|
||||
// NodeInfoWellKnownGETHandler swagger:operation GET /.well-known/nodeinfo nodeInfoWellKnownGet
|
||||
//
|
||||
// Returns a well-known response which redirects callers to `/nodeinfo/2.0`.
|
||||
//
|
||||
// eg. `{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"http://example.org/nodeinfo/2.0"}]}`
|
||||
// See: https://nodeinfo.diaspora.software/protocol.html
|
||||
//
|
||||
// ---
|
||||
// tags:
|
||||
// - .well-known
|
||||
//
|
||||
// produces:
|
||||
// - application/json
|
||||
//
|
||||
// responses:
|
||||
// '200':
|
||||
// schema:
|
||||
// "$ref": "#/definitions/wellKnownResponse"
|
||||
func (m *Module) NodeInfoWellKnownGETHandler(c *gin.Context) {
|
||||
if _, err := apiutil.NegotiateAccept(c, apiutil.JSONAcceptHeaders...); err != nil {
|
||||
apiutil.ErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGetV1)
|
||||
return
|
||||
}
|
||||
|
||||
resp, errWithCode := m.processor.Fedi().NodeInfoRelGet(c.Request.Context())
|
||||
if errWithCode != nil {
|
||||
apiutil.ErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
|
||||
return
|
||||
}
|
||||
|
||||
// Encode JSON HTTP response.
|
||||
apiutil.EncodeJSONResponse(
|
||||
c.Writer,
|
||||
c.Request,
|
||||
http.StatusOK,
|
||||
apiutil.AppJSON,
|
||||
resp,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package nodeinfo
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
)
|
||||
|
||||
// NodeInfoWellKnownGETHandler swagger:operation GET /.well-known/nodeinfo nodeInfoWellKnownGet
|
||||
//
|
||||
// Returns a well-known response which redirects callers to `/nodeinfo/2.0`.
|
||||
//
|
||||
// eg. `{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"http://example.org/nodeinfo/2.0"}]}`
|
||||
// See: https://nodeinfo.diaspora.software/protocol.html
|
||||
//
|
||||
// ---
|
||||
// tags:
|
||||
// - .well-known
|
||||
//
|
||||
// produces:
|
||||
// - application/json
|
||||
//
|
||||
// responses:
|
||||
// '200':
|
||||
// schema:
|
||||
// "$ref": "#/definitions/wellKnownResponse"
|
||||
func (m *Module) NodeInfoWellKnownGETHandler(c *gin.Context) {
|
||||
if _, err := apiutil.NegotiateAccept(c, apiutil.JSONAcceptHeaders...); err != nil {
|
||||
apiutil.ErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGetV1)
|
||||
return
|
||||
}
|
||||
|
||||
resp, errWithCode := m.processor.Fedi().NodeInfoRelGet(c.Request.Context())
|
||||
if errWithCode != nil {
|
||||
apiutil.ErrorHandler(c, errWithCode, m.processor.InstanceGetV1)
|
||||
return
|
||||
}
|
||||
|
||||
// Encode JSON HTTP response.
|
||||
apiutil.EncodeJSONResponse(
|
||||
c.Writer,
|
||||
c.Request,
|
||||
http.StatusOK,
|
||||
apiutil.AppJSON,
|
||||
resp,
|
||||
)
|
||||
}
|
|
@ -21,6 +21,7 @@
|
|||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/middleware"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/processing"
|
||||
)
|
||||
|
||||
|
@ -41,5 +42,6 @@ func New(processor *processing.Processor) *Module {
|
|||
}
|
||||
|
||||
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
|
||||
attachHandler(http.MethodGet, WebfingerBasePath, m.WebfingerGETRequest)
|
||||
// Attach handler, injecting robots http header middleware to disallow all.
|
||||
attachHandler(http.MethodGet, WebfingerBasePath, middleware.RobotsHeaders(""), m.WebfingerGETRequest)
|
||||
}
|
||||
|
|
|
@ -44,12 +44,5 @@ func ExtraHeaders() gin.HandlerFunc {
|
|||
//
|
||||
// See: https://github.com/patcg-individual-drafts/topics
|
||||
c.Header("Permissions-Policy", "browsing-topics=()")
|
||||
|
||||
// Some AI scrapers respect the following tags to opt-out
|
||||
// of their crawling and datasets.
|
||||
c.Header("X-Robots-Tag", "noimageai")
|
||||
// c.Header calls .Set(), but we want to emit the header
|
||||
// twice, not override it.
|
||||
c.Writer.Header().Add("X-Robots-Tag", "noai")
|
||||
}
|
||||
}
|
||||
|
|
67
internal/middleware/robots.go
Normal file
67
internal/middleware/robots.go
Normal file
|
@ -0,0 +1,67 @@
|
|||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||
)
|
||||
|
||||
// RobotsHeaders adds robots directives to the X-Robots-Tag HTTP header.
|
||||
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Robots-Tag
|
||||
//
|
||||
// If mode == "aiOnly" then only the noai and noimageai values will be set,
|
||||
// and other headers will be left alone (for route groups / handlers to set).
|
||||
//
|
||||
// If mode == "allowSome" then noai, noimageai, and some indexing will be set.
|
||||
//
|
||||
// If mode == "" then noai, noimageai, noindex, and nofollow will be set
|
||||
// (ie., as restrictive as possible).
|
||||
func RobotsHeaders(mode string) gin.HandlerFunc {
|
||||
const (
|
||||
key = "X-Robots-Tag"
|
||||
// Some AI scrapers respect the following tags
|
||||
// to opt-out of their crawling and datasets.
|
||||
// We add them regardless of allowSome.
|
||||
noai = "noai, noimageai"
|
||||
)
|
||||
|
||||
switch mode {
|
||||
|
||||
// Just set ai headers and
|
||||
// leave the other headers be.
|
||||
case "aiOnly":
|
||||
return func(c *gin.Context) {
|
||||
c.Writer.Header().Set(key, noai)
|
||||
}
|
||||
|
||||
// Allow some limited indexing.
|
||||
case "allowSome":
|
||||
return func(c *gin.Context) {
|
||||
c.Writer.Header().Set(key, apiutil.RobotsDirectivesAllowSome)
|
||||
c.Writer.Header().Add(key, noai)
|
||||
}
|
||||
|
||||
// Disallow indexing via noindex, nofollow.
|
||||
default:
|
||||
return func(c *gin.Context) {
|
||||
c.Writer.Header().Set(key, apiutil.RobotsDirectivesDisallow)
|
||||
c.Writer.Header().Add(key, noai)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -103,7 +103,7 @@ func (m *Module) profileGETHandler(c *gin.Context) {
|
|||
// index if account is discoverable.
|
||||
var robotsMeta string
|
||||
if targetAccount.Discoverable {
|
||||
robotsMeta = robotsMetaAllowSome
|
||||
robotsMeta = apiutil.RobotsDirectivesAllowSome
|
||||
}
|
||||
|
||||
// We need to change our response slightly if the
|
||||
|
|
|
@ -95,8 +95,6 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) {
|
|||
// Route static assets.
|
||||
routeAssets(m, r, mi...)
|
||||
|
||||
// Route all other endpoints + handlers.
|
||||
//
|
||||
// Handlers that serve profiles and statuses should use
|
||||
// the SignatureCheck middleware, so that requests with
|
||||
// content-type application/activity+json can be served
|
||||
|
@ -108,24 +106,25 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) {
|
|||
profileGroup.Handle(http.MethodGet, "", m.profileGETHandler) // use empty path here since it's the base of the group
|
||||
profileGroup.Handle(http.MethodGet, statusPath, m.threadGETHandler)
|
||||
|
||||
// Individual web handlers requiring no specific middlewares.
|
||||
r.AttachHandler(http.MethodGet, "/", m.indexHandler) // front-page
|
||||
r.AttachHandler(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler)
|
||||
r.AttachHandler(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler)
|
||||
r.AttachHandler(http.MethodGet, customCSSPath, m.customCSSGETHandler)
|
||||
r.AttachHandler(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler)
|
||||
r.AttachHandler(http.MethodGet, rssFeedPath, m.rssFeedGETHandler)
|
||||
r.AttachHandler(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler)
|
||||
r.AttachHandler(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler)
|
||||
r.AttachHandler(http.MethodGet, robotsPath, m.robotsGETHandler)
|
||||
r.AttachHandler(http.MethodGet, aboutPath, m.aboutGETHandler)
|
||||
r.AttachHandler(http.MethodGet, loginPath, m.loginGETHandler)
|
||||
r.AttachHandler(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler)
|
||||
r.AttachHandler(http.MethodGet, tagsPath, m.tagGETHandler)
|
||||
r.AttachHandler(http.MethodGet, signupPath, m.signupGETHandler)
|
||||
r.AttachHandler(http.MethodPost, signupPath, m.signupPOSTHandler)
|
||||
// Group for all other web handlers.
|
||||
everythingElseGroup := r.AttachGroup("")
|
||||
everythingElseGroup.Use(mi...)
|
||||
everythingElseGroup.Handle(http.MethodGet, "/", m.indexHandler) // front-page
|
||||
everythingElseGroup.Handle(http.MethodGet, settingsPathPrefix, m.SettingsPanelHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, settingsPanelGlob, m.SettingsPanelHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, customCSSPath, m.customCSSGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, instanceCustomCSSPath, m.instanceCustomCSSGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, rssFeedPath, m.rssFeedGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, confirmEmailPath, m.confirmEmailGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodPost, confirmEmailPath, m.confirmEmailPOSTHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, aboutPath, m.aboutGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, loginPath, m.loginGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, domainBlockListPath, m.domainBlockListGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, tagsPath, m.tagGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodGet, signupPath, m.signupGETHandler)
|
||||
everythingElseGroup.Handle(http.MethodPost, signupPath, m.signupPOSTHandler)
|
||||
|
||||
// Redirects from old endpoints to for back compat.
|
||||
// Redirects from old endpoints for back compat.
|
||||
r.AttachHandler(http.MethodGet, "/auth/edit", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
|
||||
r.AttachHandler(http.MethodGet, "/user", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, userPanelPath) })
|
||||
r.AttachHandler(http.MethodGet, "/admin", func(c *gin.Context) { c.Redirect(http.StatusMovedPermanently, adminPanelPath) })
|
||||
|
|
|
@ -47,7 +47,7 @@ image/webp
|
|||
<meta charset="UTF-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<meta name="robots" content="{{- if .robotsMeta -}}{{- .robotsMeta -}}{{- else -}}noindex, nofollow{{- end -}}">
|
||||
<meta name="robots" content="{{- if .robotsMeta -}}{{- .robotsMeta -}}{{- else -}}noindex, nofollow, noai, noimageai{{- end -}}">
|
||||
{{- if .ogMeta }}
|
||||
{{- include "page_ogmeta.tmpl" . | indent 2 }}
|
||||
{{- else }}
|
||||
|
|
Loading…
Reference in a new issue