From dd83ad053c0cde5b948cbfe34ec4864cf0a123e3 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Thu, 29 Sep 2022 12:03:17 +0200
Subject: [PATCH] [feature] Add `meta robots` tag; allow robots to index
profile card if user is Discoverable (#842)
* rework robots.txt response
* don't let robots take snippets from statuses/threads
* allow robots to index if user is Discoverable
* add license text
---
internal/api/security/robots.go | 46 ++++++++++++++++++++++++++++++---
internal/web/profile.go | 7 +++++
internal/web/robots.go | 24 +++++++++++++++++
web/template/header.tmpl | 1 +
web/template/profile.tmpl | 2 +-
web/template/status.tmpl | 4 +--
web/template/thread.tmpl | 2 +-
7 files changed, 79 insertions(+), 7 deletions(-)
create mode 100644 internal/web/robots.go
diff --git a/internal/api/security/robots.go b/internal/api/security/robots.go
index 65056072a..5b8ba3c05 100644
--- a/internal/api/security/robots.go
+++ b/internal/api/security/robots.go
@@ -1,3 +1,21 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
package security
import (
@@ -7,11 +25,33 @@
)
const robotsString = `User-agent: *
-Disallow: /
+Crawl-delay: 500
+# api stuff
+Disallow: /api/
+# auth/login stuff
+Disallow: /auth/
+Disallow: /oauth/
+Disallow: /check_your_email
+Disallow: /wait_for_approval
+Disallow: /account_disabled
+# well known stuff
+Disallow: /.well-known/
+# files
+Disallow: /fileserver/
+# s2s AP stuff
+Disallow: /users/
+Disallow: /emoji/
+# panels
+Disallow: /admin
+Disallow: /user
+Disallow: /settings/
`
-// RobotsGETHandler returns the most restrictive possible robots.txt file in response to a call to /robots.txt.
-// The response instructs bots with *any* user agent not to index the instance at all.
+// RobotsGETHandler returns a robots.txt that disallows crawling of the api,
+// auth/login pages, well-known, fileserver, s2s AP paths, and admin/user/settings panels.
+//
+// More granular robots meta tags are then applied for web pages
+// depending on user preferences (see internal/web).
func (m *Module) RobotsGETHandler(c *gin.Context) {
c.String(http.StatusOK, robotsString)
}
diff --git a/internal/web/profile.go b/internal/web/profile.go
index a4332b0c1..a1518b517 100644
--- a/internal/web/profile.go
+++ b/internal/web/profile.go
@@ -82,6 +82,12 @@ func (m *Module) profileGETHandler(c *gin.Context) {
return
}
+ // only allow search engines / robots to index this page if account is discoverable
+ var robotsMeta string
+ if account.Discoverable {
+ robotsMeta = robotsAllowSome
+ }
+
// we should only show the 'back to top' button if the
// profile visitor is paging through statuses
showBackToTop := false
@@ -112,6 +118,7 @@ func (m *Module) profileGETHandler(c *gin.Context) {
"instance": instance,
"account": account,
"ogMeta": ogBase(instance).withAccount(account),
+ "robotsMeta": robotsMeta,
"statuses": statusResp.Items,
"statuses_next": statusResp.NextLink,
"show_back_to_top": showBackToTop,
diff --git a/internal/web/robots.go b/internal/web/robots.go
new file mode 100644
index 000000000..c3307d068
--- /dev/null
+++ b/internal/web/robots.go
@@ -0,0 +1,24 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package web
+
+// https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta
+const (
+ robotsAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard"
+)
diff --git a/web/template/header.tmpl b/web/template/header.tmpl
index 560b10f19..f1cfec026 100644
--- a/web/template/header.tmpl
+++ b/web/template/header.tmpl
@@ -6,6 +6,7 @@
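+ <meta name="robots" content="{{ if .robotsMeta }}{{ .robotsMeta }}{{ else }}noindex, nofollow{{ end }}">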
{{ if .ogMeta }}{{ if .ogMeta.Locale }}
{{ end }}
diff --git a/web/template/profile.tmpl b/web/template/profile.tmpl
index 9838e5b30..4c348d028 100644
--- a/web/template/profile.tmpl
+++ b/web/template/profile.tmpl
@@ -29,7 +29,7 @@
Latest public toots
{{ if not .statuses }}
- Nothing here!
+ Nothing here!
{{ else }}
{{ range .statuses }}
diff --git a/web/template/status.tmpl b/web/template/status.tmpl
index 73e7d1b03..5136b6ad7 100644
--- a/web/template/status.tmpl
+++ b/web/template/status.tmpl
@@ -1,4 +1,4 @@
-
-
View toot
\ No newline at end of file
+
View toot
\ No newline at end of file
diff --git a/web/template/thread.tmpl b/web/template/thread.tmpl
index d0f003b91..334bd8a9f 100644
--- a/web/template/thread.tmpl
+++ b/web/template/thread.tmpl
@@ -1,6 +1,6 @@
{{ template "header.tmpl" .}}
-
+
{{range .context.Ancestors}}
{{ template "status.tmpl" .}}