2023-03-12 15:00:57 +00:00
|
|
|
// GoToSocial
|
|
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2021-07-13 14:03:51 +00:00
|
|
|
|
2021-07-26 18:25:54 +00:00
|
|
|
package text
|
2021-07-13 14:03:51 +00:00
|
|
|
|
|
|
|
import (
|
2022-07-19 13:21:17 +00:00
|
|
|
"html"
|
2021-08-16 17:17:56 +00:00
|
|
|
"regexp"
|
2022-07-19 13:21:17 +00:00
|
|
|
"strings"
|
2021-08-16 17:17:56 +00:00
|
|
|
|
2021-07-13 14:03:51 +00:00
|
|
|
"github.com/microcosm-cc/bluemonday"
|
|
|
|
)
|
|
|
|
|
|
|
|
// '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content.
|
|
|
|
// Note that this policy does not allow iframes, object, embed, styles, script, etc.
|
|
|
|
// An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.'
|
|
|
|
//
|
|
|
|
// Source: https://github.com/microcosm-cc/bluemonday#usage
|
|
|
|
var regular *bluemonday.Policy = bluemonday.UGCPolicy().
|
|
|
|
RequireNoReferrerOnLinks(true).
|
2022-11-07 13:25:36 +00:00
|
|
|
RequireNoFollowOnLinks(false). // remove the global default which adds rel="nofollow" to all links including local relative
|
|
|
|
RequireNoFollowOnFullyQualifiedLinks(true). // add rel="nofollow" on all external links
|
2021-07-29 11:18:22 +00:00
|
|
|
RequireCrossOriginAnonymous(true).
|
2021-08-16 17:17:56 +00:00
|
|
|
AddTargetBlankToFullyQualifiedLinks(true).
|
2021-07-29 11:18:22 +00:00
|
|
|
AllowAttrs("class", "href", "rel").OnElements("a").
|
2021-08-16 17:17:56 +00:00
|
|
|
AllowAttrs("class").OnElements("span").
|
|
|
|
AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code").
|
|
|
|
SkipElementsContent("code", "pre")
|
2021-07-13 14:03:51 +00:00
|
|
|
|
|
|
|
// '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
|
|
|
|
// An example usage scenario would be blog post titles where HTML tags are not expected at all
|
|
|
|
// and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.'
|
|
|
|
//
|
|
|
|
// Source: https://github.com/microcosm-cc/bluemonday#usage
|
|
|
|
var strict *bluemonday.Policy = bluemonday.StrictPolicy()
|
|
|
|
|
2022-05-26 09:37:13 +00:00
|
|
|
// removeHTML strictly removes *all* recognized HTML elements from the given string.
|
|
|
|
func removeHTML(in string) string {
|
|
|
|
return strict.Sanitize(in)
|
|
|
|
}
|
|
|
|
|
|
|
|
// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through.
|
2021-07-13 14:03:51 +00:00
|
|
|
func SanitizeHTML(in string) string {
|
|
|
|
return regular.Sanitize(in)
|
|
|
|
}
|
|
|
|
|
2022-05-26 09:37:13 +00:00
|
|
|
// SanitizePlaintext runs text through basic sanitization. This removes
|
|
|
|
// any html elements that were in the string, and returns clean plaintext.
|
|
|
|
func SanitizePlaintext(in string) string {
|
2022-07-19 13:21:17 +00:00
|
|
|
content := html.UnescapeString(in)
|
2022-05-26 09:37:13 +00:00
|
|
|
content = removeHTML(content)
|
2022-07-19 13:21:17 +00:00
|
|
|
content = html.UnescapeString(content)
|
|
|
|
return strings.TrimSpace(content)
|
2021-07-13 14:03:51 +00:00
|
|
|
}
|