mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-11-29 23:22:45 +00:00
26b74aefaf
* fix existing bio text showing as HTML - updated replaced mentions to include instance - strips HTML from account source note in Verify handler - update text formatter to use buffers for string writes Signed-off-by: kim <grufwub@gmail.com> * go away linter Signed-off-by: kim <grufwub@gmail.com> * change buf reset location, change html mention tags Signed-off-by: kim <grufwub@gmail.com> * reduce FindLinks code complexity Signed-off-by: kim <grufwub@gmail.com> * fix HTML to text conversion Signed-off-by: kim <grufwub@gmail.com> * Update internal/regexes/regexes.go Co-authored-by: Mina Galić <mina.galic@puppet.com> * use improved html2text lib with more options Signed-off-by: kim <grufwub@gmail.com> * fix to produce actual plaintext from html Signed-off-by: kim <grufwub@gmail.com> * fix span tags instead written as space Signed-off-by: kim <grufwub@gmail.com> * performance improvements to regex replacements, fix link replace logic for un-html-ing in the future Signed-off-by: kim <grufwub@gmail.com> * fix tag/mention replacements to use input string, fix link replace to not include scheme Signed-off-by: kim <grufwub@gmail.com> * use matched input string for link replace href text Signed-off-by: kim <grufwub@gmail.com> * remove unused code (to appease linter :sobs:) Signed-off-by: kim <grufwub@gmail.com> * improve hashtagFinger regex to be more compliant Signed-off-by: kim <grufwub@gmail.com> * update breakReplacer to include both unix and windows line endings Signed-off-by: kim <grufwub@gmail.com> * add NoteRaw field to Account to store plaintext account bio, add migration for this, set for sensitive accounts Signed-off-by: kim <grufwub@gmail.com> * drop unnecessary code Signed-off-by: kim <grufwub@gmail.com> * update text package tests to fix logic changes Signed-off-by: kim <grufwub@gmail.com> * add raw note content testing to account update and account verify Signed-off-by: kim <grufwub@gmail.com> * remove unused modules Signed-off-by: kim <grufwub@gmail.com> * fix 
emoji regex Signed-off-by: kim <grufwub@gmail.com> * fix replacement of hashtags Signed-off-by: kim <grufwub@gmail.com> * update code comment Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: Mina Galić <mina.galic@puppet.com>
169 lines
6.9 KiB
Go
169 lines
6.9 KiB
Go
/*
|
|
GoToSocial
|
|
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
package regexes
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"regexp"
|
|
"sync"
|
|
|
|
"mvdan.cc/xurls/v2"
|
|
)
|
|
|
|
// Path segments interpolated into the path-matching expressions of this package.
const (
	// Account-rooted segments.
	users  = "users"
	actors = "actors"

	// Per-account collections and endpoints.
	statuses  = "statuses"
	inbox     = "inbox"
	outbox    = "outbox"
	followers = "followers"
	following = "following"
	liked     = "liked"
	follow    = "follow"
	blocks    = "blocks"

	// Final segment of an account's public key path.
	publicKey = "main-key"

	// Currently disabled segments, kept for reference:
	// collections = "collections"
	// featured    = "featured"
	// update      = "updates"
)
|
|
|
|
// Upper bounds interpolated into the repetition counts of the username,
// emoji shortcode and hashtag expressions.
const (
	// maximumUsernameLength caps the number of characters in a local username.
	maximumUsernameLength = 64

	// maximumEmojiShortcodeLength caps the number of characters in an emoji shortcode.
	maximumEmojiShortcodeLength = 30

	// maximumHashtagLength caps the number of characters following the # of a hashtag.
	maximumHashtagLength = 30
)
|
|
|
|
var (
|
|
schemes = `(http|https)://`
|
|
// LinkScheme captures http/https schemes in URLs.
|
|
LinkScheme = func() *regexp.Regexp {
|
|
rgx, err := xurls.StrictMatchingScheme(schemes)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return rgx
|
|
}()
|
|
|
|
mentionName = `^@(\w+)(?:@([a-zA-Z0-9_\-\.:]+))?$`
|
|
// MentionName captures the username and domain part from a mention string
|
|
// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
|
|
MentionName = regexp.MustCompile(mentionName)
|
|
|
|
// mention regex can be played around with here: https://regex101.com/r/G1oGR0/1
|
|
mentionFinder = `(?:^|\s)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)`
|
|
// MentionFinder extracts mentions from a piece of text.
|
|
MentionFinder = regexp.MustCompile(mentionFinder)
|
|
|
|
// hashtag regex can be played with here: https://regex101.com/r/bPxeca/1
|
|
hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength)
|
|
// HashtagFinder finds possible hashtags in a string.
|
|
// It returns just the string part of the hashtag, not the # symbol.
|
|
HashtagFinder = regexp.MustCompile(hashtagFinder)
|
|
|
|
emojiShortcode = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength)
|
|
// EmojiShortcode validates an emoji name.
|
|
EmojiShortcode = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcode))
|
|
|
|
// emoji regex can be played with here: https://regex101.com/r/478XGM/1
|
|
emojiFinderString = fmt.Sprintf(`(?:\b)?:(%s):(?:\b)?`, emojiShortcode)
|
|
// EmojiFinder extracts emoji strings from a piece of text.
|
|
EmojiFinder = regexp.MustCompile(emojiFinderString)
|
|
|
|
// usernameString defines an acceptable username on this instance
|
|
usernameString = fmt.Sprintf(`[a-z0-9_]{2,%d}`, maximumUsernameLength)
|
|
// Username can be used to validate usernames of new signups
|
|
Username = regexp.MustCompile(fmt.Sprintf(`^%s$`, usernameString))
|
|
|
|
userPathString = fmt.Sprintf(`^?/%s/(%s)$`, users, usernameString)
|
|
// UserPath parses a path that validates and captures the username part from eg /users/example_username
|
|
UserPath = regexp.MustCompile(userPathString)
|
|
|
|
publicKeyPath = fmt.Sprintf(`^?/%s/(%s)/%s`, users, usernameString, publicKey)
|
|
// PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key
|
|
PublicKeyPath = regexp.MustCompile(publicKeyPath)
|
|
|
|
inboxPath = fmt.Sprintf(`^/?%s/(%s)/%s$`, users, usernameString, inbox)
|
|
// InboxPath parses a path that validates and captures the username part from eg /users/example_username/inbox
|
|
InboxPath = regexp.MustCompile(inboxPath)
|
|
|
|
outboxPath = fmt.Sprintf(`^/?%s/(%s)/%s$`, users, usernameString, outbox)
|
|
// OutboxPath parses a path that validates and captures the username part from eg /users/example_username/outbox
|
|
OutboxPath = regexp.MustCompile(outboxPath)
|
|
|
|
actorPath = fmt.Sprintf(`^?/%s/(%s)$`, actors, usernameString)
|
|
// ActorPath parses a path that validates and captures the username part from eg /actors/example_username
|
|
ActorPath = regexp.MustCompile(actorPath)
|
|
|
|
followersPath = fmt.Sprintf(`^/?%s/(%s)/%s$`, users, usernameString, followers)
|
|
// FollowersPath parses a path that validates and captures the username part from eg /users/example_username/followers
|
|
FollowersPath = regexp.MustCompile(followersPath)
|
|
|
|
followingPath = fmt.Sprintf(`^/?%s/(%s)/%s$`, users, usernameString, following)
|
|
// FollowingPath parses a path that validates and captures the username part from eg /users/example_username/following
|
|
FollowingPath = regexp.MustCompile(followingPath)
|
|
|
|
followPath = fmt.Sprintf(`^/?%s/(%s)/%s/(%s)$`, users, usernameString, follow, ulid)
|
|
// FollowPath parses a path that validates and captures the username part and the ulid part
|
|
// from eg /users/example_username/follow/01F7XT5JZW1WMVSW1KADS8PVDH
|
|
FollowPath = regexp.MustCompile(followPath)
|
|
|
|
ulid = `[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}`
|
|
// ULID parses and validate a ULID.
|
|
ULID = regexp.MustCompile(fmt.Sprintf(`^%s$`, ulid))
|
|
|
|
likedPath = fmt.Sprintf(`^/?%s/(%s)/%s$`, users, usernameString, liked)
|
|
// LikedPath parses a path that validates and captures the username part from eg /users/example_username/liked
|
|
LikedPath = regexp.MustCompile(likedPath)
|
|
|
|
likePath = fmt.Sprintf(`^/?%s/(%s)/%s/(%s)$`, users, usernameString, liked, ulid)
|
|
// LikePath parses a path that validates and captures the username part and the ulid part
|
|
// from eg /users/example_username/like/01F7XT5JZW1WMVSW1KADS8PVDH
|
|
LikePath = regexp.MustCompile(likePath)
|
|
|
|
statusesPath = fmt.Sprintf(`^/?%s/(%s)/%s/(%s)$`, users, usernameString, statuses, ulid)
|
|
// StatusesPath parses a path that validates and captures the username part and the ulid part
|
|
// from eg /users/example_username/statuses/01F7XT5JZW1WMVSW1KADS8PVDH
|
|
// The regex can be played with here: https://regex101.com/r/G9zuxQ/1
|
|
StatusesPath = regexp.MustCompile(statusesPath)
|
|
|
|
blockPath = fmt.Sprintf(`^/?%s/(%s)/%s/(%s)$`, users, usernameString, blocks, ulid)
|
|
// BlockPath parses a path that validates and captures the username part and the ulid part
|
|
// from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH
|
|
BlockPath = regexp.MustCompile(blockPath)
|
|
)
|
|
|
|
// bufpool hands out reusable *bytes.Buffer values to the regex helper
// functions in this package, so that they need not allocate one per call.
var bufpool = sync.Pool{
	New: func() any {
		// A fresh buffer starts out empty, with 512 bytes of capacity reserved.
		scratch := make([]byte, 0, 512)
		return bytes.NewBuffer(scratch)
	},
}
|
|
|
|
// ReplaceAllStringFunc will call through to .ReplaceAllStringFunc in the provided regex, but provide you a clean byte buffer for optimized string writes.
|
|
func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, repl func(match string, buf *bytes.Buffer) string) string {
|
|
buf := bufpool.Get().(*bytes.Buffer) //nolint
|
|
defer bufpool.Put(buf)
|
|
return rgx.ReplaceAllStringFunc(src, func(match string) string {
|
|
buf.Reset() // reset use
|
|
return repl(match, buf)
|
|
})
|
|
}
|