[chore/bugfix] Deinterface text.Formatter, allow underscores in hashtags (#2233)

This commit is contained in:
tobi 2023-09-29 10:39:56 +02:00 committed by GitHub
parent b6b8f82c87
commit 536d9e482d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 1040 additions and 713 deletions

View file

@ -41,7 +41,7 @@ type Processor struct {
mediaManager *media.Manager mediaManager *media.Manager
oauthServer oauth.Server oauthServer oauth.Server
filter *visibility.Filter filter *visibility.Filter
formatter text.Formatter formatter *text.Formatter
federator federation.Federator federator federation.Federator
parseMention gtsmodel.ParseMentionFunc parseMention gtsmodel.ParseMentionFunc
} }

View file

@ -277,7 +277,7 @@ func processLanguage(ctx context.Context, form *apimodel.AdvancedStatusCreateFor
return nil return nil
} }
func processContent(ctx context.Context, dbService db.DB, formatter text.Formatter, parseMention gtsmodel.ParseMentionFunc, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { func processContent(ctx context.Context, dbService db.DB, formatter *text.Formatter, parseMention gtsmodel.ParseMentionFunc, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
// if there's nothing in the status at all we can just return early // if there's nothing in the status at all we can just return early
if form.Status == "" { if form.Status == "" {
status.Content = "" status.Content = ""

View file

@ -31,7 +31,7 @@ type Processor struct {
federator federation.Federator federator federation.Federator
converter *typeutils.Converter converter *typeutils.Converter
filter *visibility.Filter filter *visibility.Filter
formatter text.Formatter formatter *text.Formatter
parseMention gtsmodel.ParseMentionFunc parseMention gtsmodel.ParseMentionFunc
} }

View file

@ -1,70 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package text
import (
"bytes"
"context"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
)
// FromPlainEmojiOnly converts plaintext to HTML using the plaintext block
// parser and a customRenderer created with emojiOnly set to true.
//
// The returned FormatResult always carries non-nil (possibly empty) Mentions,
// Tags, and Emojis slices. A conversion error is logged, not returned;
// whatever reached the output buffer is still sanitized, minified, and
// handed back in result.HTML.
func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
	out := &FormatResult{
		Mentions: []*gtsmodel.Mention{},
		Tags:     []*gtsmodel.Tag{},
		Emojis:   []*gtsmodel.Emoji{},
	}

	// Renderer settings: XHTML output with hard line wraps.
	rendererOpts := goldmark.WithRendererOptions(
		html.WithXHTML(),
		html.WithHardWraps(),
	)

	// Only the plaintext block parser is installed.
	plainParser := goldmark.WithParser(
		parser.NewParser(
			parser.WithBlockParsers(
				util.Prioritized(newPlaintextParser(), 500),
			),
		),
	)

	// The custom renderer collects what it finds into out.
	extension := goldmark.WithExtensions(&customRenderer{
		f:            f,
		ctx:          ctx,
		parseMention: pmf,
		accountID:    authorID,
		statusID:     statusID,
		emojiOnly:    true,
		result:       out,
	})

	md := goldmark.New(rendererOpts, plainParser, extension)

	var buf bytes.Buffer
	if err := md.Convert([]byte(plain), &buf); err != nil {
		log.Errorf(ctx, "error formatting plaintext to HTML: %s", err)
	}

	// Sanitize first, then minify the sanitized output.
	out.HTML = MinifyHTML(SanitizeToHTML(buf.String()))
	return out
}

View file

@ -24,29 +24,25 @@
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
) )
// Formatter wraps some logic and functions for parsing statuses and other text input into nice html. // FormatFunc is fulfilled by FromPlain,
// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and // FromPlainNoParagraph, and FromMarkdown.
// emoji that were found in the text. type FormatFunc func(
type Formatter interface { ctx context.Context,
// FromPlain parses an HTML text from a plaintext. parseMention gtsmodel.ParseMentionFunc,
FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult authorID string,
// FromPlainNoParagraph parses an HTML text from a plaintext, without wrapping the resulting text in <p> tags. statusID string,
FromPlainNoParagraph(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult text string,
// FromMarkdown parses an HTML text from a markdown-formatted text. ) *FormatResult
FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult
// FromPlainEmojiOnly parses an HTML text from a plaintext, only parsing emojis and not mentions etc.
FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
}
type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult // Formatter wraps logic and functions for parsing
// statuses and other text input into nice html.
type formatter struct { type Formatter struct {
db db.DB db db.DB
} }
// NewFormatter returns a new Formatter interface for parsing statuses and other text input into nice html. // NewFormatter returns a new Formatter.
func NewFormatter(db db.DB) Formatter { func NewFormatter(db db.DB) *Formatter {
return &formatter{ return &Formatter{
db: db, db: db,
} }
} }

View file

@ -48,7 +48,7 @@ type TextStandardTestSuite struct {
testEmojis map[string]*gtsmodel.Emoji testEmojis map[string]*gtsmodel.Emoji
// module being tested // module being tested
formatter text.Formatter formatter *text.Formatter
} }
func (suite *TextStandardTestSuite) SetupSuite() { func (suite *TextStandardTestSuite) SetupSuite() {
@ -85,14 +85,32 @@ func (suite *TextStandardTestSuite) TearDownTest() {
testrig.StandardDBTeardown(suite.db) testrig.StandardDBTeardown(suite.db)
} }
func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult { func (suite *TextStandardTestSuite) FromMarkdown(input string) *text.FormatResult {
return suite.formatter.FromMarkdown(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) return suite.formatter.FromMarkdown(
context.Background(),
suite.parseMention,
suite.testAccounts["local_account_1"].ID,
"dummy_status_ID",
input,
)
} }
func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult { func (suite *TextStandardTestSuite) FromPlain(input string) *text.FormatResult {
return suite.formatter.FromPlain(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) return suite.formatter.FromPlain(
context.Background(),
suite.parseMention,
suite.testAccounts["local_account_1"].ID,
"dummy_status_ID",
input,
)
} }
func (suite *TextStandardTestSuite) FromPlainNoParagraph(text string) *text.FormatResult { func (suite *TextStandardTestSuite) FromPlainNoParagraph(input string) *text.FormatResult {
return suite.formatter.FromPlainNoParagraph(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) return suite.formatter.FromPlainNoParagraph(
context.Background(),
suite.parseMention,
suite.testAccounts["local_account_1"].ID,
"dummmy_status_ID",
input,
)
} }

View file

@ -0,0 +1,423 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package text
import (
"context"
"errors"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/uris"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
mdutil "github.com/yuin/goldmark/util"
)
// customRenderer fulfils the following goldmark interfaces:
//
//   - renderer.NodeRenderer
//   - goldmark.Extender.
//
// It is used as a goldmark extension by FromMarkdown and
// (variants of) FromPlain.
//
// The custom renderer extracts and re-renders mentions, hashtags,
// and emojis that are encountered during parsing, writing out valid
// HTML representations of these elements.
//
// The customRenderer has the following side effects:
//
//   - May use its db connection to retrieve existing and/or
//     store new mentions, hashtags, and emojis.
//   - May update its *FormatResult to append discovered
//     mentions, hashtags, and emojis to it.
type customRenderer struct {
	// ctx is passed to db calls and log calls made while rendering.
	ctx context.Context
	// db is used to look up emojis/tags/accounts and to store mentions/tags.
	db db.DB
	// parseMention turns a raw mention string into a *gtsmodel.Mention.
	parseMention gtsmodel.ParseMentionFunc
	// accountID is forwarded to parseMention.
	accountID string
	// statusID is forwarded to parseMention; when it is empty,
	// parsed mentions are not stored in the database.
	statusID string
	// emojiOnly restricts the registered inline parsers to emojis only.
	emojiOnly bool
	// result accumulates the mentions, tags, and emojis found while rendering.
	result *FormatResult
}
// RegisterFuncs hooks the mention, hashtag, and emoji render
// functions up to their respective custom node kinds.
func (cr *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
	register := reg.Register

	register(kindMention, cr.renderMention)
	register(kindHashtag, cr.renderHashtag)
	register(kindEmoji, cr.renderEmoji)
}
// Extend installs the custom inline parsers and this renderer on the given
// goldmark instance. The emoji parser is always added; the mention and
// hashtag parsers are added only when emojiOnly is false.
func (cr *customRenderer) Extend(markdown goldmark.Markdown) {
	// 1000 is set as the lowest
	// priority, but it's arbitrary.
	const prio = 1000

	// Emojis are always parsed + rendered.
	inlineParsers := []mdutil.PrioritizedValue{
		mdutil.Prioritized(new(emojiParser), prio),
	}

	if !cr.emojiOnly {
		// Mentions and hashtags too.
		inlineParsers = append(
			inlineParsers,
			mdutil.Prioritized(new(mentionParser), prio),
			mdutil.Prioritized(new(hashtagParser), prio),
		)
	}

	markdown.Parser().AddOptions(
		parser.WithInlineParsers(inlineParsers...),
	)

	// Register this custom renderer.
	markdown.Renderer().AddOptions(
		renderer.WithNodeRenderers(
			mdutil.Prioritized(cr, prio),
		),
	)
}
/*
MENTION RENDERING STUFF
*/
// renderMention writes the HTML for a mention node. It only acts when
// entering the node, and always tells the walker to skip children.
func (cr *customRenderer) renderMention(
	w mdutil.BufWriter,
	source []byte,
	node ast.Node,
	entering bool,
) (ast.WalkStatus, error) {
	if entering {
		// Registered for kindMention only, so
		// any other node type is a programmer error.
		mentionNode, isMention := node.(*mention)
		if !isMention {
			log.Panic(cr.ctx, "type assertion failed")
		}

		// Raw mention string, eg., '@someone@domain.org',
		// replaced by whatever handleMention decides to render.
		raw := string(mentionNode.Segment.Value(source))
		rendered := cr.handleMention(raw)

		if _, err := w.WriteString(rendered); err != nil {
			// Nothing more can be done than logging.
			log.Errorf(cr.ctx, "error writing HTML: %s", err)
		}
	}

	return ast.WalkSkipChildren, nil
}
// handleMention processes a raw mention string ('@username@domain.com'
// or '@localusername') in these steps:
//
//   - Parse it into a *gtsmodel.Mention via cr.parseMention.
//   - Store the mention in the database when cr.statusID is set.
//   - Append it to cr.result.Mentions unless a mention with the
//     same TargetAccountID is already there.
//   - Populate the target account if needed, and return the
//     mention rendered as an HTML link.
//
// On any error along the way the error is logged and
// the unaltered input text is returned instead.
func (cr *customRenderer) handleMention(text string) string {
	parsed, err := cr.parseMention(cr.ctx, text, cr.accountID, cr.statusID)
	if err != nil {
		log.Errorf(cr.ctx, "error parsing mention %s from status: %s", text, err)
		return text
	}

	if cr.statusID != "" {
		if err := cr.db.PutMention(cr.ctx, parsed); err != nil {
			log.Errorf(cr.ctx, "error putting mention in db: %s", err)
			return text
		}
	}

	// Repeated occurrences of one mention in the same
	// status must produce just one entry in the result.
	alreadyListed := false
	for _, existing := range cr.result.Mentions {
		if existing.TargetAccountID == parsed.TargetAccountID {
			alreadyListed = true
			break
		}
	}
	if !alreadyListed {
		cr.result.Mentions = append(cr.result.Mentions, parsed)
	}

	// Fetch mention target account if not yet populated.
	if parsed.TargetAccount == nil {
		parsed.TargetAccount, err = cr.db.GetAccountByID(
			gtscontext.SetBarebones(cr.ctx),
			parsed.TargetAccountID,
		)
		if err != nil {
			log.Errorf(cr.ctx, "error populating mention target account: %v", err)
			return text
		}
	}

	// Render the mention as an h-card span wrapping a link
	// to the target account's URL, with the username in an
	// inner span.
	target := parsed.TargetAccount

	var out strings.Builder
	out.WriteString(`<span class="h-card"><a href="`)
	out.WriteString(target.URL)
	out.WriteString(`" class="u-url mention">@<span>`)
	out.WriteString(target.Username)
	out.WriteString(`</span></a></span>`)
	return out.String()
}
/*
HASHTAG RENDERING STUFF
*/
// renderHashtag writes the HTML for a hashtag node. It only acts when
// entering the node, and always tells the walker to skip children.
func (cr *customRenderer) renderHashtag(
	w mdutil.BufWriter,
	source []byte,
	node ast.Node,
	entering bool,
) (ast.WalkStatus, error) {
	if entering {
		// Registered for kindHashtag only, so
		// any other node type is a programmer error.
		hashtagNode, isHashtag := node.(*hashtag)
		if !isHashtag {
			log.Panic(cr.ctx, "type assertion failed")
		}

		// Raw hashtag string, eg., '#SomeHashtag',
		// replaced by whatever handleHashtag decides to render.
		raw := string(hashtagNode.Segment.Value(source))
		rendered := cr.handleHashtag(raw)

		if _, err := w.WriteString(rendered); err != nil {
			// Nothing more can be done than logging.
			log.Errorf(cr.ctx, "error writing HTML: %s", err)
		}
	}

	return ast.WalkSkipChildren, nil
}
// handleHashtag processes a raw hashtag string ('#SomeHashtag')
// in these steps:
//
//   - Normalize + validate it with NormalizeHashtag.
//   - Fetch the tag with that name from the db, creating it if absent.
//   - Append the tag to cr.result.Tags unless a tag with the
//     same ID is already there.
//   - Return the hashtag rendered as an HTML link.
//
// If the hashtag is invalid, or the tag cannot be fetched or
// created, the unaltered input text is returned instead.
func (cr *customRenderer) handleHashtag(text string) string {
	normalized, valid := NormalizeHashtag(text)
	if !valid {
		return text
	}

	getOrCreateHashtag := func(name string) (*gtsmodel.Tag, error) {
		// Look for an existing tag first; "no entries"
		// is not a failure, it just means we create one.
		existing, err := cr.db.GetTagByName(cr.ctx, name)
		if err != nil && !errors.Is(err, db.ErrNoEntries) {
			return nil, gtserror.Newf("db error getting tag %s: %w", name, err)
		}

		if existing != nil {
			return existing, nil
		}

		// Nothing stored under this name yet.
		created := &gtsmodel.Tag{
			ID:   id.NewULID(),
			Name: name,
		}

		if err := cr.db.PutTag(cr.ctx, created); err != nil {
			return nil, gtserror.Newf("db error putting new tag %s: %w", name, err)
		}

		return created, nil
	}

	tag, err := getOrCreateHashtag(normalized)
	if err != nil {
		log.Errorf(cr.ctx, "error generating hashtags from status: %s", err)
		return text
	}

	// Repeated uses of one tag in the same status
	// must produce just one entry in the result.
	alreadyListed := false
	for _, existing := range cr.result.Tags {
		if existing.ID == tag.ID {
			alreadyListed = true
			break
		}
	}
	if !alreadyListed {
		cr.result.Tags = append(cr.result.Tags, tag)
	}

	// Render as a link to the tag's URI, with the
	// normalized tag name in an inner span after '#'.
	var out strings.Builder
	out.WriteString(`<a href="`)
	out.WriteString(uris.GenerateURIForTag(normalized))
	out.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
	out.WriteString(normalized)
	out.WriteString(`</span></a>`)
	return out.String()
}
/*
EMOJI RENDERING STUFF
*/
// renderEmoji writes out the text returned by handleEmoji for an emoji
// node; handleEmoji records usable emojis in the renderer's result and
// hands the text back unchanged. It only acts when entering the node,
// and always tells the walker to skip children.
func (cr *customRenderer) renderEmoji(
	w mdutil.BufWriter,
	source []byte,
	node ast.Node,
	entering bool,
) (ast.WalkStatus, error) {
	if entering {
		// Registered for kindEmoji only, so
		// any other node type is a programmer error.
		emojiNode, isEmoji := node.(*emoji)
		if !isEmoji {
			log.Panic(cr.ctx, "type assertion failed")
		}

		// Raw emoji string including the enclosing ':' chars.
		raw := string(emojiNode.Segment.Value(source))
		rendered := cr.handleEmoji(raw)

		if _, err := w.WriteString(rendered); err != nil {
			// Nothing more can be done than logging.
			log.Errorf(cr.ctx, "error writing HTML: %s", err)
		}
	}

	return ast.WalkSkipChildren, nil
}
// handleEmoji takes a string in the form ':some_emoji:',
// and does the following:
//
//   - Strip the enclosing ':' chars to get the shortcode.
//   - Try to get the emoji with that shortcode and an empty domain from the db.
//   - Append the emoji to cr.result.Emojis if it was found, is not
//     disabled, is visible in the picker, and is not already listed.
//
// This function always returns the unaltered input
// text, since emojification is handled elsewhere.
func (cr *customRenderer) handleEmoji(text string) string {
	// The shortcode is the text
	// between enclosing ':' chars.
	shortcode := strings.Trim(text, ":")

	// Try to fetch emoji as a locally stored emoji.
	found, err := cr.db.GetEmojiByShortcodeDomain(cr.ctx, shortcode, "")
	if err != nil && !errors.Is(err, db.ErrNoEntries) {
		// Log with the renderer's context (never a nil context), in
		// line with every other log call made by this renderer.
		log.Errorf(cr.ctx, "db error getting local emoji with shortcode %s: %s", shortcode, err)
	}

	if found == nil {
		// No emoji found for this
		// shortcode, oh well!
		return text
	}

	if *found.Disabled || !*found.VisibleInPicker {
		// Emoji was found but not useable.
		return text
	}

	// Emoji was found and useable; append it to the result
	// unless an emoji with this shortcode is already listed,
	// so repeated uses yield a single entry.
	for _, existing := range cr.result.Emojis {
		if existing.Shortcode == found.Shortcode {
			// Already appended.
			return text
		}
	}

	cr.result.Emojis = append(cr.result.Emojis, found)
	return text
}

View file

@ -1,313 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package text
import (
"context"
"fmt"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/text"
mdutil "github.com/yuin/goldmark/util"
)
// Custom goldmark inline nodes for mentions, hashtags, and emojis.
//
// Parsing these separately from regular text keeps each one as a single
// contiguous fragment in the AST, and lets them be rendered separately
// too, so normal text never has to be scanned for mentions and tags.
//
// Each type embeds ast.BaseInline and remembers the
// source segment it was parsed from.
type (
	mention struct {
		ast.BaseInline
		Segment text.Segment
	}

	hashtag struct {
		ast.BaseInline
		Segment text.Segment
	}

	emoji struct {
		ast.BaseInline
		Segment text.Segment
	}
)

// Node kinds under which the custom nodes are registered.
var (
	kindMention = ast.NewNodeKind("Mention")
	kindHashtag = ast.NewNodeKind("Hashtag")
	kindEmoji   = ast.NewNodeKind("Emoji")
)

// Kind reports kindMention.
func (n *mention) Kind() ast.NodeKind { return kindMention }

// Kind reports kindHashtag.
func (n *hashtag) Kind() ast.NodeKind { return kindHashtag }

// Kind reports kindEmoji.
func (n *emoji) Kind() ast.NodeKind { return kindEmoji }
// Dump prints the mention's source text to stdout,
// indented by level. Intended for debugging.
func (n *mention) Dump(source []byte, level int) {
	indent := strings.Repeat(" ", level)
	fmt.Printf("%sMention: %s\n", indent, n.Segment.Value(source))
}

// Dump prints the hashtag's source text to stdout,
// indented by level. Intended for debugging.
func (n *hashtag) Dump(source []byte, level int) {
	indent := strings.Repeat(" ", level)
	fmt.Printf("%sHashtag: %s\n", indent, n.Segment.Value(source))
}

// Dump prints the emoji's source text to stdout,
// indented by level. Intended for debugging.
func (n *emoji) Dump(source []byte, level int) {
	indent := strings.Repeat(" ", level)
	fmt.Printf("%sEmoji: %s\n", indent, n.Segment.Value(source))
}
// newMention wraps the segment in a mention node;
// the segment is what gets used during rendering.
func newMention(s text.Segment) *mention {
	node := new(mention)
	node.Segment = s
	return node
}

// newHashtag wraps the segment in a hashtag node;
// the segment is what gets used during rendering.
func newHashtag(s text.Segment) *hashtag {
	node := new(hashtag)
	node.Segment = s
	return node
}

// newEmoji wraps the segment in an emoji node;
// the segment is what gets used during rendering.
func newEmoji(s text.Segment) *emoji {
	node := new(emoji)
	node.Segment = s
	return node
}
// mentionParser, hashtagParser, and emojiParser are stateless
// inline parsers fulfilling goldmark's parser.InlineParser.
type (
	mentionParser struct{}
	hashtagParser struct{}
	emojiParser   struct{}
)

// Trigger reports '@' as the character that starts a mention.
func (p *mentionParser) Trigger() []byte {
	return []byte("@")
}

// Trigger reports '#' as the character that starts a hashtag.
func (p *hashtagParser) Trigger() []byte {
	return []byte("#")
}

// Trigger reports ':' as the character that starts an emoji.
func (p *emojiParser) Trigger() []byte {
	return []byte(":")
}
// Parse tries to read a mention at the reader's current position.
// It returns nil when the preceding character is not a boundary, or when
// MentionFinder does not match at the very start of the line. Otherwise
// it advances the reader past the match and returns a mention node
// covering it.
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	preceding := block.PrecendingCharacter()
	line, segment := block.PeekLine()

	if !util.IsMentionOrHashtagBoundary(preceding) {
		return nil
	}

	// unideal for performance but makes use of existing regex
	loc := regexes.MentionFinder.FindIndex(line)
	if loc == nil || loc[0] != 0 {
		// Not found, or not found at start.
		return nil
	}

	end := loc[1]
	block.Advance(end)
	return newMention(segment.WithStop(segment.Start + end))
}
// Parse tries to read a hashtag at the reader's current position.
// It returns nil when the preceding character is not a boundary, when
// the line is a single byte long, when a character that is neither
// plausible in a hashtag nor a boundary is met, or when a boundary
// directly follows the '#'. Otherwise it advances the reader to the
// closing boundary (or the end of the line) and returns a hashtag
// node covering that span.
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	preceding := block.PrecendingCharacter()
	line, segment := block.PeekLine()
	lineStr := string(line)

	if len(lineStr) == 1 || !util.IsMentionOrHashtagBoundary(preceding) {
		return nil
	}

	for i, r := range lineStr {
		if i == 0 && r == '#' {
			// ignore initial #
			continue
		}

		if util.IsMentionOrHashtagBoundary(r) {
			if i <= 1 {
				// empty
				return nil
			}

			// End of hashtag
			block.Advance(i)
			return newHashtag(segment.WithStop(segment.Start + i))
		}

		if !util.IsPlausiblyInHashtag(r) {
			// Fake hashtag, don't trust it
			return nil
		}
	}

	// No invalid or boundary characters before the
	// end of the line, so the whole line is hashtag.
	block.Advance(segment.Len())
	return newHashtag(segment)
}
// Parse tries to read an emoji at the reader's current position.
// It returns nil unless EmojiFinder matches at the very start of the
// line; on a match it advances the reader past it and returns an
// emoji node covering it.
func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	line, segment := block.PeekLine()

	// unideal for performance but makes use of existing regex
	loc := regexes.EmojiFinder.FindIndex(line)
	if loc == nil || loc[0] != 0 {
		// Not found, or not found at start.
		return nil
	}

	end := loc[1]
	block.Advance(end)
	return newEmoji(segment.WithStop(segment.Start + end))
}
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
// fields are used to report tags and mentions to the caller for use as metadata.
//
// NOTE: FromPlainEmojiOnly builds this struct with an unkeyed
// literal, so the order of the fields below must not change.
type customRenderer struct {
	// f provides the db connection used while rendering.
	f *formatter
	// ctx is passed to db calls and log calls made while rendering.
	ctx context.Context
	// parseMention is the caller-supplied mention parsing function.
	parseMention gtsmodel.ParseMentionFunc
	// accountID and statusID are the author/status IDs supplied by the caller.
	accountID string
	statusID string
	// emojiOnly restricts the registered inline parsers to emojis only.
	emojiOnly bool
	// result accumulates what was found while rendering.
	result *FormatResult
}
// RegisterFuncs hooks the mention, hashtag, and emoji render
// functions up to their respective custom node kinds.
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
	register := reg.Register

	register(kindMention, r.renderMention)
	register(kindHashtag, r.renderHashtag)
	register(kindEmoji, r.renderEmoji)
}
// Extend installs the custom inline parsers and this renderer on the given
// goldmark instance. The emoji parser is always added; the mention and
// hashtag parsers are added only when emojiOnly is false.
func (r *customRenderer) Extend(m goldmark.Markdown) {
	// 1000 is set as the lowest priority, but it's arbitrary
	const lowestPrio = 1000

	emojis := parser.WithInlineParsers(
		mdutil.Prioritized(&emojiParser{}, lowestPrio),
	)
	m.Parser().AddOptions(emojis)

	if !r.emojiOnly {
		mentionsAndTags := parser.WithInlineParsers(
			mdutil.Prioritized(&mentionParser{}, lowestPrio),
			mdutil.Prioritized(&hashtagParser{}, lowestPrio),
		)
		m.Parser().AddOptions(mentionsAndTags)
	}

	self := renderer.WithNodeRenderers(
		mdutil.Prioritized(r, lowestPrio),
	)
	m.Renderer().AddOptions(self)
}
// renderMention writes the replacement produced by replaceMention for a
// mention node. It only acts when entering the node, and always tells
// the walker to skip children.
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if entering {
		// this function is only registered for kindMention
		mentionNode, isMention := node.(*mention)
		if !isMention {
			log.Panic(r.ctx, "type assertion failed")
		}

		raw := string(mentionNode.Segment.Value(source))
		replacement := r.replaceMention(raw)

		// we don't have much recourse if this fails
		if _, err := w.WriteString(replacement); err != nil {
			log.Errorf(r.ctx, "error writing HTML: %s", err)
		}
	}

	return ast.WalkSkipChildren, nil
}
// renderHashtag writes the replacement produced by replaceHashtag for a
// hashtag node. It only acts when entering the node, and always tells
// the walker to skip children.
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if entering {
		// this function is only registered for kindHashtag
		hashtagNode, isHashtag := node.(*hashtag)
		if !isHashtag {
			log.Panic(r.ctx, "type assertion failed")
		}

		raw := string(hashtagNode.Segment.Value(source))
		replacement := r.replaceHashtag(raw)

		// we don't have much recourse if this fails
		if _, err := w.WriteString(replacement); err != nil {
			log.Errorf(r.ctx, "error writing HTML: %s", err)
		}
	}

	return ast.WalkSkipChildren, nil
}
// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
//
// The shortcode (the node text without its first and last character) is
// looked up with an empty domain. An emoji that is found, visible in the
// picker, and not disabled is appended to r.result.Emojis unless an emoji
// with the same shortcode is already listed. The node text itself is
// always written out unchanged.
//
// It only acts when entering the node, and always tells the walker to
// skip children.
func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		return ast.WalkSkipChildren, nil
	}

	n, ok := node.(*emoji) // this function is only registered for kindEmoji
	if !ok {
		log.Panic(r.ctx, "type assertion failed")
	}

	text := string(n.Segment.Value(source))
	shortcode := text[1 : len(text)-1]

	found, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
	if err != nil {
		if err != db.ErrNoEntries {
			// Log with the renderer's context (never a nil context),
			// in line with the other log calls in this renderer.
			log.Errorf(r.ctx, "error getting local emoji with shortcode %s: %s", shortcode, err)
		}
	} else if *found.VisibleInPicker && !*found.Disabled {
		listed := false
		for _, e := range r.result.Emojis {
			if e.Shortcode == found.Shortcode {
				listed = true
				break
			}
		}
		if !listed {
			r.result.Emojis = append(r.result.Emojis, found)
		}
	}

	// we don't have much recourse if this fails
	if _, err := w.WriteString(text); err != nil {
		log.Errorf(r.ctx, "error writing HTML: %s", err)
	}

	return ast.WalkSkipChildren, nil
}

View file

@ -0,0 +1,281 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package text
import (
"fmt"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
)
/*
MENTION PARSER STUFF
*/
// mention is a custom inline node (fulfilling goldmark's
// ast.Node) that remembers the source segment of a mention.
type mention struct {
	ast.BaseInline
	Segment text.Segment
}

// kindMention is the node kind under which mention nodes are registered.
var kindMention = ast.NewNodeKind("Mention")

// Kind reports kindMention.
func (n *mention) Kind() ast.NodeKind { return kindMention }

// Dump prints the mention's source text to stdout,
// indented by level. Intended for debugging.
func (n *mention) Dump(source []byte, level int) {
	indent := strings.Repeat(" ", level)
	fmt.Printf("%sMention: %s\n", indent, n.Segment.Value(source))
}

// newMention wraps the given text.Segment in a mention
// node; the segment is what gets used during rendering.
func newMention(s text.Segment) *mention {
	node := new(mention)
	node.Segment = s
	return node
}
// mentionParser is a stateless inline parser for mentions,
// fulfilling goldmark's parser.InlineParser interface.
type mentionParser struct{}

// Trigger reports '@', the character that
// begins a mention, as the parse trigger.
func (p *mentionParser) Trigger() []byte {
	return []byte("@")
}

// Parse tries to read a mention at the reader's current position.
// It returns nil when the preceding character is not a mention boundary,
// or when MentionFinder does not match at the very start of the line.
// Otherwise it advances the reader past the match and returns a mention
// node covering it.
func (p *mentionParser) Parse(
	_ ast.Node,
	block text.Reader,
	_ parser.Context,
) ast.Node {
	// A mention can only start
	// right after a boundary char.
	if preceding := block.PrecendingCharacter(); !isMentionBoundary(preceding) {
		return nil
	}

	line, segment := block.PeekLine()

	// Locate the mention within the line
	// beginning at the trigger character.
	loc := regexes.MentionFinder.FindIndex(line)
	if loc == nil || loc[0] != 0 {
		// No match, or match
		// isn't at the start.
		return nil
	}

	// Consume the mention, and return a node
	// spanning from the segment start up to
	// the end of the match.
	end := loc[1]
	block.Advance(end)

	span := segment.WithStop(segment.Start + end)
	return newMention(span)
}
/*
HASHTAG PARSER STUFF
*/
// hashtag is a custom inline node (fulfilling goldmark's
// ast.Node) that remembers the source segment of a hashtag.
type hashtag struct {
	ast.BaseInline
	Segment text.Segment
}

// kindHashtag is the node kind under which hashtag nodes are registered.
var kindHashtag = ast.NewNodeKind("Hashtag")

// Kind reports kindHashtag.
func (n *hashtag) Kind() ast.NodeKind { return kindHashtag }

// Dump prints the hashtag's source text to stdout,
// indented by level. Intended for debugging.
func (n *hashtag) Dump(source []byte, level int) {
	indent := strings.Repeat(" ", level)
	fmt.Printf("%sHashtag: %s\n", indent, n.Segment.Value(source))
}

// newHashtag wraps the given text.Segment in a hashtag
// node; the segment is what gets used during rendering.
func newHashtag(s text.Segment) *hashtag {
	node := new(hashtag)
	node.Segment = s
	return node
}
// hashtagParser is a stateless inline parser for hashtags.
type hashtagParser struct{}

// Trigger reports '#', the character that
// begins a hashtag, as the parse trigger.
func (p *hashtagParser) Trigger() []byte {
	return []byte("#")
}

// Parse tries to read a hashtag at the reader's current position.
// It returns nil when the preceding character is not a hashtag boundary,
// when the line is at most one byte long, or when a character that is
// neither plausible in a hashtag nor a boundary is met. Otherwise it
// advances the reader to the closing boundary (or to the end of the
// line) and returns a hashtag node covering that span.
func (p *hashtagParser) Parse(
	_ ast.Node,
	block text.Reader,
	_ parser.Context,
) ast.Node {
	// A hashtag can only start
	// right after a boundary char.
	if preceding := block.PrecendingCharacter(); !isHashtagBoundary(preceding) {
		return nil
	}

	line, segment := block.PeekLine()
	lineStr := string(line)

	if len(lineStr) <= 1 {
		// Most likely a '#'
		// all on its own.
		return nil
	}

	// Walk the runes of the candidate hashtag until
	// hitting a closing boundary (ok), a weird
	// character (bad), or the end of the line (ok).
	for i, r := range lineStr {
		if i == 0 && r == '#' {
			// Skip the leading '#'.
			continue
		}

		if isHashtagBoundary(r) {
			// Closing boundary reached: consume
			// up to it, and return a node spanning
			// from the segment start to the boundary.
			block.Advance(i)

			span := segment.WithStop(segment.Start + i)
			return newHashtag(span)
		}

		if !isPlausiblyInHashtag(r) {
			// Weird non-boundary character
			// inside the hashtag; reject it.
			return nil
		}
	}

	// Ran out of line without meeting an invalid or
	// boundary character, so the entire segment is
	// the hashtag. Consume it all.
	block.Advance(segment.Len())
	return newHashtag(segment)
}
/*
EMOJI PARSER STUFF
*/
// emoji is an inline node standing for
// one :emoji: shortcode in the source
// text. It fulfils the goldmark ast.Node
// interface.
type emoji struct {
	ast.BaseInline

	// Segment is the span of source
	// text covered by this emoji.
	Segment text.Segment
}

// kindEmoji is the node kind that
// every emoji node reports via Kind.
var kindEmoji = ast.NewNodeKind("Emoji")

// Kind returns kindEmoji.
func (n *emoji) Kind() ast.NodeKind {
	return kindEmoji
}

// Dump prints a one-line description of
// this node to stdout: the "Emoji" label
// followed by the source text inside the
// node's Segment, indented by level spaces.
func (n *emoji) Dump(source []byte, level int) {
	var (
		indent = strings.Repeat(" ", level)
		value  = string(n.Segment.Value(source))
	)

	fmt.Printf("%sEmoji: %s\n", indent, value)
}
// newEmoji wraps the given text.Segment
// in an emoji node (a goldmark ast.Node).
// The segment is kept on the node so that
// it can be used later during rendering.
func newEmoji(s text.Segment) *emoji {
	// BaseInline is left at its zero value.
	node := new(emoji)
	node.Segment = s
	return node
}
// emojiParser is a stateless inline
// parser that recognizes emoji shortcodes.
type emojiParser struct{}

// Trigger returns the single byte ':',
// the character that appears at the start
// of an emoji and kicks off emoji parsing.
func (p *emojiParser) Trigger() []byte {
	return []byte(":")
}
// Parse tries to read an emoji starting at
// the reader's current position. It returns
// an emoji node spanning the match found by
// regexes.EmojiFinder, or nil if that finder
// has no match beginning at the very start
// of the line. The reader is only advanced
// when a node is returned.
func (p *emojiParser) Parse(
	_ ast.Node,
	block text.Reader,
	_ parser.Context,
) ast.Node {
	line, segment := block.PeekLine()

	// Find where (if anywhere) an emoji sits
	// in the line; the line begins with the
	// trigger character.
	match := regexes.EmojiFinder.FindIndex(line)
	if match == nil {
		// Nothing found at all.
		return nil
	}

	if start := match[0]; start != 0 {
		// Found, but not at the start
		// of the line, so not ours.
		return nil
	}

	// Consume the matched emoji text, and
	// return a node covering exactly that
	// span from the start of the segment.
	end := match[1]
	block.Advance(end)
	return newEmoji(segment.WithStop(segment.Start + end))
}

View file

@ -26,7 +26,7 @@
// plaintextParser implements goldmark.parser.BlockParser // plaintextParser implements goldmark.parser.BlockParser
type plaintextParser struct{} type plaintextParser struct{}
var defaultPlaintextParser = &plaintextParser{} var defaultPlaintextParser = new(plaintextParser)
func newPlaintextParser() parser.BlockParser { func newPlaintextParser() parser.BlockParser {
return defaultPlaintextParser return defaultPlaintextParser
@ -64,7 +64,7 @@ func (b *plaintextParser) CanAcceptIndentedLine() bool {
// plaintextParserNoParagraph implements goldmark.parser.BlockParser // plaintextParserNoParagraph implements goldmark.parser.BlockParser
type plaintextParserNoParagraph struct{} type plaintextParserNoParagraph struct{}
var defaultPlaintextParserNoParagraph = &plaintextParserNoParagraph{} var defaultPlaintextParserNoParagraph = new(plaintextParserNoParagraph)
func newPlaintextParserNoParagraph() parser.BlockParser { func newPlaintextParserNoParagraph() parser.BlockParser {
return defaultPlaintextParserNoParagraph return defaultPlaintextParserNoParagraph

View file

@ -28,38 +28,55 @@
"github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/renderer/html"
) )
func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult { // FromMarkdown fulfils FormatFunc by parsing
result := &FormatResult{ // the given markdown input into a FormatResult.
Mentions: []*gtsmodel.Mention{}, func (f *Formatter) FromMarkdown(
Tags: []*gtsmodel.Tag{}, ctx context.Context,
Emojis: []*gtsmodel.Emoji{}, parseMention gtsmodel.ParseMentionFunc,
} authorID string,
statusID string,
input string,
) *FormatResult {
result := new(FormatResult)
// parse markdown text into html, using custom renderer to add hashtag/mention links // Instantiate goldmark parser for
// markdown, using custom renderer
// to add hashtag/mention links.
md := goldmark.New( md := goldmark.New(
goldmark.WithRendererOptions( goldmark.WithRendererOptions(
html.WithXHTML(), html.WithXHTML(),
html.WithHardWraps(), html.WithHardWraps(),
html.WithUnsafe(), // allows raw HTML // Allows raw HTML. We sanitize
// at the end so this is OK.
html.WithUnsafe(),
), ),
goldmark.WithExtensions( goldmark.WithExtensions(
&customRenderer{f, ctx, pmf, authorID, statusID, false, result}, &customRenderer{
extension.Linkify, // turns URLs into links ctx,
f.db,
parseMention,
authorID,
statusID,
false, // emojiOnly = false.
result,
},
extension.Linkify, // Turns URLs into links.
extension.Strikethrough, extension.Strikethrough,
), ),
) )
var htmlContentBytes bytes.Buffer // Parse input into HTML.
err := md.Convert([]byte(markdownText), &htmlContentBytes) var htmlBytes bytes.Buffer
if err != nil { if err := md.Convert(
log.Errorf(ctx, "error formatting markdown to HTML: %s", err) []byte(input),
&htmlBytes,
); err != nil {
log.Errorf(ctx, "error formatting markdown input to HTML: %s", err)
} }
result.HTML = htmlContentBytes.String()
// clean anything dangerous out of the HTML // Clean and shrink HTML.
result.HTML = htmlBytes.String()
result.HTML = SanitizeToHTML(result.HTML) result.HTML = SanitizeToHTML(result.HTML)
// shrink ray
result.HTML = MinifyHTML(result.HTML) result.HTML = MinifyHTML(result.HTML)
return result return result

View file

@ -76,10 +76,16 @@
mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>" mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>"
mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps" mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps"
mdObjectInCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you&#39;ve been writing lately! :rainbow:\n</code></pre><p>hope that helps</p>" mdObjectInCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you&#39;ve been writing lately! :rainbow:\n</code></pre><p>hope that helps</p>"
mdItalicHashtag = "_#hashtag_" // Hashtags can be italicized but only with *, not _.
mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" mdItalicHashtag = "*#hashtag*"
mdItalicHashtags = "_#hashtag #hashtag #hashtag_" mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" mdItalicHashtags = "*#hashtag #hashtag #hashtag*"
mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
// Hashtags can end with or contain _ but not start with it.
mdUnderscorePrefixHashtag = "_#hashtag"
mdUnderscorePrefixHashtagExpected = "<p>_#hashtag</p>"
mdUnderscoreSuffixHashtag = "#hashtag_"
mdUnderscoreSuffixHashtagExpected = "<p><a href=\"http://localhost:8080/tags/hashtag_\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag_</span></a></p>"
// BEWARE: sneaky unicode business going on. // BEWARE: sneaky unicode business going on.
// the first ö is one rune, the second ö is an o with a combining diacritic. // the first ö is one rune, the second ö is an o with a combining diacritic.
mdUnnormalizedHashtag = "#hellöthere #hellöthere" mdUnnormalizedHashtag = "#hellöthere #hellöthere"
@ -194,6 +200,19 @@ func (suite *MarkdownTestSuite) TestParseItalicHashtags() {
suite.Equal(mdItalicHashtagsExpected, formatted.HTML) suite.Equal(mdItalicHashtagsExpected, formatted.HTML)
} }
func (suite *MarkdownTestSuite) TestParseHashtagUnderscorePrefix() {
formatted := suite.FromMarkdown(mdUnderscorePrefixHashtag)
suite.Equal(mdUnderscorePrefixHashtagExpected, formatted.HTML)
suite.Empty(formatted.Tags)
}
func (suite *MarkdownTestSuite) TestParseHashtagUnderscoreSuffix() {
formatted := suite.FromMarkdown(mdUnderscoreSuffixHashtag)
suite.Equal(mdUnderscoreSuffixHashtagExpected, formatted.HTML)
suite.NotEmpty(formatted.Tags)
suite.Equal("hashtag_", formatted.Tags[0].Name)
}
func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() { func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() {
formatted := suite.FromMarkdown(mdUnnormalizedHashtag) formatted := suite.FromMarkdown(mdUnnormalizedHashtag)
suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML) suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML)

View file

@ -20,7 +20,6 @@
import ( import (
"strings" "strings"
"github.com/superseriousbusiness/gotosocial/internal/util"
"golang.org/x/text/unicode/norm" "golang.org/x/text/unicode/norm"
) )
@ -36,8 +35,10 @@
// //
// Finally, it will do a check on the normalized string to // Finally, it will do a check on the normalized string to
// ensure that it's below maximumHashtagLength chars, and // ensure that it's below maximumHashtagLength chars, and
// contains only unicode letters and numbers. If this passes, // contains only letters, numbers, and underscores (and not
// returned bool will be true. // *JUST* underscores).
//
// If all this passes, returned bool will be true.
func NormalizeHashtag(text string) (string, bool) { func NormalizeHashtag(text string) (string, bool) {
// This normalization is specifically to avoid cases // This normalization is specifically to avoid cases
// where visually-identical hashtags are stored with // where visually-identical hashtags are stored with
@ -47,14 +48,31 @@ func NormalizeHashtag(text string) (string, bool) {
// with parent characters to form regular letter symbols. // with parent characters to form regular letter symbols.
normalized := norm.NFC.String(strings.TrimPrefix(text, "#")) normalized := norm.NFC.String(strings.TrimPrefix(text, "#"))
// Validate normalized. // Validate normalized result.
ok := true var (
notJustUnderscores = false
onlyPermittedChars = true
lengthOK = true
)
for i, r := range normalized { for i, r := range normalized {
if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { if r != '_' {
ok = false // This isn't an underscore,
// so the whole hashtag isn't
// just underscores.
notJustUnderscores = true
}
if i >= maximumHashtagLength {
lengthOK = false
break
}
if !isPermittedInHashtag(r) {
onlyPermittedChars = false
break break
} }
} }
return normalized, ok return normalized, (lengthOK && onlyPermittedChars && notJustUnderscores)
} }

View file

@ -30,66 +30,150 @@
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
func (f *formatter) fromPlain( // FromPlain fulfils FormatFunc by parsing
// the given plaintext input into a FormatResult.
func (f *Formatter) FromPlain(
ctx context.Context, ctx context.Context,
ptParser parser.Parser, parseMention gtsmodel.ParseMentionFunc,
pmf gtsmodel.ParseMentionFunc,
authorID string, authorID string,
statusID string, statusID string,
plain string, input string,
) *FormatResult { ) *FormatResult {
result := &FormatResult{ // Initialize standard block parser
Mentions: []*gtsmodel.Mention{}, // that wraps result in <p> tags.
Tags: []*gtsmodel.Tag{}, plainTextParser := parser.NewParser(
Emojis: []*gtsmodel.Emoji{},
}
// Parse markdown into html, using custom renderer
// to add hashtag/mention links and emoji images.
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithHardWraps(),
),
goldmark.WithParser(ptParser), // use parser we were passed
goldmark.WithExtensions(
&customRenderer{f, ctx, pmf, authorID, statusID, false, result},
extension.Linkify, // turns URLs into links
),
)
var htmlContentBytes bytes.Buffer
if err := md.Convert([]byte(plain), &htmlContentBytes); err != nil {
log.Errorf(ctx, "error formatting plaintext to HTML: %s", err)
}
result.HTML = htmlContentBytes.String()
// Clean anything dangerous out of resulting HTML.
result.HTML = SanitizeToHTML(result.HTML)
// Shrink ray!
result.HTML = MinifyHTML(result.HTML)
return result
}
func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
ptParser := parser.NewParser(
parser.WithBlockParsers( parser.WithBlockParsers(
util.Prioritized(newPlaintextParser(), 500), util.Prioritized(newPlaintextParser(), 500),
), ),
) )
return f.fromPlain(ctx, ptParser, pmf, authorID, statusID, plain) return f.fromPlain(
ctx,
plainTextParser,
false, // emojiOnly = false
parseMention,
authorID,
statusID,
input,
)
} }
func (f *formatter) FromPlainNoParagraph(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult { // FromPlainNoParagraph fulfils FormatFunc by parsing
ptParser := parser.NewParser( // the given plaintext input into a FormatResult.
//
// Unlike FromPlain, it will not wrap the resulting
// HTML in <p> tags, making it useful for parsing
// short fragments of text that oughtn't be formally
// wrapped as a paragraph.
func (f *Formatter) FromPlainNoParagraph(
ctx context.Context,
parseMention gtsmodel.ParseMentionFunc,
authorID string,
statusID string,
input string,
) *FormatResult {
// Initialize block parser that
// doesn't wrap result in <p> tags.
plainTextParser := parser.NewParser(
parser.WithBlockParsers( parser.WithBlockParsers(
// Initialize block parser that doesn't wrap in <p> tags.
util.Prioritized(newPlaintextParserNoParagraph(), 500), util.Prioritized(newPlaintextParserNoParagraph(), 500),
), ),
) )
return f.fromPlain(ctx, ptParser, pmf, authorID, statusID, plain) return f.fromPlain(
ctx,
plainTextParser,
false, // emojiOnly = false
parseMention,
authorID,
statusID,
input,
)
}
// FromPlainEmojiOnly fulfils FormatFunc by parsing
// the given plaintext input into a FormatResult.
//
// Unlike FromPlain, it will only parse emojis with
// the custom renderer, leaving aside mentions and tags.
func (f *Formatter) FromPlainEmojiOnly(
ctx context.Context,
parseMention gtsmodel.ParseMentionFunc,
authorID string,
statusID string,
input string,
) *FormatResult {
// Initialize standard block parser
// that wraps result in <p> tags.
plainTextParser := parser.NewParser(
parser.WithBlockParsers(
util.Prioritized(newPlaintextParser(), 500),
),
)
return f.fromPlain(
ctx,
plainTextParser,
true, // emojiOnly = true
parseMention,
authorID,
statusID,
input,
)
}
// fromPlain parses the given input text
// using the given plainTextParser, and
// returns the result.
func (f *Formatter) fromPlain(
ctx context.Context,
plainTextParser parser.Parser,
emojiOnly bool,
parseMention gtsmodel.ParseMentionFunc,
authorID string,
statusID string,
input string,
) *FormatResult {
result := new(FormatResult)
// Instantiate goldmark parser for
// plaintext, using custom renderer
// to add hashtag/mention links.
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithHardWraps(),
),
// Use whichever plaintext
// parser we were passed.
goldmark.WithParser(plainTextParser),
goldmark.WithExtensions(
&customRenderer{
ctx,
f.db,
parseMention,
authorID,
statusID,
emojiOnly,
result,
},
extension.Linkify, // Turns URLs into links.
),
)
// Parse input into HTML.
var htmlBytes bytes.Buffer
if err := md.Convert(
[]byte(input),
&htmlBytes,
); err != nil {
log.Errorf(ctx, "error formatting plaintext input to HTML: %s", err)
}
// Clean and shrink HTML.
result.HTML = htmlBytes.String()
result.HTML = SanitizeToHTML(result.HTML)
result.HTML = MinifyHTML(result.HTML)
return result
} }

View file

@ -20,7 +20,6 @@
import ( import (
"testing" "testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
) )
@ -85,7 +84,7 @@ func (suite *PlainTestSuite) TestLinkNoMention() {
func (suite *PlainTestSuite) TestDeriveMentionsEmpty() { func (suite *PlainTestSuite) TestDeriveMentionsEmpty() {
statusText := `` statusText := ``
menchies := suite.FromPlain(statusText).Mentions menchies := suite.FromPlain(statusText).Mentions
assert.Len(suite.T(), menchies, 0) suite.Len(menchies, 0)
} }
func (suite *PlainTestSuite) TestDeriveHashtagsOK() { func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
@ -98,7 +97,9 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
here's a link with a fragment: https://example.org/whatever#ahhh here's a link with a fragment: https://example.org/whatever#ahhh
here's another link with a fragment: https://example.org/whatever/#ahhh here's another link with a fragment: https://example.org/whatever/#ahhh
(#ThisShouldAlsoWork) #this_should_be_split (#ThisShouldAlsoWork) #this_should_not_be_split
#__ <- just underscores, shouldn't work
#111111 thisalsoshouldn'twork#### ## #111111 thisalsoshouldn'twork#### ##
@ -108,24 +109,24 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
` `
tags := suite.FromPlain(statusText).Tags tags := suite.FromPlain(statusText).Tags
assert.Len(suite.T(), tags, 13) suite.Len(tags, 13)
assert.Equal(suite.T(), "testing123", tags[0].Name) suite.Equal("testing123", tags[0].Name)
assert.Equal(suite.T(), "also", tags[1].Name) suite.Equal("also", tags[1].Name)
assert.Equal(suite.T(), "thisshouldwork", tags[2].Name) suite.Equal("thisshouldwork", tags[2].Name)
assert.Equal(suite.T(), "dupe", tags[3].Name) suite.Equal("dupe", tags[3].Name)
assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name) suite.Equal("ThisShouldAlsoWork", tags[4].Name)
assert.Equal(suite.T(), "this", tags[5].Name) suite.Equal("this_should_not_be_split", tags[5].Name)
assert.Equal(suite.T(), "111111", tags[6].Name) suite.Equal("111111", tags[6].Name)
assert.Equal(suite.T(), "alimentación", tags[7].Name) suite.Equal("alimentación", tags[7].Name)
assert.Equal(suite.T(), "saúde", tags[8].Name) suite.Equal("saúde", tags[8].Name)
assert.Equal(suite.T(), "lävistää", tags[9].Name) suite.Equal("lävistää", tags[9].Name)
assert.Equal(suite.T(), "ö", tags[10].Name) suite.Equal("ö", tags[10].Name)
assert.Equal(suite.T(), "네", tags[11].Name) suite.Equal("네", tags[11].Name)
assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name) suite.Equal("ThisOneIsThirteyCharactersLong", tags[12].Name)
statusText = `#올빼미 hej` statusText = `#올빼미 hej`
tags = suite.FromPlain(statusText).Tags tags = suite.FromPlain(statusText).Tags
assert.Equal(suite.T(), "올빼미", tags[0].Name) suite.Equal("올빼미", tags[0].Name)
} }
func (suite *PlainTestSuite) TestDeriveMultiple() { func (suite *PlainTestSuite) TestDeriveMultiple() {
@ -137,20 +138,20 @@ func (suite *PlainTestSuite) TestDeriveMultiple() {
f := suite.FromPlain(statusText) f := suite.FromPlain(statusText)
assert.Len(suite.T(), f.Mentions, 1) suite.Len(f.Mentions, 1)
assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString) suite.Equal("@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString)
assert.Len(suite.T(), f.Tags, 1) suite.Len(f.Tags, 1)
assert.Equal(suite.T(), "hashtag", f.Tags[0].Name) suite.Equal("hashtag", f.Tags[0].Name)
assert.Len(suite.T(), f.Emojis, 0) suite.Len(f.Emojis, 0)
} }
func (suite *PlainTestSuite) TestZalgoHashtag() { func (suite *PlainTestSuite) TestZalgoHashtag() {
statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?` statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?`
f := suite.FromPlain(statusText) f := suite.FromPlain(statusText)
assert.Len(suite.T(), f.Tags, 1) suite.Len(f.Tags, 1)
assert.Equal(suite.T(), "praying", f.Tags[0].Name) suite.Equal("praying", f.Tags[0].Name)
} }
func TestPlainTestSuite(t *testing.T) { func TestPlainTestSuite(t *testing.T) {

View file

@ -1,161 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package text
import (
"errors"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// replaceMention takes a string in the form
// @username@domain.com or @localusername, and
// returns the HTML that should replace it.
//
// The mention is parsed with the renderer's
// parseMention func, stored in the database when
// the renderer has a status ID, and appended to
// the result's Mentions if not already present.
//
// If any step fails, the error is logged and
// the input text is returned unchanged.
func (r *customRenderer) replaceMention(text string) string {
	mention, err := r.parseMention(r.ctx, text, r.accountID, r.statusID)
	if err != nil {
		log.Errorf(r.ctx, "error parsing mention %s from status: %s", text, err)
		return text
	}

	// Only store the mention when
	// a status ID has been set.
	if r.statusID != "" {
		if err := r.f.db.PutMention(r.ctx, mention); err != nil {
			log.Errorf(r.ctx, "error putting mention in db: %s", err)
			return text
		}
	}

	// Append mention to result if not done
	// already, so that the same mention isn't
	// listed more than once.
	func() {
		for _, existing := range r.result.Mentions {
			if existing.ID == mention.ID {
				// Already listed.
				return
			}
		}

		// Not listed yet.
		r.result.Mentions = append(r.result.Mentions, mention)
	}()

	if mention.TargetAccount == nil {
		// Target account isn't populated
		// yet, fetch it from the database.
		account, err := r.f.db.GetAccountByID(
			gtscontext.SetBarebones(r.ctx),
			mention.TargetAccountID,
		)
		mention.TargetAccount = account
		if err != nil {
			log.Errorf(r.ctx, "error populating mention target account: %v", err)
			return text
		}
	}

	// Replace the mention with formatted mention content:
	// <span class="h-card"><a href="target.URL" class="u-url mention">@<span>target.Username</span></a></span>
	target := mention.TargetAccount
	parts := []string{
		`<span class="h-card"><a href="`,
		target.URL,
		`" class="u-url mention">@<span>`,
		target.Username,
		`</span></a></span>`,
	}

	var b strings.Builder
	for _, part := range parts {
		b.WriteString(part)
	}

	return b.String()
}
// replaceHashtag takes a string in the form
// #SomeHashtag and returns the HTML link that
// should replace it.
//
// The hashtag is normalized, then fetched from
// (or created in) the database, and appended to
// the result's Tags if not already present.
//
// If the hashtag is invalid, or can't be fetched
// or created, the input text is returned unchanged.
func (r *customRenderer) replaceHashtag(text string) string {
	normalized, ok := NormalizeHashtag(text)
	if !ok {
		// Not a valid hashtag.
		return text
	}

	tag, err := r.getOrCreateHashtag(normalized)
	if err != nil {
		log.Errorf(r.ctx, "error generating hashtags from status: %s", err)
		return text
	}

	// Check whether this tag is in the result
	// already; a tag that's used multiple times
	// in one status should only be listed once.
	seen := false
	for _, existing := range r.result.Tags {
		if existing.ID == tag.ID {
			seen = true
			break
		}
	}

	if !seen {
		r.result.Tags = append(r.result.Tags, tag)
	}

	// Replace tag with the formatted tag content, eg. `#SomeHashtag` becomes:
	// `<a href="https://example.org/tags/somehashtag" class="mention hashtag" rel="tag">#<span>SomeHashtag</span></a>`
	href := uris.GenerateURIForTag(normalized)

	var b strings.Builder
	b.WriteString(`<a href="`)
	b.WriteString(href)
	b.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
	b.WriteString(normalized)
	b.WriteString(`</span></a>`)

	return b.String()
}
// getOrCreateHashtag returns the tag with the
// given name from the database. If there is no
// such tag, it puts a new one (with a fresh ULID)
// in the database and returns that instead.
//
// An error is returned if the lookup fails with
// anything other than db.ErrNoEntries, or if the
// new tag can't be stored.
func (r *customRenderer) getOrCreateHashtag(name string) (*gtsmodel.Tag, error) {
	// See if a tag with this name exists already.
	existing, err := r.f.db.GetTagByName(r.ctx, name)

	switch {
	case err != nil && !errors.Is(err, db.ErrNoEntries):
		// Real database error.
		return nil, gtserror.Newf("db error getting tag %s: %w", name, err)

	case existing != nil:
		// Tag exists, use it.
		return existing, nil
	}

	// No tag with this name yet,
	// so create and store one.
	created := &gtsmodel.Tag{
		ID:   id.NewULID(),
		Name: name,
	}

	if err := r.f.db.PutTag(r.ctx, created); err != nil {
		return nil, gtserror.Newf("db error putting new tag %s: %w", name, err)
	}

	return created, nil
}

51
internal/text/util.go Normal file
View file

@ -0,0 +1,51 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package text
import "unicode"
// isPlausiblyInHashtag returns true if rune r
// could be part of a hashtag that has not been
// normalized yet: that is, if it's permitted
// in a hashtag, or it's a unicode mark.
//
// Marks are accepted while parsing because
// normalization may combine them into letters;
// they are not permitted after normalization.
func isPlausiblyInHashtag(r rune) bool {
	if isPermittedInHashtag(r) {
		return true
	}

	return unicode.IsMark(r)
}
// isPermittedInHashtag returns true if
// rune r is a unicode letter, a unicode
// number, or an underscore.
func isPermittedInHashtag(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsNumber(r)
	}
}
// isHashtagBoundary reports whether rune r
// counts as a break character before or after
// a #hashtag: any unicode space, or any unicode
// punctuation except for the underscore.
func isHashtagBoundary(r rune) bool {
	// Underscores may appear inside
	// hashtags, so they never break one.
	if r == '_' {
		return false
	}

	return unicode.IsSpace(r) || unicode.IsPunct(r)
}
// isMentionBoundary reports whether rune r
// counts as a break character before or after
// a @mention: any unicode space, or any unicode
// punctuation (underscore included).
func isMentionBoundary(r rune) bool {
	if unicode.IsSpace(r) {
		return true
	}

	return unicode.IsPunct(r)
}

View file

@ -1,37 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package util
import (
"unicode"
)
// IsPlausiblyInHashtag returns true if rune r
// is a unicode letter, number, or mark.
//
// Marks are allowed during parsing, prior to
// normalization, but not after, since they may
// be combined into letters during normalization.
func IsPlausiblyInHashtag(r rune) bool {
	switch {
	case unicode.IsLetter(r):
		return true
	case unicode.IsNumber(r):
		return true
	default:
		return unicode.IsMark(r)
	}
}
// IsPermittedInHashtag returns true if rune
// r is a unicode letter or a unicode number.
func IsPermittedInHashtag(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}

	return unicode.IsNumber(r)
}
// IsMentionOrHashtagBoundary decides where to
// break before or after a #hashtag or @mention.
// It returns true if rune r is a unicode space
// or a unicode punctuation character.
func IsMentionOrHashtagBoundary(r rune) bool {
	if unicode.IsSpace(r) {
		return true
	}

	return unicode.IsPunct(r)
}