From 00d38855d416e834c2657271823e0ee95397d7ba Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Tue, 27 Sep 2022 14:27:53 +0200 Subject: [PATCH] [bugfix] Fix emphasis being added to emoji shortcodes with markdown parsing (#856) * fix underscored emoji shortcodes being emphasized * remove footnote parsing from md --- internal/processing/status/create_test.go | 41 +++++++++++++++++++++++ internal/processing/status/util.go | 2 +- internal/text/formatter.go | 2 +- internal/text/markdown.go | 35 ++++++++++++++----- internal/text/markdown_test.go | 28 ++++++++-------- 5 files changed, 83 insertions(+), 25 deletions(-) diff --git a/internal/processing/status/create_test.go b/internal/processing/status/create_test.go index c92148108..bccd47a1c 100644 --- a/internal/processing/status/create_test.go +++ b/internal/processing/status/create_test.go @@ -24,6 +24,8 @@ "github.com/stretchr/testify/suite" "github.com/superseriousbusiness/gotosocial/internal/api/model" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) type StatusCreateTestSuite struct { @@ -98,6 +100,45 @@ func (suite *StatusCreateTestSuite) TestProcessContentWarningWithHTMLEscapedQuot suite.Equal("\"test\"", apiStatus.SpoilerText) } +func (suite *StatusCreateTestSuite) TestProcessStatusMarkdownWithUnderscoreEmoji() { + ctx := context.Background() + + // update the shortcode of the rainbow emoji to surround it in underscores + if err := suite.db.UpdateWhere(ctx, []db.Where{{Key: "shortcode", Value: "rainbow"}}, "shortcode", "_rainbow_", &gtsmodel.Emoji{}); err != nil { + suite.FailNow(err.Error()) + } + + creatingAccount := suite.testAccounts["local_account_1"] + creatingApplication := suite.testApplications["application_1"] + + statusCreateForm := &model.AdvancedStatusCreateForm{ + StatusCreateRequest: model.StatusCreateRequest{ + Status: "poopoo peepee :_rainbow_:", + MediaIDs: []string{}, + Poll: nil, 
+ InReplyToID: "", + Sensitive: false, + Visibility: model.VisibilityPublic, + ScheduledAt: "", + Language: "en", + Format: model.StatusFormatMarkdown, + }, + AdvancedVisibilityFlagsForm: model.AdvancedVisibilityFlagsForm{ + Federated: nil, + Boostable: nil, + Replyable: nil, + Likeable: nil, + }, + } + + apiStatus, err := suite.status.Create(ctx, creatingAccount, creatingApplication, statusCreateForm) + suite.NoError(err) + suite.NotNil(apiStatus) + + suite.Equal("

<p>poopoo peepee :_rainbow_:</p>

", apiStatus.Content) + suite.NotEmpty(apiStatus.Emojis) +} + func TestStatusCreateTestSuite(t *testing.T) { suite.Run(t, new(StatusCreateTestSuite)) } diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go index 298d4fbd0..d3fc8f3b1 100644 --- a/internal/processing/status/util.go +++ b/internal/processing/status/util.go @@ -302,7 +302,7 @@ func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedS case apimodel.StatusFormatPlain: formatted = p.formatter.FromPlain(ctx, form.Status, status.Mentions, status.Tags) case apimodel.StatusFormatMarkdown: - formatted = p.formatter.FromMarkdown(ctx, form.Status, status.Mentions, status.Tags) + formatted = p.formatter.FromMarkdown(ctx, form.Status, status.Mentions, status.Tags, status.Emojis) default: return fmt.Errorf("format %s not recognised as a valid status format", form.Format) } diff --git a/internal/text/formatter.go b/internal/text/formatter.go index 3970d0c72..5daec5c82 100644 --- a/internal/text/formatter.go +++ b/internal/text/formatter.go @@ -30,7 +30,7 @@ type Formatter interface { // FromPlain parses an HTML text from a plaintext. FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string // FromMarkdown parses an HTML text from a markdown-formatted text. - FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string + FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string // ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs. 
ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string diff --git a/internal/text/markdown.go b/internal/text/markdown.go index 1382cbf61..b512e3b0f 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -22,6 +22,7 @@ "bytes" "context" "io" + "strings" "github.com/russross/blackfriday/v2" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -31,7 +32,7 @@ ) var ( - bfExtensions = blackfriday.CommonExtensions | blackfriday.HardLineBreak | blackfriday.Footnotes + bfExtensions = blackfriday.NoIntraEmphasis | blackfriday.FencedCode | blackfriday.Autolink | blackfriday.Strikethrough | blackfriday.SpaceHeadings | blackfriday.HardLineBreak m *minify.M ) @@ -54,8 +55,7 @@ func (r *renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool html = r.f.ReplaceMentions(r.ctx, html, r.mentions) // we don't have much recourse if this fails - _, err := io.WriteString(w, html) - if err != nil { + if _, err := io.WriteString(w, html); err != nil { log.Errorf("error outputting markdown text: %s", err) } return status @@ -63,7 +63,7 @@ func (r *renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool return r.HTMLRenderer.RenderNode(w, node, entering) } -func (f *formatter) FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { +func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string { renderer := &renderer{ f: f, @@ -75,11 +75,28 @@ func (f *formatter) FromMarkdown(ctx context.Context, md string, mentions []*gts }), } - // parse markdown, use custom renderer to add hashtag/mention links - contentBytes := blackfriday.Run([]byte(md), blackfriday.WithExtensions(bfExtensions), blackfriday.WithRenderer(renderer)) + // Temporarily replace all found emoji shortcodes in the markdown text with + // their ID so that they're not parsed as anything by the markdown 
parser - + // this fixes cases where emojis with some underscores in them are parsed as + // words with emphasis, eg `:_some_emoji:` becomes `:<em>some</em>emoji:` + // + // Since the IDs of the emojis are just uppercase letters + numbers they should + // be safe to pass through the markdown parser without unexpected effects. + for _, e := range emojis { + markdownText = strings.ReplaceAll(markdownText, ":"+e.Shortcode+":", ":"+e.ID+":") + } - // clean anything dangerous out of it - content := SanitizeHTML(string(contentBytes)) + // parse markdown text into html, using custom renderer to add hashtag/mention links + htmlContentBytes := blackfriday.Run([]byte(markdownText), blackfriday.WithExtensions(bfExtensions), blackfriday.WithRenderer(renderer)) + htmlContent := string(htmlContentBytes) + + // Replace emoji IDs in the parsed html content with their shortcodes again + for _, e := range emojis { + htmlContent = strings.ReplaceAll(htmlContent, ":"+e.ID+":", ":"+e.Shortcode+":") + } + + // clean anything dangerous out of the html + htmlContent = SanitizeHTML(htmlContent) if m == nil { m = minify.New() @@ -89,7 +106,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, md string, mentions []*gts }) } - minified, err := m.String("text/html", content) + minified, err := m.String("text/html", htmlContent) if err != nil { log.Errorf("error minifying markdown text: %s", err) } diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 4c9483c7e..31ef69eea 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -62,7 +62,7 @@ mdCodeBlockWithNewlines = "some code coming up\n\n```\n\n\n\n```\nthat was some code" mdCodeBlockWithNewlinesExpected = "

<p>some code coming up</p><pre><code>\n\n\n</code></pre><p>that was some code</p>

" mdWithFootnote = "fox mulder,fbi.[^1]\n\n[^1]: federated bureau of investigation" - mdWithFootnoteExpected = "

fox mulder,fbi.1


  1. federated bureau of investigation
" + mdWithFootnoteExpected = "

<p>fox mulder,fbi.[^1]</p><p>[^1]: federated bureau of investigation</p>

" mdWithBlockQuote = "get ready, there's a block quote coming:\n\n>line1\n>line2\n>\n>line3\n\n" mdWithBlockQuoteExpected = "

<p>get ready, there’s a block quote coming:</p><blockquote><p>line1<br>line2</p><p>line3</p></blockquote>

" mdHashtagAndCodeBlock = "#Hashtag\n\n```\n#Hashtag\n```" @@ -76,22 +76,22 @@ type MarkdownTestSuite struct { } func (suite *MarkdownTestSuite) TestParseSimple() { - s := suite.formatter.FromMarkdown(context.Background(), simpleMarkdown, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), simpleMarkdown, nil, nil, nil) suite.Equal(simpleMarkdownExpected, s) } func (suite *MarkdownTestSuite) TestParseWithCodeBlock() { - s := suite.formatter.FromMarkdown(context.Background(), withCodeBlock, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), withCodeBlock, nil, nil, nil) suite.Equal(withCodeBlockExpected, s) } func (suite *MarkdownTestSuite) TestParseWithInlineCode() { - s := suite.formatter.FromMarkdown(context.Background(), withInlineCode, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), withInlineCode, nil, nil, nil) suite.Equal(withInlineCodeExpected, s) } func (suite *MarkdownTestSuite) TestParseWithInlineCode2() { - s := suite.formatter.FromMarkdown(context.Background(), withInlineCode2, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), withInlineCode2, nil, nil, nil) suite.Equal(withInlineCode2Expected, s) } @@ -100,17 +100,17 @@ func (suite *MarkdownTestSuite) TestParseWithHashtag() { suite.testTags["Hashtag"], } - s := suite.formatter.FromMarkdown(context.Background(), withHashtag, nil, foundTags) + s := suite.formatter.FromMarkdown(context.Background(), withHashtag, nil, foundTags, nil) suite.Equal(withHashtagExpected, s) } func (suite *MarkdownTestSuite) TestParseWithHTML() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil, nil) suite.Equal(mdWithHTMLExpected, s) } func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), 
mdWithCheekyHTML, nil, nil, nil) suite.Equal(mdWithCheekyHTMLExpected, s) } @@ -118,36 +118,36 @@ func (suite *MarkdownTestSuite) TestParseWithHashtagInitial() { s := suite.formatter.FromMarkdown(context.Background(), mdWithHashtagInitial, nil, []*gtsmodel.Tag{ suite.testTags["Hashtag"], suite.testTags["welcome"], - }) + }, nil) suite.Equal(mdWithHashtagInitialExpected, s) } func (suite *MarkdownTestSuite) TestParseCodeBlockWithNewlines() { - s := suite.formatter.FromMarkdown(context.Background(), mdCodeBlockWithNewlines, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), mdCodeBlockWithNewlines, nil, nil, nil) suite.Equal(mdCodeBlockWithNewlinesExpected, s) } func (suite *MarkdownTestSuite) TestParseWithFootnote() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithFootnote, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), mdWithFootnote, nil, nil, nil) suite.Equal(mdWithFootnoteExpected, s) } func (suite *MarkdownTestSuite) TestParseWithBlockquote() { - s := suite.formatter.FromMarkdown(context.Background(), mdWithBlockQuote, nil, nil) + s := suite.formatter.FromMarkdown(context.Background(), mdWithBlockQuote, nil, nil, nil) suite.Equal(mdWithBlockQuoteExpected, s) } func (suite *MarkdownTestSuite) TestParseHashtagWithCodeBlock() { s := suite.formatter.FromMarkdown(context.Background(), mdHashtagAndCodeBlock, nil, []*gtsmodel.Tag{ suite.testTags["Hashtag"], - }) + }, nil) suite.Equal(mdHashtagAndCodeBlockExpected, s) } func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlock() { s := suite.formatter.FromMarkdown(context.Background(), mdMentionAndCodeBlock, []*gtsmodel.Mention{ suite.testMentions["local_user_2_mention_zork"], - }, nil) + }, nil, nil) suite.Equal(mdMentionAndCodeBlockExpected, s) }