[feature] Federate status language in and out (#2366)

* [feature] Federate status language in + out

* go fmt

* tests, little fix

* improve comments

* unnest a bit

* avoid unnecessary nil check

* use more descriptive variable for contentMap

* prefer instance languages when selecting from contentMap

* update docs to reflect lang selection

* rename rdfLangString -> rdfLangs

* update comments to mention Pollable

* iter through slice instead of map
This commit is contained in:
tobi 2023-11-21 15:13:30 +01:00 committed by GitHub
parent 1f962372af
commit cfefbc08d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 758 additions and 168 deletions

View file

@ -482,3 +482,64 @@ For the convenience of remote servers, GoToSocial will always provide both the `
GoToSocial tries to parse incoming Mentions in the same way it sends them out: as a `Mention` type entry in the `tag` property. However, when parsing incoming Mentions it's a bit more relaxed with regards to which properties must be set.
GoToSocial will prefer the `href` property, which can be either the ActivityPub ID/URI or the web URL of the target; if `href` is not present, it will fall back to using the `name` property. If neither property is present, the mention will be considered invalid and discarded.
## Content, ContentMap, and Language
In line with other ActivityPub implementations, GoToSocial uses `content` and `contentMap` fields on `Objects` to infer content and language of incoming posts, and to set content and language on outgoing posts.
### Outgoing
If an outgoing `Object` (usually a `Note`) has content, it will be set as stringified HTML on the `content` field.
If the `content` is in a specific user-selected language, then the `Object` will also have the `contentMap` property set to a single-entry key/value map, where the key is a BCP47 language tag, and the value is the same content from the `content` field.
For example, a post written in English (`en`) will look something like this:
```json
{
"@context": "https://www.w3.org/ns/activitystreams",
"type": "Note",
"attributedTo": "http://example.org/users/i_p_freely",
"to": "https://www.w3.org/ns/activitystreams#Public",
"cc": "http://example.org/users/i_p_freely/followers",
"id": "http://example.org/users/i_p_freely/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
"url": "http://example.org/@i_p_freely/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
"published": "2021-11-20T13:32:16Z",
"content": "<p>This is an example note.</p>",
"contentMap": {
"en": "<p>This is an example note.</p>"
},
"attachment": [],
"replies": {...},
"sensitive": false,
"summary": "",
"tag": {...}
}
```
GoToSocial will always set the `content` field if the post has content, but it may not always set the `contentMap` field: this is the case if an old version of GoToSocial is in use, or if the language used by a user is not set or is not a recognized BCP47 language tag.
### Incoming
GoToSocial uses both the `content` and the `contentMap` properties on incoming `Object`s to determine the content and infer the intended "primary" language for that content. It uses the following algorithm:
#### Only `content` is set
Take that content only and mark language as unknown.
#### Both `content` and `contentMap` are set
Look for a language tag as key in the `contentMap`, with a value that matches the stringified HTML set in `content`.
If a match is found, use this as the post's language.
If a match is not found, keep content from `content` and mark language as unknown.
#### Only `contentMap` is set
If `contentMap` has only one entry, take the language tag and content value as the "primary" language and content.
If `contentMap` has multiple entries, we have no way of determining the intended preferred content and language of the post, since map order is not deterministic. In this case, try to pick a language and content entry that matches one of the GoToSocial instance's [configured languages](../configuration/instance.md). If no language can be matched this way, pick a language and content entry from the `contentMap` at random as the "primary" language and content.
!!! Note
In all of the above cases, if the inferred language cannot be parsed as a valid BCP47 language tag, language will fall back to unknown.

View file

@ -93,6 +93,12 @@ func noteWithMentions1() vocab.ActivityStreamsNote {
content := streams.NewActivityStreamsContentProperty()
content.AppendXMLSchemaString("hey @f0x and @dumpsterqueer")
rdfLangString := make(map[string]string)
rdfLangString["en"] = "hey @f0x and @dumpsterqueer"
rdfLangString["fr"] = "bonjour @f0x et @dumpsterqueer"
content.AppendRDFLangString(rdfLangString)
note.SetActivityStreamsContent(content)
return note

View file

@ -631,27 +631,34 @@ func ExtractPublicKey(i WithPublicKey) (
return nil, nil, nil, gtserror.New("couldn't find public key")
}
// ExtractContent returns a string representation of the
// given interface's Content property, or an empty string
// if no Content is found.
func ExtractContent(i WithContent) string {
contentProperty := i.GetActivityStreamsContent()
if contentProperty == nil {
return ""
// ExtractContent returns an intermediary representation of
// the given interface's Content and/or ContentMap property.
func ExtractContent(i WithContent) gtsmodel.Content {
content := gtsmodel.Content{}
contentProp := i.GetActivityStreamsContent()
if contentProp == nil {
// No content at all.
return content
}
for iter := contentProperty.Begin(); iter != contentProperty.End(); iter = iter.Next() {
for iter := contentProp.Begin(); iter != contentProp.End(); iter = iter.Next() {
switch {
// Content may be parsed as IRI, depending on
// how it's formatted, so account for this.
case iter.IsXMLSchemaString():
return iter.GetXMLSchemaString()
case iter.IsIRI():
return iter.GetIRI().String()
case iter.IsRDFLangString() &&
len(content.ContentMap) == 0:
content.ContentMap = iter.GetRDFLangString()
case iter.IsXMLSchemaString() &&
content.Content == "":
content.Content = iter.GetXMLSchemaString()
case iter.IsIRI() &&
content.Content == "":
content.Content = iter.GetIRI().String()
}
}
return ""
return content
}
// ExtractAttachments attempts to extract barebones MediaAttachment objects from given AS interface type.

View file

@ -30,10 +30,11 @@ type ExtractContentTestSuite struct {
func (suite *ExtractContentTestSuite) TestExtractContent1() {
note := suite.noteWithMentions1
content := ap.ExtractContent(note)
suite.Equal("hey @f0x and @dumpsterqueer", content)
suite.Equal("hey @f0x and @dumpsterqueer", content.Content)
suite.Equal("bonjour @f0x et @dumpsterqueer", content.ContentMap["fr"])
suite.Equal("hey @f0x and @dumpsterqueer", content.ContentMap["en"])
}
func TestExtractContentTestSuite(t *testing.T) {

View file

@ -20,11 +20,12 @@
import (
"github.com/superseriousbusiness/activity/pub"
"github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/text"
)
/*
NORMALIZE INCOMING
INCOMING NORMALIZATION
The below functions should be called to normalize the content
of messages *COMING INTO* GoToSocial via the federation API,
either as the result of delivery from a remote instance to this
@ -84,39 +85,84 @@ func NormalizeIncomingActivity(activity pub.Activity, rawJSON map[string]interfa
}
}
// NormalizeIncomingContent replaces the Content of the given item
// with the sanitized version of the raw 'content' value from the
// raw json object map.
// normalizeContent normalizes the given content
// string by sanitizing its HTML and minimizing it.
//
// noop if there was no content in the json object map or the
// content was not a plain string.
func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) {
rawContent, ok := rawJSON["content"]
if !ok {
// No content in rawJSON.
// TODO: In future we might also
// look for "contentMap" property.
return
// Noop for non-string content.
func normalizeContent(rawContent interface{}) string {
if rawContent == nil {
// Nothing to fix.
return ""
}
content, ok := rawContent.(string)
if !ok {
// Not interested in content arrays.
return
// Not interested in
// content slices etc.
return ""
}
// Content should be HTML encoded by default:
if content == "" {
// Nothing to fix.
return ""
}
// Content entries should be HTML encoded by default:
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content
//
// TODO: sanitize differently based on mediaType.
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype
content = text.SanitizeToHTML(content)
content = text.MinifyHTML(content)
return content
}
// Set normalized content property from the raw string;
// this replaces any existing content property on the item.
// NormalizeIncomingContent rebuilds the Content property of the given item
// from the raw 'content' and 'contentMap' entries of the raw json object map,
// passing every raw value through normalizeContent first.
//
// The normalized 'content' string (if non-empty) is stored as an XML schema
// string entry. The 'contentMap' entries that normalize to a non-empty string
// are stored together as one RDF lang string entry, keyed by their original
// map key.
//
// Noop if the json object map holds neither 'content' nor 'contentMap'.
func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) {
	rawPlain, rawByLang := rawJSON["content"], rawJSON["contentMap"]
	if rawPlain == nil && rawByLang == nil {
		// Neither key carries a value, so
		// leave the item exactly as it is.
		return
	}

	// Fresh property that will hold
	// only the normalized values.
	prop := streams.NewActivityStreamsContentProperty()

	// Plain 'content' value, when usable.
	if plain := normalizeContent(rawPlain); plain != "" {
		prop.AppendXMLSchemaString(plain)
	}

	// 'contentMap' value, when it is actually a map.
	if entries, isMap := rawByLang.(map[string]interface{}); isMap {
		byLang := make(map[string]string, len(entries))

		for lang, rawEntry := range entries {
			// Entries that normalize to
			// nothing are left out.
			if entry := normalizeContent(rawEntry); entry != "" {
				byLang[lang] = entry
			}
		}

		if len(byLang) > 0 {
			prop.AppendRDFLangString(byLang)
		}
	}

	// Swap whatever content property the item
	// had for the freshly normalized one.
	item.SetActivityStreamsContent(prop)
}
@ -299,3 +345,204 @@ func NormalizeIncomingPollOptions(item WithOneOf, rawJSON map[string]interface{}
NormalizeIncomingName(choiceable, rawChoice)
}
}
/*
OUTGOING NORMALIZATION
The below functions should be called to normalize the content
of messages *GOING OUT OF* GoToSocial via the federation API,
either as the result of delivery to a remote instance from this
instance, or as a result of a remote instance doing an http call
to us to dereference something.
*/
// NormalizeOutgoingAttachmentProp makes sure that a lone 'attachment' value
// in rawJSON is wrapped in a single-entry array, since other AP implementations
// cope better with an array there.
//
// For example:
//
//	"attachment": {
//		...
//	}
//
// is rewritten to:
//
//	"attachment": [
//		{
//			...
//		}
//	]
//
// Only rawJSON is modified; the item itself is not inspected.
//
// Noop when rawJSON has no 'attachment' key, or when the value is already a slice.
func NormalizeOutgoingAttachmentProp(item WithAttachment, rawJSON map[string]interface{}) {
	raw, present := rawJSON["attachment"]
	if !present {
		// Key is absent,
		// leave map alone.
		return
	}

	switch raw.(type) {
	case []interface{}:
		// Value is a slice
		// already, leave it.
	default:
		// Wrap the single value in a slice.
		rawJSON["attachment"] = []interface{}{raw}
	}
}
// NormalizeOutgoingContentProp rewrites go-fed's combined serialization of
// content and contentMap into the two separate properties that other AP
// implementations expect.
//
// Ie., a serialized "content" property like this:
//
//	"content": [
//		"hello world!",
//		{
//			"en": "hello world!"
//		}
//	]
//
// Is split up into:
//
//	"content": "hello world!",
//	"contentMap": {
//		"en": "hello world!"
//	}
//
// The first non-empty plain string entry and the first non-nil language map
// entry found on the item's content property are used. If no plain string is
// found the "content" key is removed from rawJSON, and if no language map is
// found the "contentMap" key is removed from rawJSON.
//
// Noop if the item has no content property, or the property has no entries.
func NormalizeOutgoingContentProp(item WithContent, rawJSON map[string]interface{}) {
	prop := item.GetActivityStreamsContent()
	if prop == nil {
		// No content property
		// set, leave map alone.
		return
	}

	if prop.Len() == 0 {
		// Property has no
		// entries, leave map alone.
		return
	}

	var (
		plain  string
		byLang map[string]string
	)

	// Keep only the first language map and the
	// first plain string that we come across.
	for it := prop.Begin(); it != prop.End(); it = it.Next() {
		if it.IsRDFLangString() && byLang == nil {
			byLang = it.GetRDFLangString()
		} else if plain == "" && it.IsXMLSchemaString() {
			plain = it.GetXMLSchemaString()
		}
	}

	if plain == "" {
		delete(rawJSON, "content")
	} else {
		rawJSON["content"] = plain
	}

	if byLang == nil {
		delete(rawJSON, "contentMap")
	} else {
		rawJSON["contentMap"] = byLang
	}
}
// NormalizeOutgoingObjectProp re-serializes every Object entry set on the given
// item and stores the result under the "object" key of rawJSON.
//
// Each entry is handled as follows:
//
//   - plain IRI: written out as its string form.
//   - Accountable: serialized with serializeAccountable, without '@context'.
//   - Statusable: serialized with serializeStatusable, without '@context'.
//   - anything else: serialized with the entry's own Serialize method.
//
// A single resulting entry is stored unnested, so that:
//
//	"object": [
//		{
//			...
//		}
//	]
//
// Becomes:
//
//	"object": {
//		...
//	}
//
// More than one entry is stored as an array.
//
// Returns an error if an entry cannot be resolved to a vocab.Type, or if
// serializing an entry fails. Noop if the item has no Object entries.
func NormalizeOutgoingObjectProp(item WithObject, rawJSON map[string]interface{}) error {
	prop := item.GetActivityStreamsObject()
	if prop == nil {
		// No object property
		// set, nothing to do.
		return nil
	}

	count := prop.Len()
	if count == 0 {
		// Object property is
		// empty, nothing to do.
		return nil
	}

	// Walk over the objects set on the already
	// serialized item, and build replacement
	// values for them one at a time.
	serialized := make([]interface{}, 0, count)

	for it := prop.Begin(); it != prop.End(); it = it.Next() {
		if it.IsIRI() {
			// An IRI is just written as a string.
			serialized = append(serialized, it.GetIRI().String())
			continue
		}

		t := it.GetType()
		if t == nil {
			// Neither IRI nor type.
			return gtserror.Newf("could not resolve object iter %T to vocab.Type", it)
		}

		var (
			entry map[string]interface{}
			err   error
		)

		// The wrapping type carries `@context` itself,
		// so accountables and statusables nested inside
		// of it are serialized without their own copy.
		typeName := t.GetTypeName()
		if IsAccountable(typeName) {
			entry, err = serializeAccountable(t, false)
		} else if IsStatusable(typeName) {
			// IsStatusable includes Pollable as well.
			entry, err = serializeStatusable(t, false)
		} else {
			// Type has no custom serializer, use its own.
			entry, err = t.Serialize()
		}

		if err != nil {
			return err
		}

		serialized = append(serialized, entry)
	}

	if count == 1 {
		// Only one object, store it unnested.
		rawJSON["object"] = serialized[0]
		return nil
	}

	// Several objects, store as array.
	rawJSON["object"] = serialized
	return nil
}

View file

@ -46,6 +46,9 @@ func (suite *NormalizeTestSuite) getStatusable() (vocab.ActivityStreamsNote, map
"https://example.org/users/someone/followers"
],
"content": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night&#39;s spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.",
"contentMap": {
"en": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night&#39;s spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues."
},
"context": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",
"conversation": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",
"id": "https://example.org/objects/01GX0MT2PA58JNSMK11MCS65YD",
@ -182,7 +185,15 @@ func (suite *NormalizeTestSuite) getAccountable() (vocab.ActivityStreamsPerson,
func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
note, rawNote := suite.getStatusable()
suite.Equal(`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`, ap.ExtractContent(note))
content := ap.ExtractContent(note)
suite.Equal(
`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`,
content.Content,
)
// Malformed contentMap entry
// will not be extractable yet.
suite.Empty(content.ContentMap["en"])
create := testrig.WrapAPNoteInCreate(
testrig.URLMustParse("https://example.org/create_something"),
@ -192,7 +203,18 @@ func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
)
ap.NormalizeIncomingActivity(create, map[string]interface{}{"object": rawNote})
suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note))
content = ap.ExtractContent(note)
suite.Equal(
`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`,
content.Content,
)
// Content map entry should now be extractable.
suite.Equal(
`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`,
content.ContentMap["en"],
)
}
func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment() {
@ -202,12 +224,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// the attachment(s) should be all jacked up.
suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams",
"attachment": {
"attachment": [
{
"mediaType": "image/jpeg",
"name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
},
}
],
"attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public",
@ -222,12 +246,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// attachment should no longer be all jacked up.
suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams",
"attachment": {
"attachment": [
{
"mediaType": "image/jpeg",
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
},
}
],
"attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public",
@ -243,12 +269,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// the attachment(s) should be all jacked up.
suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams",
"attachment": {
"attachment": [
{
"mediaType": "image/jpeg",
"name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
},
}
],
"attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public",
@ -263,12 +291,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
// attachment should no longer be all jacked up.
suite.Equal(`{
"@context": "https://www.w3.org/ns/activitystreams",
"attachment": {
"attachment": [
{
"mediaType": "image/jpeg",
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
"type": "Document",
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
},
}
],
"attributedTo": "https://example.org/users/hourlycatbot",
"id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",
"to": "https://www.w3.org/ns/activitystreams#Public",

View file

@ -18,10 +18,9 @@
package ap
import (
"fmt"
"github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
)
// Serialize is a custom serializer for ActivityStreams types.
@ -37,15 +36,18 @@
//
// - OrderedCollection: 'orderedItems' property will always be made into an array.
// - Any Accountable type: 'attachment' property will always be made into an array.
// - Update: any Accountable 'object's set on an update will be custom serialized as above.
// - Any Statusable type: 'attachment' property will always be made into an array; 'content' and 'contentMap' will be normalized.
// - Any Activityable type: any 'object's set on an activity will be custom serialized as above.
func Serialize(t vocab.Type) (m map[string]interface{}, e error) {
switch t.GetTypeName() {
case ObjectOrderedCollection:
switch tn := t.GetTypeName(); {
case tn == ObjectOrderedCollection:
return serializeOrderedCollection(t)
case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService:
case IsAccountable(tn):
return serializeAccountable(t, true)
case ActivityUpdate:
return serializeWithObject(t)
case IsStatusable(tn):
return serializeStatusable(t, true)
case IsActivityable(tn):
return serializeActivityable(t, true)
default:
// No custom serializer necessary.
return streams.Serialize(t)
@ -61,8 +63,8 @@ func Serialize(t vocab.Type) (m map[string]interface{}, e error) {
// See:
// - https://github.com/go-fed/activity/issues/139
// - https://github.com/mastodon/mastodon/issues/24225
func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interface{}, error) {
data, err := streams.Serialize(orderedCollection)
func serializeOrderedCollection(t vocab.Type) (map[string]interface{}, error) {
data, err := streams.Serialize(t)
if err != nil {
return nil, err
}
@ -99,7 +101,12 @@ func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interf
// If the accountable is being serialized as part of another object (eg., as the
// object of an activity), then includeContext should be set to false, as the
// @context entry should be included on the top-level/wrapping activity/object.
func serializeAccountable(accountable vocab.Type, includeContext bool) (map[string]interface{}, error) {
func serializeAccountable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
accountable, ok := t.(Accountable)
if !ok {
return nil, gtserror.Newf("vocab.Type %T not accountable", t)
}
var (
data map[string]interface{}
err error
@ -115,91 +122,61 @@ func serializeAccountable(accountable vocab.Type, includeContext bool) (map[stri
return nil, err
}
attachment, ok := data["attachment"]
if !ok {
// No 'attachment', nothing to change.
return data, nil
}
if _, ok := attachment.([]interface{}); ok {
// Already slice.
return data, nil
}
// Coerce single-object to slice.
data["attachment"] = []interface{}{attachment}
NormalizeOutgoingAttachmentProp(accountable, data)
return data, nil
}
func serializeWithObject(t vocab.Type) (map[string]interface{}, error) {
withObject, ok := t.(WithObject)
func serializeStatusable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
statusable, ok := t.(Statusable)
if !ok {
return nil, fmt.Errorf("serializeWithObject: could not resolve %T to WithObject", t)
}
data, err := streams.Serialize(t)
if err != nil {
return nil, err
}
object := withObject.GetActivityStreamsObject()
if object == nil {
// Nothing to do, bail early.
return data, nil
}
objectLen := object.Len()
if objectLen == 0 {
// Nothing to do, bail early.
return data, nil
}
// The thing we already serialized has objects
// on it, so we should see if we need to custom
// serialize any of those objects, and replace
// them on the data map as necessary.
objects := make([]interface{}, 0, objectLen)
for iter := object.Begin(); iter != object.End(); iter = iter.Next() {
if iter.IsIRI() {
// Plain IRIs don't need custom serialization.
objects = append(objects, iter.GetIRI().String())
continue
return nil, gtserror.Newf("vocab.Type %T not statusable", t)
}
var (
objectType = iter.GetType()
objectSer map[string]interface{}
data map[string]interface{}
err error
)
if objectType == nil {
// This is awkward.
return nil, fmt.Errorf("serializeWithObject: could not resolve object iter %T to vocab.Type", iter)
}
switch objectType.GetTypeName() {
case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService:
// @context will be included in wrapping type already,
// we don't need to include it in the object itself.
objectSer, err = serializeAccountable(objectType, false)
default:
// No custom serializer for this type; serialize as normal.
objectSer, err = objectType.Serialize()
if includeContext {
data, err = streams.Serialize(statusable)
} else {
data, err = statusable.Serialize()
}
if err != nil {
return nil, err
}
objects = append(objects, objectSer)
NormalizeOutgoingAttachmentProp(statusable, data)
NormalizeOutgoingContentProp(statusable, data)
return data, nil
}
func serializeActivityable(t vocab.Type, includeContext bool) (map[string]interface{}, error) {
activityable, ok := t.(Activityable)
if !ok {
return nil, gtserror.Newf("vocab.Type %T not activityable", t)
}
if objectLen == 1 {
// Unnest single object.
data["object"] = objects[0]
var (
data map[string]interface{}
err error
)
if includeContext {
data, err = streams.Serialize(activityable)
} else {
// Array of objects.
data["object"] = objects
data, err = activityable.Serialize()
}
if err != nil {
return nil, err
}
if err := NormalizeOutgoingObjectProp(activityable, data); err != nil {
return nil, err
}
return data, nil

View file

@ -237,3 +237,14 @@ type StatusToEmoji struct {
// VisibilityDefault is used when no other setting can be found.
VisibilityDefault Visibility = VisibilityUnlocked
)
// Content models the simple string content
// of a status along with its ContentMap,
// which contains content entries keyed by
// BCP47 language tag.
//
// It serves as an intermediary representation
// of the 'content' and 'contentMap' properties
// of an ActivityStreams object.
//
// Content and/or ContentMap may be zero/nil.
type Content struct {
// Content is the plain string content;
// empty string if no content was set.
Content string
// ContentMap holds content entries keyed
// by language tag; nil if none were set.
ContentMap map[string]string
}

View file

@ -244,9 +244,15 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
}
// status.Content
// status.Language
//
// The (html-formatted) content of this status.
status.Content = ap.ExtractContent(statusable)
// Many implementations set both content
// and contentMap; we can use these to
// infer the language of the status.
status.Content, status.Language = ContentToContentLanguage(
ctx,
ap.ExtractContent(statusable),
)
// status.Attachments
//
@ -396,9 +402,6 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
return &s
}()
// language
// TODO: we might be able to extract this from the contentMap field
// ActivityStreamsType
status.ActivityStreamsType = statusable.GetTypeName()
@ -707,7 +710,7 @@ func (c *Converter) ASFlagToReport(ctx context.Context, flaggable ap.Flaggable)
// For Mastodon, this will just be a string, or nothing.
// In Misskey's case, it may also contain the URLs of
// one or more reported statuses, so extract these too.
content := ap.ExtractContent(flaggable)
content := ap.ExtractContent(flaggable).Content
statusURIs := []*url.URL{}
inlineURLs := misskeyReportInlineURLs(content)
statusURIs = append(statusURIs, inlineURLs...)

View file

@ -45,6 +45,10 @@ func (suite *ASToInternalTestSuite) jsonToType(in string) vocab.Type {
suite.FailNow(err.Error())
}
if statusable, ok := t.(ap.Statusable); ok {
ap.NormalizeIncomingContent(statusable, m)
}
return t
}
@ -103,7 +107,8 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatus() {
suite.NoError(err)
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
suite.Equal(`<p>&gt; So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
suite.Equal("en", status.Language)
}
func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
@ -117,7 +122,7 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
suite.NoError(err)
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
suite.Equal(`<p>&gt; So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
// on statuses with no URL in them (like ones we get from pleroma sometimes) we should use the AP URI of the status as URL
suite.Equal("http://fossbros-anonymous.io/users/foss_satan/statuses/108138763199405167", status.URL)

View file

@ -607,9 +607,17 @@ func (c *Converter) StatusToAS(ctx context.Context, s *gtsmodel.Status) (ap.Stat
// conversation
// TODO
// content -- the actual post itself
// content -- the actual post
// itself, plus the language
contentProp := streams.NewActivityStreamsContentProperty()
contentProp.AppendXMLSchemaString(s.Content)
if s.Language != "" {
contentProp.AppendRDFLangString(map[string]string{
s.Language: s.Content,
})
}
status.SetActivityStreamsContent(contentProp)
// attachments

View file

@ -340,6 +340,9 @@ func (suite *InternalToASTestSuite) TestStatusToAS() {
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
"content": "hello everyone!",
"contentMap": {
"en": "hello everyone!"
},
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-10-20T12:40:37+02:00",
"replies": {
@ -379,16 +382,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASWithIDs() {
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
// will appear, so trim them out of the string for consistency
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
suite.Equal(` {
suite.Equal(` [
{
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
"mediaType": "image/jpeg",
"name": "Black and white image of some 50's style text saying: Welcome On Board",
"type": "Document",
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
},
}
],
"attributedTo": "http://localhost:8080/users/admin",
"cc": "http://localhost:8080/users/admin/followers",
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
"contentMap": {
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
},
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
"published": "2021-10-20T11:36:45Z",
"replies": {
@ -446,16 +454,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASFromDB() {
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
// will appear, so trim them out of the string for consistency
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
suite.Equal(` {
suite.Equal(` [
{
"blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
"mediaType": "image/jpeg",
"name": "Black and white image of some 50's style text saying: Welcome On Board",
"type": "Document",
"url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
},
}
],
"attributedTo": "http://localhost:8080/users/admin",
"cc": "http://localhost:8080/users/admin/followers",
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
"contentMap": {
"en": "hello world! #welcome ! first post on the instance :rainbow: !"
},
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
"published": "2021-10-20T11:36:45Z",
"replies": {
@ -519,6 +532,9 @@ func (suite *InternalToASTestSuite) TestStatusToASWithMentions() {
"http://localhost:8080/users/the_mighty_zork"
],
"content": "hi @the_mighty_zork welcome to the instance!",
"contentMap": {
"en": "hi @the_mighty_zork welcome to the instance!"
},
"id": "http://localhost:8080/users/admin/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
"inReplyTo": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-11-20T13:32:16Z",

View file

@ -31,6 +31,8 @@
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/language"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/text"
)
@ -184,3 +186,102 @@ func placeholdUnknownAttachments(arr []apimodel.Attachment) (string, []apimodel.
return text.SanitizeToHTML(aside.String()), arr
}
// ContentToContentLanguage tries to
// extract a content string and language
// tag string from the given intermediary
// content.
//
// Selection rules, applied in order:
//
//   - `contentMap` has no entries (nil or empty):
//     return `content` as-is, with no language.
//   - `content` is set: return it, along with the
//     tag of the `contentMap` entry whose value is
//     identical to `content` (if there is one).
//   - `contentMap` has exactly one entry: return
//     that entry's content and tag.
//   - Otherwise: return the entry for the first of
//     the instance's configured languages that has
//     non-empty content in `contentMap`; if there's
//     no such entry, fall back to the entry with the
//     lexicographically lowest tag.
//
// Wherever more than one entry could be picked,
// the lowest tag wins, so that the result doesn't
// depend on Go's randomized map iteration order.
//
// A non-empty tag is run through language.Parse
// and replaced by the parsed TagStr on success. If
// parsing fails, a warning is logged and the tag
// is returned as it was found in the map.
//
// Either/both of the returned strings may
// be empty, depending on how things go.
func ContentToContentLanguage(
	ctx context.Context,
	content gtsmodel.Content,
) (
	string, // content
	string, // language
) {
	var (
		contentStr string
		langTagStr string
	)

	switch contentMap := content.ContentMap; {
	// Simplest case: no `contentMap`, or
	// one without any entries in it.
	// Return `content`, even if empty.
	//
	// Checking len rather than nil stops
	// an empty map from reaching the
	// language-guessing logic below.
	case len(contentMap) == 0:
		return content.Content, ""

	// `content` and `contentMap` set.
	// Try to infer "primary" language.
	case content.Content != "":
		// Assume `content` is intended
		// primary content, and look for
		// corresponding language tag.
		// No match leaves the tag empty.
		contentStr = content.Content
		langTagStr, _ = lowestLangTag(contentMap, func(c string) bool {
			return c == contentStr
		})

	// `content` not set; `contentMap`
	// is set with only one value.
	// This must be the "primary" lang.
	case len(contentMap) == 1:
		langTagStr, _ = lowestLangTag(contentMap, func(string) bool {
			return true
		})
		contentStr = contentMap[langTagStr]

	// Only `contentMap` is set, with more
	// than one value. Map order is not
	// guaranteed so we can't know the
	// "primary" language.
	//
	// Try to select content using our
	// instance's configured languages.
	default:
		for _, tag := range config.GetInstanceLanguages().TagStrs() {
			// Only store the tag on a hit, so
			// that a tag which matched nothing
			// can't leak out of this loop.
			if c := contentMap[tag]; c != "" {
				contentStr, langTagStr = c, tag
				break
			}
		}

		// In case of no hits, take the
		// entry with the lowest tag, so
		// repeated calls always agree.
		if contentStr == "" {
			langTagStr, _ = lowestLangTag(contentMap, func(string) bool {
				return true
			})
			contentStr = contentMap[langTagStr]
		}
	}

	if langTagStr != "" {
		// Found a lang tag for this content,
		// make sure it's valid / parseable.
		lang, err := language.Parse(langTagStr)
		if err != nil {
			log.Warnf(
				ctx,
				"could not parse %s as BCP47 language tag in status contentMap: %v",
				langTagStr, err,
			)
		} else {
			// Inferred the language!
			// Use normalized version.
			langTagStr = lang.TagStr
		}
	}

	return contentStr, langTagStr
}

// lowestLangTag returns the lexicographically
// lowest key in contentMap whose value satisfies
// match, and whether any such key was found.
func lowestLangTag(
	contentMap map[string]string,
	match func(content string) bool,
) (string, bool) {
	var (
		lowest string
		found  bool
	)

	for tag, c := range contentMap {
		if !match(c) {
			continue
		}

		if !found || tag < lowest {
			lowest, found = tag, true
		}
	}

	return lowest, found
}

View file

@ -18,7 +18,12 @@
package typeutils
import (
"context"
"testing"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/language"
)
func TestMisskeyReportContentURLs1(t *testing.T) {
@ -44,3 +49,112 @@ func TestMisskeyReportContentURLs2(t *testing.T) {
t.Fatalf("wanted 0 urls, got %d", l)
}
}
// TestContentToContentLanguage runs ContentToContentLanguage
// against a table of content / contentMap combinations, setting
// the instance languages config per case, and checks that the
// returned content and language tag match what's expected.
func TestContentToContentLanguage(t *testing.T) {
	type testcase struct {
		content           gtsmodel.Content
		instanceLanguages language.Languages
		expectedContent   string
		expectedLang      string
	}

	// Every case below overwrites the global
	// instance languages setting; put back
	// whatever was configured when we're done
	// so other tests aren't affected by it.
	originalLangs := config.GetInstanceLanguages()
	defer config.SetInstanceLanguages(originalLangs)

	ctx := context.Background()

	for i, tc := range []testcase{
		// No contentMap: content is
		// returned without a language.
		{
			content: gtsmodel.Content{
				Content:    "hello world",
				ContentMap: nil,
			},
			expectedContent: "hello world",
			expectedLang:    "",
		},
		// Empty (but not nil) contentMap:
		// behaves just like no contentMap.
		{
			content: gtsmodel.Content{
				Content:    "hello world",
				ContentMap: map[string]string{},
			},
			expectedContent: "hello world",
			expectedLang:    "",
		},
		// Only a single-entry contentMap:
		// that entry is the content + lang.
		{
			content: gtsmodel.Content{
				Content: "",
				ContentMap: map[string]string{
					"en": "hello world",
				},
			},
			expectedContent: "hello world",
			expectedLang:    "en",
		},
		// content matches one contentMap
		// entry: that entry's tag is used.
		{
			content: gtsmodel.Content{
				Content: "bonjour le monde",
				ContentMap: map[string]string{
					"en": "hello world",
					"fr": "bonjour le monde",
				},
			},
			expectedContent: "bonjour le monde",
			expectedLang:    "fr",
		},
		// content matches no contentMap
		// entry: language can't be inferred.
		{
			content: gtsmodel.Content{
				Content: "bonjour le monde",
				ContentMap: map[string]string{
					"en": "hello world",
				},
			},
			expectedContent: "bonjour le monde",
			expectedLang:    "",
		},
		// Multi-entry contentMap only: first
		// matching instance language wins (en).
		{
			content: gtsmodel.Content{
				Content: "",
				ContentMap: map[string]string{
					"en": "hello world",
					"ru": "Привет, мир!",
					"nl": "hallo wereld!",
					"ca": "Hola món!",
				},
			},
			instanceLanguages: language.Languages{
				{TagStr: "en"},
				{TagStr: "ca"},
			},
			expectedContent: "hello world",
			expectedLang:    "en",
		},
		// Same contentMap, instance language
		// preference reversed: ca wins instead.
		{
			content: gtsmodel.Content{
				Content: "",
				ContentMap: map[string]string{
					"en": "hello world",
					"ru": "Привет, мир!",
					"nl": "hallo wereld!",
					"ca": "Hola món!",
				},
			},
			instanceLanguages: language.Languages{
				{TagStr: "ca"},
				{TagStr: "en"},
			},
			expectedContent: "Hola món!",
			expectedLang:    "ca",
		},
	} {
		langs, err := language.InitLangs(tc.instanceLanguages.TagStrs())
		if err != nil {
			t.Fatal(err)
		}
		config.SetInstanceLanguages(langs)

		// Named gotLang (not language) so the
		// imported language package stays usable.
		gotContent, gotLang := ContentToContentLanguage(ctx, tc.content)

		if gotContent != tc.expectedContent {
			t.Errorf(
				"test %d expected content '%s' got '%s'",
				i, tc.expectedContent, gotContent,
			)
		}

		if gotLang != tc.expectedLang {
			t.Errorf(
				"test %d expected language '%s' got '%s'",
				i, tc.expectedLang, gotLang,
			)
		}
	}
}

View file

@ -85,6 +85,9 @@ func (suite *WrapTestSuite) TestWrapNoteInCreate() {
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
"content": "hello everyone!",
"contentMap": {
"en": "hello everyone!"
},
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-10-20T12:40:37+02:00",
"replies": {