From 0f812746b7fd9dfeef41e329af5215772672689a Mon Sep 17 00:00:00 2001 From: Vyr Cossont Date: Mon, 7 Aug 2023 01:25:54 -0700 Subject: [PATCH] [feature] Allow full BCP 47 in language inputs (#2067) * Allow full BCP 47 in language inputs Fixes #2066 * Fuse validation and normalization for languages * Remove outdated comment line * Move post language canonicalization test --- internal/api/client/accounts/accountcreate.go | 11 ++- internal/api/client/statuses/statuscreate.go | 10 ++- .../api/client/statuses/statuscreate_test.go | 36 ++++++++ internal/processing/account/create.go | 3 +- internal/processing/account/update.go | 5 +- internal/processing/status/create.go | 3 +- internal/processing/status/create_test.go | 34 ++++++++ internal/validate/formvalidation.go | 17 ++-- internal/validate/formvalidation_test.go | 83 +++++++------------ 9 files changed, 132 insertions(+), 70 deletions(-) diff --git a/internal/api/client/accounts/accountcreate.go b/internal/api/client/accounts/accountcreate.go index c8247ecf2..473000f6d 100644 --- a/internal/api/client/accounts/accountcreate.go +++ b/internal/api/client/accounts/accountcreate.go @@ -87,7 +87,7 @@ func (m *Module) AccountCreatePOSTHandler(c *gin.Context) { return } - if err := validateCreateAccount(form); err != nil { + if err := validateNormalizeCreateAccount(form); err != nil { apiutil.ErrorHandler(c, gtserror.NewErrorBadRequest(err, err.Error()), m.processor.InstanceGetV1) return } @@ -110,9 +110,10 @@ func (m *Module) AccountCreatePOSTHandler(c *gin.Context) { c.JSON(http.StatusOK, ti) } -// validateCreateAccount checks through all the necessary prerequisites for creating a new account, +// validateNormalizeCreateAccount checks through all the necessary prerequisites for creating a new account, // according to the provided account create request. If the account isn't eligible, an error will be returned. -func validateCreateAccount(form *apimodel.AccountCreateRequest) error { +// Side effect: normalizes the provided language tag for the user's locale. +func validateNormalizeCreateAccount(form *apimodel.AccountCreateRequest) error { if form == nil { return errors.New("form was nil") } @@ -137,9 +138,11 @@ func validateCreateAccount(form *apimodel.AccountCreateRequest) error { return errors.New("agreement to terms and conditions not given") } - if err := validate.Language(form.Locale); err != nil { + locale, err := validate.Language(form.Locale) + if err != nil { return err } + form.Locale = locale return validate.SignUpReason(form.Reason, config.GetAccountsReasonRequired()) } diff --git a/internal/api/client/statuses/statuscreate.go b/internal/api/client/statuses/statuscreate.go index e4d8588c7..e8378f461 100644 --- a/internal/api/client/statuses/statuscreate.go +++ b/internal/api/client/statuses/statuscreate.go @@ -98,7 +98,7 @@ func (m *Module) StatusCreatePOSTHandler(c *gin.Context) { // } // form.Status += "\n\nsent from " + user + "'s iphone\n" - if err := validateCreateStatus(form); err != nil { + if err := validateNormalizeCreateStatus(form); err != nil { apiutil.ErrorHandler(c, gtserror.NewErrorBadRequest(err, err.Error()), m.processor.InstanceGetV1) return } @@ -112,7 +112,9 @@ func (m *Module) StatusCreatePOSTHandler(c *gin.Context) { c.JSON(http.StatusOK, apiStatus) } -func validateCreateStatus(form *apimodel.AdvancedStatusCreateForm) error { +// validateNormalizeCreateStatus checks the form for disallowed combinations of attachments and overlength inputs. +// Side effect: normalizes the post's language tag. +func validateNormalizeCreateStatus(form *apimodel.AdvancedStatusCreateForm) error { hasStatus := form.Status != "" hasMedia := len(form.MediaIDs) != 0 hasPoll := form.Poll != nil @@ -162,9 +164,11 @@ func validateCreateStatus(form *apimodel.AdvancedStatusCreateForm) error { } if form.Language != "" { - if err := validate.Language(form.Language); err != nil { + language, err := validate.Language(form.Language) + if err != nil { return err } + form.Language = language } return nil diff --git a/internal/api/client/statuses/statuscreate_test.go b/internal/api/client/statuses/statuscreate_test.go index 05f24c24c..d47a74bbc 100644 --- a/internal/api/client/statuses/statuscreate_test.go +++ b/internal/api/client/statuses/statuscreate_test.go @@ -391,6 +391,42 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() { suite.Equal(statusResponse.ID, gtsAttachment.StatusID) } +// Post a new status with a language tag that is not in canonical format +func (suite *StatusCreateTestSuite) TestPostNewStatusWithNoncanonicalLanguageTag() { + t := suite.testTokens["local_account_1"] + oauthToken := oauth.DBTokenToToken(t) + + // setup + recorder := httptest.NewRecorder() + ctx, _ := testrig.CreateGinTestContext(recorder, nil) + ctx.Set(oauth.SessionAuthorizedApplication, suite.testApplications["application_1"]) + ctx.Set(oauth.SessionAuthorizedToken, oauthToken) + ctx.Set(oauth.SessionAuthorizedUser, suite.testUsers["local_account_1"]) + ctx.Set(oauth.SessionAuthorizedAccount, suite.testAccounts["local_account_1"]) + ctx.Request = httptest.NewRequest(http.MethodPost, fmt.Sprintf("http://localhost:8080/%s", statuses.BasePath), nil) // the endpoint we're hitting + ctx.Request.Header.Set("accept", "application/json") + ctx.Request.Form = url.Values{ + "status": {"English? what's English? i speak American"}, + "language": {"en-us"}, + } + suite.statusModule.StatusCreatePOSTHandler(ctx) + + suite.EqualValues(http.StatusOK, recorder.Code) + + result := recorder.Result() + defer result.Body.Close() + b, err := ioutil.ReadAll(result.Body) + suite.NoError(err) + + statusReply := &apimodel.Status{} + err = json.Unmarshal(b, statusReply) + suite.NoError(err) + + suite.Equal("

English? what's English? i speak American

", statusReply.Content) + suite.NotNil(statusReply.Language) + suite.Equal("en-US", *statusReply.Language) +} + func TestStatusCreateTestSuite(t *testing.T) { suite.Run(t, new(StatusCreateTestSuite)) } diff --git a/internal/processing/account/create.go b/internal/processing/account/create.go index 1a172b865..32a59d1ef 100644 --- a/internal/processing/account/create.go +++ b/internal/processing/account/create.go @@ -34,8 +34,7 @@ // Create processes the given form for creating a new account, // returning an oauth token for that account if successful. // -// Fields on the form should have already been validated by the -// caller, before this function is called. +// Precondition: the form's fields should have already been validated and normalized by the caller. func (p *Processor) Create( ctx context.Context, appToken oauth2.TokenInfo, diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go index 01c62d7e3..f75b3c8d9 100644 --- a/internal/processing/account/update.go +++ b/internal/processing/account/update.go @@ -222,10 +222,11 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form if form.Source != nil { if form.Source.Language != nil { - if err := validate.Language(*form.Source.Language); err != nil { + language, err := validate.Language(*form.Source.Language) + if err != nil { return nil, gtserror.NewErrorBadRequest(err) } - account.Language = *form.Source.Language + account.Language = language } if form.Source.Sensitive != nil { diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go index 2d9c3a196..36842ee07 100644 --- a/internal/processing/status/create.go +++ b/internal/processing/status/create.go @@ -37,6 +37,8 @@ ) // Create processes the given form to create a new status, returning the api model representation of that status if it's OK. +// +// Precondition: the form's fields should have already been validated and normalized by the caller. func (p *Processor) Create(ctx context.Context, account *gtsmodel.Account, application *gtsmodel.Application, form *apimodel.AdvancedStatusCreateForm) (*apimodel.Status, gtserror.WithCode) { accountURIs := uris.GenerateURIsForAccount(account.Username) thisStatusID := id.NewULID() @@ -55,7 +57,6 @@ func (p *Processor) Create(ctx context.Context, account *gtsmodel.Account, appli ContentWarning: text.SanitizePlaintext(form.SpoilerText), ActivityStreamsType: ap.ObjectNote, Sensitive: &sensitive, - Language: form.Language, CreatedWithApplicationID: application.ID, Text: form.Status, } diff --git a/internal/processing/status/create_test.go b/internal/processing/status/create_test.go index 2a797516d..2c86e5a29 100644 --- a/internal/processing/status/create_test.go +++ b/internal/processing/status/create_test.go @@ -208,6 +208,40 @@ func (suite *StatusCreateTestSuite) TestProcessMediaDescriptionTooShort() { suite.Nil(apiStatus) } +func (suite *StatusCreateTestSuite) TestProcessLanguageWithScriptPart() { + ctx := context.Background() + + creatingAccount := suite.testAccounts["local_account_1"] + creatingApplication := suite.testApplications["application_1"] + + statusCreateForm := &apimodel.AdvancedStatusCreateForm{ + StatusCreateRequest: apimodel.StatusCreateRequest{ + Status: "你好世界", // hello world + MediaIDs: []string{}, + Poll: nil, + InReplyToID: "", + Sensitive: false, + SpoilerText: "", + Visibility: apimodel.VisibilityPublic, + ScheduledAt: "", + Language: "zh-Hans", + ContentType: apimodel.StatusContentTypePlain, + }, + AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ + Federated: nil, + Boostable: nil, + Replyable: nil, + Likeable: nil, + }, + } + + apiStatus, err := suite.status.Create(ctx, creatingAccount, creatingApplication, statusCreateForm) + suite.NoError(err) + suite.NotNil(apiStatus) + + suite.Equal("zh-Hans", *apiStatus.Language) +} + func TestStatusCreateTestSuite(t *testing.T) { suite.Run(t, new(StatusCreateTestSuite)) } diff --git a/internal/validate/formvalidation.go b/internal/validate/formvalidation.go index 76ce6a8de..1b5996b4b 100644 --- a/internal/validate/formvalidation.go +++ b/internal/validate/formvalidation.go @@ -99,14 +99,19 @@ func Email(email string) error { return err } -// Language checks that the given language string is a 2- or 3-letter ISO 639 code. -// Returns an error if the language cannot be parsed. See: https://pkg.go.dev/golang.org/x/text/language -func Language(lang string) error { +// Language checks that the given language string is a valid, if not necessarily canonical, BCP 47 language tag. +// Returns a canonicalized version of the tag if the language can be parsed. +// Returns an error if the language cannot be parsed. +// See: https://pkg.go.dev/golang.org/x/text/language +func Language(lang string) (string, error) { if lang == "" { - return errors.New("no language provided") + return "", errors.New("no language provided") } - _, err := language.ParseBase(lang) - return err + parsed, err := language.Parse(lang) + if err != nil { + return "", err + } + return parsed.String(), err } // SignUpReason checks that a sufficient reason is given for a server signup request diff --git a/internal/validate/formvalidation_test.go b/internal/validate/formvalidation_test.go index 534e5b849..40830407c 100644 --- a/internal/validate/formvalidation_test.go +++ b/internal/validate/formvalidation_test.go @@ -159,60 +159,39 @@ func (suite *ValidationTestSuite) TestValidateEmail() { } func (suite *ValidationTestSuite) TestValidateLanguage() { - empty := "" - notALanguage := "this isn't a language at all!" - english := "en" - capitalEnglish := "EN" - arabic3Letters := "ara" - mixedCapsEnglish := "eN" - englishUS := "en-us" - dutch := "nl" - german := "de" - var err error - - err = validate.Language(empty) - if suite.Error(err) { - suite.Equal(errors.New("no language provided"), err) + testCases := []struct { + name, input, expected, err string + }{ + {name: "empty", err: "no language provided"}, + {name: "notALanguage", input: "this isn't a language at all!", err: "language: tag is not well-formed"}, + {name: "english", input: "en", expected: "en"}, + // Should be all lowercase + {name: "capitalEnglish", input: "EN", expected: "en"}, + // Overlong, should be in ISO 639-1 format + {name: "arabic3Letters", input: "ara", expected: "ar"}, + // Should be all lowercase + {name: "mixedCapsEnglish", input: "eN", expected: "en"}, + // Region should be capitalized + {name: "englishUS", input: "en-us", expected: "en-US"}, + {name: "dutch", input: "nl", expected: "nl"}, + {name: "german", input: "de", expected: "de"}, + {name: "chinese", input: "zh", expected: "zh"}, + {name: "chineseSimplified", input: "zh-Hans", expected: "zh-Hans"}, + {name: "chineseTraditional", input: "zh-Hant", expected: "zh-Hant"}, } - err = validate.Language(notALanguage) - if suite.Error(err) { - suite.Equal(errors.New("language: tag is not well-formed"), err) - } - - err = validate.Language(english) - if suite.NoError(err) { - suite.Equal(nil, err) - } - - err = validate.Language(capitalEnglish) - if suite.NoError(err) { - suite.Equal(nil, err) - } - - err = validate.Language(arabic3Letters) - if suite.NoError(err) { - suite.Equal(nil, err) - } - - err = validate.Language(mixedCapsEnglish) - if suite.NoError(err) { - suite.Equal(nil, err) - } - - err = validate.Language(englishUS) - if suite.Error(err) { - suite.Equal(errors.New("language: tag is not well-formed"), err) - } - - err = validate.Language(dutch) - if suite.NoError(err) { - suite.Equal(nil, err) - } - - err = validate.Language(german) - if suite.NoError(err) { - suite.Equal(nil, err) + for _, testCase := range testCases { + testCase := testCase + suite.Run(testCase.name, func() { + actual, actualErr := validate.Language(testCase.input) + if testCase.err == "" { + suite.Equal(testCase.expected, actual) + suite.NoError(actualErr) + } else { + suite.Empty(actual) + suite.EqualError(actualErr, testCase.err) + } + }) } }