feat: harden localization against malicious HTML (#5703)
- Add a new script that processes the localization files and verifies that they only contain HTML according to our strictly defined rules. - This should make adding malicious HTML near-impossible. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/5703 Reviewed-by: 0ko <0ko@noreply.codeberg.org> Co-authored-by: Gusted <postmaster@gusted.xyz> Co-committed-by: Gusted <postmaster@gusted.xyz>
This commit is contained in:
parent
031451e740
commit
dfe3ffc581
43 changed files with 361 additions and 151 deletions
156
build/lint-locale.go
Normal file
156
build/lint-locale.go
Normal file
|
@ -0,0 +1,156 @@
|
|||
// Copyright 2024 The Forgejo Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
//nolint:forbidigo
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
"github.com/sergi/go-diff/diffmatchpatch"
|
||||
"gopkg.in/ini.v1" //nolint:depguard
|
||||
)
|
||||
|
||||
var (
|
||||
policy *bluemonday.Policy
|
||||
tagRemover *strings.Replacer
|
||||
safeURL = "https://TO-BE-REPLACED.COM"
|
||||
|
||||
// Matches href="", href="#", href="%s", href="#%s", href="%[1]s" and href="#%[1]s".
|
||||
placeHolderRegex = regexp.MustCompile(`href="#?(%s|%\[\d\]s)?"`)
|
||||
)
|
||||
|
||||
func initBlueMondayPolicy() {
|
||||
policy = bluemonday.NewPolicy()
|
||||
|
||||
policy.RequireParseableURLs(true)
|
||||
policy.AllowURLSchemes("https")
|
||||
|
||||
// Only allow safe URL on href.
|
||||
// Only allow target="_blank".
|
||||
// Only allow rel="nopener noreferrer", rel="noopener" and rel="noreferrer".
|
||||
// Only allow placeholder on id and class.
|
||||
policy.AllowAttrs("href").Matching(regexp.MustCompile("^" + regexp.QuoteMeta(safeURL) + "$")).OnElements("a")
|
||||
policy.AllowAttrs("target").Matching(regexp.MustCompile("^_blank$")).OnElements("a")
|
||||
policy.AllowAttrs("rel").Matching(regexp.MustCompile("^(noopener|noreferrer|noopener noreferrer)$")).OnElements("a")
|
||||
policy.AllowAttrs("id", "class").Matching(regexp.MustCompile(`^%s|%\[\d\]s$`)).OnElements("a")
|
||||
|
||||
// Only allow positional placeholder as class.
|
||||
positionalPlaceholderRe := regexp.MustCompile(`^%\[\d\]s$`)
|
||||
policy.AllowAttrs("class").Matching(positionalPlaceholderRe).OnElements("strong")
|
||||
policy.AllowAttrs("id").Matching(positionalPlaceholderRe).OnElements("code")
|
||||
|
||||
// Allowed elements with no attributes. Must be a recognized tagname.
|
||||
policy.AllowElements("strong", "br", "b", "strike", "code", "i")
|
||||
|
||||
// TODO: Remove <c> in `actions.workflow.dispatch.trigger_found`.
|
||||
policy.AllowNoAttrs().OnElements("c")
|
||||
}
|
||||
|
||||
func initRemoveTags() {
|
||||
oldnew := []string{}
|
||||
for _, el := range []string{
|
||||
"email@example.com", "correu@example.com", "epasts@domens.lv", "email@exemplo.com", "eposta@ornek.com", "email@példa.hu", "email@esempio.it",
|
||||
"user", "utente", "lietotājs", "gebruiker", "usuário", "Benutzer", "Bruker",
|
||||
"server", "servidor", "kiszolgáló", "serveris",
|
||||
"label", "etichetta", "etiķete", "rótulo", "Label", "utilizador",
|
||||
"filename", "bestandsnaam", "dosyaadi", "fails", "nome do arquivo",
|
||||
} {
|
||||
oldnew = append(oldnew, "<"+el+">", "REPLACED-TAG")
|
||||
}
|
||||
|
||||
tagRemover = strings.NewReplacer(oldnew...)
|
||||
}
|
||||
|
||||
func preprocessTranslationValue(value string) string {
|
||||
// href should be a parsable URL, replace placeholder strings with a safe url.
|
||||
value = placeHolderRegex.ReplaceAllString(value, `href="`+safeURL+`"`)
|
||||
|
||||
// Remove tags that aren't tags but will be parsed as tags. We already know they are safe and sound.
|
||||
value = tagRemover.Replace(value)
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
func checkLocaleContent(localeContent []byte) []string {
|
||||
// Same configuration as Forgejo uses.
|
||||
cfg := ini.Empty(ini.LoadOptions{
|
||||
IgnoreContinuation: true,
|
||||
})
|
||||
cfg.NameMapper = ini.SnackCase
|
||||
|
||||
if err := cfg.Append(localeContent); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
dmp := diffmatchpatch.New()
|
||||
errors := []string{}
|
||||
|
||||
for _, section := range cfg.Sections() {
|
||||
for _, key := range section.Keys() {
|
||||
var trKey string
|
||||
if section.Name() == "" || section.Name() == "DEFAULT" || section.Name() == "common" {
|
||||
trKey = key.Name()
|
||||
} else {
|
||||
trKey = section.Name() + "." + key.Name()
|
||||
}
|
||||
|
||||
keyValue := preprocessTranslationValue(key.Value())
|
||||
|
||||
if html.UnescapeString(policy.Sanitize(keyValue)) != keyValue {
|
||||
// Create a nice diff of the difference.
|
||||
diffs := dmp.DiffMain(keyValue, html.UnescapeString(policy.Sanitize(keyValue)), false)
|
||||
diffs = dmp.DiffCleanupSemantic(diffs)
|
||||
diffs = dmp.DiffCleanupEfficiency(diffs)
|
||||
|
||||
errors = append(errors, trKey+": "+dmp.DiffPrettyText(diffs))
|
||||
}
|
||||
}
|
||||
}
|
||||
return errors
|
||||
}
|
||||
|
||||
func main() {
|
||||
initBlueMondayPolicy()
|
||||
initRemoveTags()
|
||||
|
||||
localeDir := filepath.Join("options", "locale")
|
||||
localeFiles, err := os.ReadDir(localeDir)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if !slices.ContainsFunc(localeFiles, func(e fs.DirEntry) bool { return strings.HasSuffix(e.Name(), ".ini") }) {
|
||||
fmt.Println("No locale files found")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
exitCode := 0
|
||||
for _, localeFile := range localeFiles {
|
||||
if !strings.HasSuffix(localeFile.Name(), ".ini") {
|
||||
continue
|
||||
}
|
||||
|
||||
localeContent, err := os.ReadFile(filepath.Join(localeDir, localeFile.Name()))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if err := checkLocaleContent(localeContent); len(err) > 0 {
|
||||
fmt.Println(localeFile.Name())
|
||||
fmt.Println(strings.Join(err, "\n"))
|
||||
fmt.Println()
|
||||
exitCode = 1
|
||||
}
|
||||
}
|
||||
|
||||
os.Exit(exitCode)
|
||||
}
|
65
build/lint-locale_test.go
Normal file
65
build/lint-locale_test.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
// Copyright 2024 The Forgejo Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestLocalizationPolicy(t *testing.T) {
|
||||
initBlueMondayPolicy()
|
||||
initRemoveTags()
|
||||
|
||||
t.Run("Remove tags", func(t *testing.T) {
|
||||
assert.Empty(t, checkLocaleContent([]byte(`hidden_comment_types_description = Comment types checked here will not be shown inside issue pages. Checking "Label" for example removes all "<user> added/removed <label>" comments.`)))
|
||||
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<not-an-allowed-key>\x1b[0m REPLACED-TAG"}, checkLocaleContent([]byte(`key = "<not-an-allowed-key> <label>"`)))
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<user@example.com>\x1b[0m REPLACED-TAG"}, checkLocaleContent([]byte(`key = "<user@example.com> <email@example.com>"`)))
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<tag>\x1b[0m REPLACED-TAG \x1b[31m</tag>\x1b[0m"}, checkLocaleContent([]byte(`key = "<tag> <email@example.com> </tag>"`)))
|
||||
})
|
||||
|
||||
t.Run("Specific exception", func(t *testing.T) {
|
||||
assert.Empty(t, checkLocaleContent([]byte(`workflow.dispatch.trigger_found = This workflow has a <c>workflow_dispatch</c> event trigger.`)))
|
||||
assert.Empty(t, checkLocaleContent([]byte(`pulls.title_desc_one = wants to merge %[1]d commit from <code>%[2]s</code> into <code id="%[4]s">%[3]s</code>`)))
|
||||
assert.Empty(t, checkLocaleContent([]byte(`editor.commit_directly_to_this_branch = Commit directly to the <strong class="%[2]s">%[1]s</strong> branch.`)))
|
||||
|
||||
assert.EqualValues(t, []string{"workflow.dispatch.trigger_found: This workflow has a \x1b[31m<d>\x1b[0mworkflow_dispatch\x1b[31m</d>\x1b[0m event trigger."}, checkLocaleContent([]byte(`workflow.dispatch.trigger_found = This workflow has a <d>workflow_dispatch</d> event trigger.`)))
|
||||
assert.EqualValues(t, []string{"key: <code\x1b[31m id=\"branch_targe\"\x1b[0m>%[3]s</code>"}, checkLocaleContent([]byte(`key = <code id="branch_targe">%[3]s</code>`)))
|
||||
assert.EqualValues(t, []string{"key: <a\x1b[31m class=\"ui sh\"\x1b[0m href=\"https://TO-BE-REPLACED.COM\">"}, checkLocaleContent([]byte(`key = <a class="ui sh" href="%[3]s">`)))
|
||||
assert.EqualValues(t, []string{"key: <a\x1b[31m class=\"js-click-me\"\x1b[0m href=\"https://TO-BE-REPLACED.COM\">"}, checkLocaleContent([]byte(`key = <a class="js-click-me" href="%[3]s">`)))
|
||||
assert.EqualValues(t, []string{"key: <strong\x1b[31m class=\"branch-target\"\x1b[0m>%[1]s</strong>"}, checkLocaleContent([]byte(`key = <strong class="branch-target">%[1]s</strong>`)))
|
||||
})
|
||||
|
||||
t.Run("General safe tags", func(t *testing.T) {
|
||||
assert.Empty(t, checkLocaleContent([]byte("error404 = The page you are trying to reach either <strong>does not exist</strong> or <strong>you are not authorized</strong> to view it.")))
|
||||
assert.Empty(t, checkLocaleContent([]byte("teams.specific_repositories_helper = Members will only have access to repositories explicitly added to the team. Selecting this <strong>will not</strong> automatically remove repositories already added with <i>All repositories</i>.")))
|
||||
assert.Empty(t, checkLocaleContent([]byte("sqlite_helper = File path for the SQLite3 database.<br>Enter an absolute path if you run Forgejo as a service.")))
|
||||
assert.Empty(t, checkLocaleContent([]byte("hi_user_x = Hi <b>%s</b>,")))
|
||||
|
||||
assert.EqualValues(t, []string{"error404: The page you are trying to reach either <strong\x1b[31m title='aaa'\x1b[0m>does not exist</strong> or <strong>you are not authorized</strong> to view it."}, checkLocaleContent([]byte("error404 = The page you are trying to reach either <strong title='aaa'>does not exist</strong> or <strong>you are not authorized</strong> to view it.")))
|
||||
})
|
||||
|
||||
t.Run("<a>", func(t *testing.T) {
|
||||
assert.Empty(t, checkLocaleContent([]byte(`admin.new_user.text = Please <a href="%s">click here</a> to manage this user from the admin panel.`)))
|
||||
assert.Empty(t, checkLocaleContent([]byte(`access_token_desc = Selected token permissions limit authorization only to the corresponding <a href="%[1]s" target="_blank">API</a> routes. Read the <a href="%[2]s" target="_blank">documentation</a> for more information.`)))
|
||||
assert.Empty(t, checkLocaleContent([]byte(`webauthn_desc = Security keys are hardware devices containing cryptographic keys. They can be used for two-factor authentication. Security keys must support the <a rel="noreferrer" target="_blank" href="%s">WebAuthn Authenticator</a> standard.`)))
|
||||
assert.Empty(t, checkLocaleContent([]byte("issues.closed_at = `closed this issue <a id=\"%[1]s\" href=\"#%[1]s\">%[2]s</a>`")))
|
||||
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com">`)))
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"javascript:alert('1')\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="javascript:alert('1')">`)))
|
||||
assert.EqualValues(t, []string{"key: <a href=\"https://TO-BE-REPLACED.COM\"\x1b[31m download\x1b[0m>"}, checkLocaleContent([]byte(`key = <a href="%s" download>`)))
|
||||
assert.EqualValues(t, []string{"key: <a href=\"https://TO-BE-REPLACED.COM\"\x1b[31m target=\"_self\"\x1b[0m>"}, checkLocaleContent([]byte(`key = <a href="%s" target="_self">`)))
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com/%s\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com/%s">`)))
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"https://example.com/?q=%s\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="https://example.com/?q=%s">`)))
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"%s/open-redirect\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="%s/open-redirect">`)))
|
||||
assert.EqualValues(t, []string{"key: \x1b[31m<a href=\"%s?q=open-redirect\">\x1b[0m"}, checkLocaleContent([]byte(`key = <a href="%s?q=open-redirect">`)))
|
||||
})
|
||||
|
||||
t.Run("Escaped HTML characters", func(t *testing.T) {
|
||||
assert.Empty(t, checkLocaleContent([]byte("activity.git_stats_push_to_branch = `إلى %s و\"`")))
|
||||
|
||||
assert.EqualValues(t, []string{"key: و\x1b[31m \x1b[0m\x1b[32m\u00a0\x1b[0m"}, checkLocaleContent([]byte(`key = و `)))
|
||||
})
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue