forked from forgejo/forgejo

upgrade to most recent bluemonday (#11007)

* upgrade to most recent bluemonday
* make vendor
* update tests for bluemonday
* update tests for bluemonday
* update tests for bluemonday

This commit is contained in: parent 4c54477bb5, commit d00ebf445b
50 changed files with 4977 additions and 300 deletions

35 vendor/github.com/microcosm-cc/bluemonday/.gitignore (generated, vendored)

@ -1,22 +1,15 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so

# Folders
_obj
_test

# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out

*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*

_testmain.go

# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# goland idea folder
*.idea

20 vendor/github.com/microcosm-cc/bluemonday/.travis.yml (generated, vendored)

@ -1,18 +1,22 @@
language: go
go:
- 1.1
- 1.2
- 1.3
- 1.4
- 1.5
- 1.6
- 1.7
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- tip
matrix:
  allow_failures:
    - go: tip
  fast_finish: true
install:
- go get golang.org/x/net/html
- go get .
script:
- go test -v ./...

4 vendor/github.com/microcosm-cc/bluemonday/CONTRIBUTING.md (generated, vendored)

@ -6,6 +6,10 @@ Third-party patches are essential for keeping bluemonday secure and offering the

* Make sure you have a [Github account](https://github.com/signup/free)

## Guidelines

1. Do not vendor dependencies. As a security package, were we to vendor dependencies the projects that then vendor bluemonday may not receive the latest security updates to the dependencies. By not vendoring dependencies the project that implements bluemonday will vendor the latest version of any dependent packages. Vendoring is a project problem, not a package problem. bluemonday will be tested against the latest version of dependencies periodically and during any PR/merge.

## Submitting an Issue

* Submit a ticket for your issue, assuming one does not already exist

7 vendor/github.com/microcosm-cc/bluemonday/CREDITS.md (generated, vendored)

@ -1,6 +1,7 @@

1. John Graham-Cumming http://jgc.org/
1. Mohammad Gufran https://github.com/Gufran
1. Steven Gutzwiller https://github.com/StevenGutzwiller
1. Andrew Krasichkov @buglloc https://github.com/buglloc
1. Mike Samuel mikesamuel@gmail.com
1. Dmitri Shuralyov shurcooL@gmail.com
1. https://github.com/opennota
1. https://github.com/Gufran
1. https://github.com/opennota

79 vendor/github.com/microcosm-cc/bluemonday/README.md (generated, vendored)

@ -1,4 +1,4 @@
# bluemonday [](https://travis-ci.org/microcosm-cc/bluemonday) [](https://godoc.org/github.com/microcosm-cc/bluemonday)
# bluemonday [](https://travis-ci.org/microcosm-cc/bluemonday) [](https://godoc.org/github.com/microcosm-cc/bluemonday) [](https://sourcegraph.com/github.com/microcosm-cc/bluemonday?badge)

bluemonday is a HTML sanitizer implemented in Go. It is fast and highly configurable.

@ -58,10 +58,12 @@ We expect to be supplied with well-formatted HTML (closing elements for every ap

### Supported Go Versions

bluemonday is regularly tested against Go 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7 and tip.
bluemonday is tested against Go 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11, 1.12, and tip.

We do not support Go 1.0 as we depend on `golang.org/x/net/html` which includes a reference to `io.ErrNoProgress` which did not exist in Go 1.0.

We support Go 1.1 but Travis no longer tests against it.

## Is it production ready?

*Yes*

@ -87,7 +89,11 @@ import (
)

func main() {
// Do this once for each unique policy, and use the policy for the life of the program
// Policy creation/editing is not safe to use in multiple goroutines
p := bluemonday.UGCPolicy()

// The policy can then be used to sanitize lots of input and it is safe to use the policy in multiple goroutines
html := p.Sanitize(
`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`,
)

@ -140,7 +146,7 @@ func main() {

We ship two default policies:

1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on it's whitelist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its whitelist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
2. `bluemonday.UGCPolicy()` which allows a broad selection of HTML elements and attributes that are safe for user generated content. Note that this policy does *not* whitelist iframes, object, embed, styles, script, etc. An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.

## Policy Building

@ -163,12 +169,26 @@ To add elements to a policy either add just the elements:
p.AllowElements("b", "strong")
```

Or using a regex:

_Note: if an element is added by name as shown above, any matching regex will be ignored_

It is also recommended to ensure multiple patterns don't overlap as order of execution is not guaranteed and can result in some rules being missed.
```go
p.AllowElementsMatching(regex.MustCompile(`^my-element-`))
```

Or add elements as a virtue of adding an attribute:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("nowrap").OnElements("td", "th")
```

Again, this also supports a regex pattern match alternative:
```go
p.AllowAttrs("nowrap").OnElementsMatching(regex.MustCompile(`^my-element-`))
```

Attributes can either be added to all elements:
```go
p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally()

@ -198,6 +218,49 @@ p := bluemonday.UGCPolicy()
p.AllowElements("fieldset", "select", "option")
```

### Inline CSS

Although it's possible to handle inline CSS using `AllowAttrs` with a `Matching` rule, writing a single monolithic regular expression to safely process all inline CSS which you wish to allow is not a trivial task. Instead of attempting to do so, you can whitelist the `style` attribute on whichever element(s) you desire and use style policies to control and sanitize inline styles.

It is suggested that you use `Matching` (with a suitable regular expression)
`MatchingEnum`, or `MatchingHandler` to ensure each style matches your needs,
but default handlers are supplied for most widely used styles.

Similar to attributes, you can allow specific CSS properties to be set inline:
```go
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'color' property with valid RGB(A) hex values only (on any element allowed a 'style' attribute)
p.AllowStyles("color").Matching(regexp.MustCompile("(?i)^#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$")).Globally()
```

Additionally, you can allow a CSS property to be set only to an allowed value:
```go
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
// on 'span' elements only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span")
```

Or you can specify elements based on a regex patterm match:
```go
p.AllowAttrs("style").OnElementsMatching(regex.MustCompile(`^my-element-`))
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
// on 'span' elements only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElementsMatching(regex.MustCompile(`^my-element-`))
```

If you need more specific checking, you can create a handler that takes in a string and returns a bool to
validate the values for a given property. The string parameter has been
converted to lowercase and unicode code points have been converted.
```go
myHandler := func(value string) bool{
return true
}
p.AllowAttrs("style").OnElements("span", "p")
// Allow the 'color' property with values validated by the handler (on any element allowed a 'style' attribute)
p.AllowStyles("color").MatchingHandler(myHandler).Globally()
```

### Links

Links are difficult beasts to sanitise safely and also one of the biggest attack vectors for malicious content.

@ -232,6 +295,13 @@ Regardless of whether you have enabled parseable URLs, you can force all URLs to
p.RequireNoFollowOnLinks(true)
```

Similarly, you can force all URLs to have "noreferrer" in their rel attribute.
```go
// This applies to "a" "area" "link" elements that have a "href" attribute
p.RequireNoReferrerOnLinks(true)
```

We provide a convenience method that applies all of the above, but you will still need to whitelist the linkable elements for the URL rules to be applied to:
```go
p.AllowStandardURLs()

@ -273,7 +343,7 @@ We also bundle some helpers to simplify policy building:
// Permits the "dir", "id", "lang", "title" attributes globally
p.AllowStandardAttributes()

// Permits the "img" element and it's standard attributes
// Permits the "img" element and its standard attributes
p.AllowImages()

// Permits ordered and unordered lists, and also definition lists

@ -312,7 +382,6 @@ It is not the job of bluemonday to fix your bad HTML, it is merely the job of bl

## TODO

* Add support for CSS sanitisation to allow some CSS properties based on a whitelist, possibly using the [Gorilla CSS3 scanner](http://www.gorillatoolkit.org/pkg/css/scanner)
* Investigate whether devs want to blacklist elements and attributes. This would allow devs to take an existing policy (such as the `bluemonday.UGCPolicy()` ) that encapsulates 90% of what they're looking for but does more than they need, and to remove the extra things they do not want to make it 100% what they want
* Investigate whether devs want a validating HTML mode, in which the HTML elements are not just transformed into a balanced tree (every start tag has a closing tag at the correct depth) but also that elements and character data appear only in their allowed context (i.e. that a `table` element isn't a descendent of a `caption`, that `colgroup`, `thead`, `tbody`, `tfoot` and `tr` are permitted, and that character data is not permitted)
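
[Editor's note] The README sections above document the style-whitelisting and rel="noreferrer" API surface that this upgrade pulls in. As a quick orientation for reviewers, here is a minimal, self-contained sketch that combines those documented calls; it is illustrative only, mirrors the README examples rather than any Gitea code, and the sample HTML is made up.

```go
package main

import (
	"fmt"
	"regexp"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	// Start from the user-generated-content policy shipped with bluemonday.
	p := bluemonday.UGCPolicy()

	// Whitelist the style attribute, then control it per CSS property as the
	// README above describes.
	p.AllowAttrs("style").OnElements("span", "p")

	// "color" validated by a regexp (hex values only), on any styled element.
	p.AllowStyles("color").
		Matching(regexp.MustCompile("(?i)^#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$")).
		Globally()

	// "width" validated by a custom handler; per the README the value arrives lowercased.
	widthRe := regexp.MustCompile(`^[0-9]+px$`)
	p.AllowStyles("width").MatchingHandler(func(value string) bool {
		return widthRe.MatchString(value)
	}).OnElements("span")

	fmt.Println(p.Sanitize(`<span style="color: #AB12CD; width: 10px; position: fixed">x</span>`))
	// Only the whitelisted declarations should remain in the span's style attribute.
}
```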

2 vendor/github.com/microcosm-cc/bluemonday/doc.go (generated, vendored)

@ -84,7 +84,7 @@ bluemonday is heavily inspired by both the OWASP Java HTML Sanitizer

We ship two default policies, one is bluemonday.StrictPolicy() and can be
thought of as equivalent to stripping all HTML elements and their attributes as
it has nothing on it's whitelist.
it has nothing on its whitelist.

The other is bluemonday.UGCPolicy() and allows a broad selection of HTML
elements and attributes that are safe for user generated content. Note that

10 vendor/github.com/microcosm-cc/bluemonday/go.mod (generated, vendored, new file)

@ -0,0 +1,10 @@
module github.com/microcosm-cc/bluemonday

go 1.9

require (
github.com/aymerick/douceur v0.2.0 // indirect
github.com/chris-ramon/douceur v0.2.0
github.com/gorilla/css v1.0.0 // indirect
golang.org/x/net v0.0.0-20181220203305-927f97764cc3
)

8 vendor/github.com/microcosm-cc/bluemonday/go.sum (generated, vendored, new file)

@ -0,0 +1,8 @@
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/chris-ramon/douceur v0.2.0 h1:IDMEdxlEUUBYBKE4z/mJnFyVXox+MjuEVDJNN27glkU=
github.com/chris-ramon/douceur v0.2.0/go.mod h1:wDW5xjJdeoMm1mRt4sD4c/LbF/mWdEpRXQKjTR8nIBE=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3 h1:eH6Eip3UpmR+yM/qI9Ijluzb1bNv/cAU/n+6l8tRSis=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=

2085 vendor/github.com/microcosm-cc/bluemonday/handlers.go (generated, vendored, new file)

File diff suppressed because it is too large.
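
[Editor's note] handlers.go is a new file in this release: it holds the default per-property validation functions that policy.go's `getDefaultHandler(attr)` (shown further down in this diff) falls back to when a style is whitelisted without an explicit matcher. Since the diff itself is suppressed above, here is a small illustrative sketch of that fallback from the caller's side; the choice of `text-align` as a property with a built-in handler is an assumption, so consult handlers.go for the exact set.

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.UGCPolicy()
	p.AllowAttrs("style").OnElements("p")

	// No Matching / MatchingEnum / MatchingHandler here: per the policy.go code
	// below, bluemonday falls back to getDefaultHandler("text-align"), assumed to
	// be one of the built-in validators defined in the (suppressed) handlers.go.
	p.AllowStyles("text-align").OnElements("p")

	// A valid value should be kept; an obviously bogus one should be dropped.
	fmt.Println(p.Sanitize(`<p style="text-align: center; text-align: bogus-value">x</p>`))
}
```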

2 vendor/github.com/microcosm-cc/bluemonday/helpers.go (generated, vendored)

@ -135,7 +135,7 @@ func (p *Policy) AllowStandardURLs() {
// Most common URL schemes only
p.AllowURLSchemes("mailto", "http", "https")

// For all anchors we will add rel="nofollow" if it does not already exist
// For linking elements we will add rel="nofollow" if it does not already exist
// This applies to "a" "area" "link"
p.RequireNoFollowOnLinks(true)
}

313 vendor/github.com/microcosm-cc/bluemonday/policy.go (generated, vendored)

@ -29,6 +29,8 @@

package bluemonday

//TODO sgutzwiller create map of styles to default handlers
//TODO sgutzwiller create handlers for various attributes
import (
"net/url"
"regexp"

@ -47,21 +49,26 @@ type Policy struct {
// exceptions
initialized bool

// Allows the <!DOCTYPE > tag to exist in the sanitized document
allowDocType bool

// If true then we add spaces when stripping tags, specifically the closing
// tag is replaced by a space character.
addSpaces bool

// When true, add rel="nofollow" to HTML anchors
// When true, add rel="nofollow" to HTML a, area, and link tags
requireNoFollow bool

// When true, add rel="nofollow" to HTML anchors
// When true, add rel="nofollow" to HTML a, area, and link tags
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"
requireNoFollowFullyQualifiedLinks bool

// When true, add rel="noreferrer" to HTML a, area, and link tags
requireNoReferrer bool

// When true, add rel="noreferrer" to HTML a, area, and link tags
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"
requireNoReferrerFullyQualifiedLinks bool

// When true add target="_blank" to fully qualified links
// Will add for href="http://foo"
// Will skip for href="/foo" or href="foo"

@ -73,12 +80,27 @@ type Policy struct {
// When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
allowRelativeURLs bool

// When true, allow data attributes.
allowDataAttributes bool

// map[htmlElementName]map[htmlAttributeName]attrPolicy
elsAndAttrs map[string]map[string]attrPolicy

// elsMatchingAndAttrs stores regex based element matches along with attributes
elsMatchingAndAttrs map[*regexp.Regexp]map[string]attrPolicy

// map[htmlAttributeName]attrPolicy
globalAttrs map[string]attrPolicy

// map[htmlElementName]map[cssPropertyName]stylePolicy
elsAndStyles map[string]map[string]stylePolicy

// map[regex]map[cssPropertyName]stylePolicy
elsMatchingAndStyles map[*regexp.Regexp]map[string]stylePolicy

// map[cssPropertyName]stylePolicy
globalStyles map[string]stylePolicy

// If urlPolicy is nil, all URLs with matching schema are allowed.
// Otherwise, only the URLs with matching schema and urlPolicy(url)
// returning true are allowed.

@ -93,6 +115,16 @@ type Policy struct {
// be maintained in the output HTML.
setOfElementsAllowedWithoutAttrs map[string]struct{}

// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
//
// However some elements are valid and have strong layout meaning without
// any attributes, i.e. <table>.
//
// In this case, any element matching a regular expression will be accepted without
// attributes added.
setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp

setOfElementsToSkipContent map[string]struct{}
}

@ -103,6 +135,20 @@ type attrPolicy struct {
regexp *regexp.Regexp
}

type stylePolicy struct {
// handler to validate
handler func(string) bool

// optional pattern to match, when not nil the regexp needs to match
// otherwise the property is removed
regexp *regexp.Regexp

// optional list of allowed property values, for properties which
// have a defined list of allowed values; property will be removed
// if the value is not allowed
enum []string
}

type attrPolicyBuilder struct {
p *Policy

@ -111,13 +157,26 @@ type attrPolicyBuilder struct {
allowEmpty bool
}

type stylePolicyBuilder struct {
p *Policy

propertyNames []string
regexp *regexp.Regexp
enum []string
handler func(string) bool
}

type urlPolicy func(url *url.URL) (allowUrl bool)

// init initializes the maps if this has not been done already
func (p *Policy) init() {
if !p.initialized {
p.elsAndAttrs = make(map[string]map[string]attrPolicy)
p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string]attrPolicy)
p.globalAttrs = make(map[string]attrPolicy)
p.elsAndStyles = make(map[string]map[string]stylePolicy)
p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string]stylePolicy)
p.globalStyles = make(map[string]stylePolicy)
p.allowURLSchemes = make(map[string]urlPolicy)
p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
p.setOfElementsToSkipContent = make(map[string]struct{})

@ -161,6 +220,21 @@ func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
return &abp
}

// AllowDataAttributes whitelists all data attributes. We can't specify the name
// of each attribute exactly as they are customized.
//
// NOTE: These values are not sanitized and applications that evaluate or process
// them without checking and verification of the input may be at risk if this option
// is enabled. This is a 'caveat emptor' option and the person enabling this option
// needs to fully understand the potential impact with regards to whatever application
// will be consuming the sanitized HTML afterwards, i.e. if you know you put a link in a
// data attribute and use that to automatically load some new window then you're giving
// the author of a HTML fragment the means to open a malicious destination automatically.
// Use with care!
func (p *Policy) AllowDataAttributes() {
p.allowDataAttributes = true
}

// AllowNoAttrs says that attributes on element are optional.
//
// The attribute policy is only added to the core policy when OnElements(...)

@ -230,6 +304,30 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
return abp.p
}

// OnElementsMatching will bind an attribute policy to all elements matching a given regex
// and return the updated policy
func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
for _, attr := range abp.attrNames {
if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
}
ap := attrPolicy{}
if abp.regexp != nil {
ap.regexp = abp.regexp
}
abp.p.elsMatchingAndAttrs[regex][attr] = ap
}

if abp.allowEmpty {
abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
}
}

return abp.p
}

// Globally will bind an attribute policy to all HTML elements and return the
// updated policy
func (abp *attrPolicyBuilder) Globally() *Policy {

@ -250,6 +348,139 @@ func (abp *attrPolicyBuilder) Globally() *Policy {
return abp.p
}

// AllowStyles takes a range of CSS property names and returns a
// style policy builder that allows you to specify the pattern and scope of
// the whitelisted property.
//
// The style policy is only added to the core policy when either Globally()
// or OnElements(...) are called.
func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {

p.init()

abp := stylePolicyBuilder{
p: p,
}

for _, propertyName := range propertyNames {
abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName))
}

return &abp
}

// Matching allows a regular expression to be applied to a nascent style
// policy, and returns the style policy. Calling this more than once will
// replace the existing regexp.
func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {

spb.regexp = regex

return spb
}

// MatchingEnum allows a list of allowed values to be applied to a nascent style
// policy, and returns the style policy. Calling this more than once will
// replace the existing list of allowed values.
func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {

spb.enum = enum

return spb
}

// MatchingHandler allows a handler to be applied to a nascent style
// policy, and returns the style policy. Calling this more than once will
// replace the existing handler.
func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {

spb.handler = handler

return spb
}

// OnElements will bind a style policy to a given range of HTML elements
// and return the updated policy
func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {

for _, element := range elements {
element = strings.ToLower(element)

for _, attr := range spb.propertyNames {

if _, ok := spb.p.elsAndStyles[element]; !ok {
spb.p.elsAndStyles[element] = make(map[string]stylePolicy)
}

sp := stylePolicy{}
if spb.handler != nil {
sp.handler = spb.handler
} else if len(spb.enum) > 0 {
sp.enum = spb.enum
} else if spb.regexp != nil {
sp.regexp = spb.regexp
} else {
sp.handler = getDefaultHandler(attr)
}
spb.p.elsAndStyles[element][attr] = sp
}
}

return spb.p
}

// OnElementsMatching will bind a style policy to any HTML elements matching the pattern
// and return the updated policy
func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {

for _, attr := range spb.propertyNames {

if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
spb.p.elsMatchingAndStyles[regex] = make(map[string]stylePolicy)
}

sp := stylePolicy{}
if spb.handler != nil {
sp.handler = spb.handler
} else if len(spb.enum) > 0 {
sp.enum = spb.enum
} else if spb.regexp != nil {
sp.regexp = spb.regexp
} else {
sp.handler = getDefaultHandler(attr)
}
spb.p.elsMatchingAndStyles[regex][attr] = sp
}

return spb.p
}

// Globally will bind a style policy to all HTML elements and return the
// updated policy
func (spb *stylePolicyBuilder) Globally() *Policy {

for _, attr := range spb.propertyNames {
if _, ok := spb.p.globalStyles[attr]; !ok {
spb.p.globalStyles[attr] = stylePolicy{}
}

// Use only one strategy for validating styles, fallback to default
sp := stylePolicy{}
if spb.handler != nil {
sp.handler = spb.handler
} else if len(spb.enum) > 0 {
sp.enum = spb.enum
} else if spb.regexp != nil {
sp.regexp = spb.regexp
} else {
sp.handler = getDefaultHandler(attr)
}
spb.p.globalStyles[attr] = sp
}

return spb.p
}

// AllowElements will append HTML elements to the whitelist without applying an
// attribute policy to those elements (the elements are permitted
// sans-attributes)

@ -267,8 +498,16 @@ func (p *Policy) AllowElements(names ...string) *Policy {
return p
}

// RequireNoFollowOnLinks will result in all <a> tags having a rel="nofollow"
// added to them if one does not already exist
func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
p.init()
if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
}
return p
}

// RequireNoFollowOnLinks will result in all a, area, link tags having a
// rel="nofollow"added to them if one does not already exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {

@ -279,9 +518,10 @@ func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
return p
}

// RequireNoFollowOnFullyQualifiedLinks will result in all <a> tags that point
// to a non-local destination (i.e. starts with a protocol and has a host)
// having a rel="nofollow" added to them if one does not already exist
// RequireNoFollowOnFullyQualifiedLinks will result in all a, area, and link
// tags that point to a non-local destination (i.e. starts with a protocol and
// has a host) having a rel="nofollow" added to them if one does not already
// exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {

@ -292,9 +532,35 @@ func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
return p
}

// AddTargetBlankToFullyQualifiedLinks will result in all <a> tags that point
// to a non-local destination (i.e. starts with a protocol and has a host)
// having a target="_blank" added to them if one does not already exist
// RequireNoReferrerOnLinks will result in all a, area, and link tags having a
// rel="noreferrrer" added to them if one does not already exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy {

p.requireNoReferrer = require
p.requireParseableURLs = true

return p
}

// RequireNoReferrerOnFullyQualifiedLinks will result in all a, area, and link
// tags that point to a non-local destination (i.e. starts with a protocol and
// has a host) having a rel="noreferrer" added to them if one does not already
// exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy {

p.requireNoReferrerFullyQualifiedLinks = require
p.requireParseableURLs = true

return p
}

// AddTargetBlankToFullyQualifiedLinks will result in all a, area and link tags
// that point to a non-local destination (i.e. starts with a protocol and has a
// host) having a target="_blank" added to them if one does not already exist
//
// Note: This requires p.RequireParseableURLs(true) and will enable it.
func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {

@ -369,21 +635,6 @@ func (p *Policy) AllowURLSchemeWithCustomPolicy(
return p
}

// AllowDocType states whether the HTML sanitised by the sanitizer is allowed to
// contain the HTML DocType tag: <!DOCTYPE HTML> or one of it's variants.
//
// The HTML spec only permits one doctype per document, and as you know how you
// are using the output of this, you know best as to whether we should ignore it
// (default) or not.
//
// If you are sanitizing a HTML fragment the default (false) is fine.
func (p *Policy) AllowDocType(allow bool) *Policy {

p.allowDocType = allow

return p
}

// AddSpaceWhenStrippingTag states whether to add a single space " " when
// removing tags that are not whitelisted by the policy.
//

@ -402,7 +653,7 @@ func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {
}

// SkipElementsContent adds the HTML elements whose tags is needed to be removed
// with it's content.
// with its content.
func (p *Policy) SkipElementsContent(names ...string) *Policy {

p.init()

@ -440,6 +691,7 @@ func (p *Policy) addDefaultElementsWithoutAttrs() {

p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["address"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{}

@ -451,6 +703,7 @@ func (p *Policy) addDefaultElementsWithoutAttrs() {
p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{}

@ -484,6 +737,7 @@ func (p *Policy) addDefaultElementsWithoutAttrs() {
p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{}

@ -496,6 +750,7 @@ func (p *Policy) addDefaultElementsWithoutAttrs() {
p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["script"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{}
p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{}
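
[Editor's note] The policy.go changes above add regex-based element and attribute matching (AllowElementsMatching, OnElementsMatching) alongside the existing exact-name maps. A small sketch of how a caller might exercise it, based only on the exported methods shown in this diff; the custom element name is made up for illustration.

```go
package main

import (
	"fmt"
	"regexp"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.NewPolicy()

	// Allow any custom element whose name starts with "my-element-",
	// and allow a "data-id" attribute on those same elements.
	custom := regexp.MustCompile(`^my-element-`)
	p.AllowElementsMatching(custom)
	p.AllowAttrs("data-id").OnElementsMatching(custom)

	// The custom element (with its allowed attribute) is kept; the script tag is stripped.
	fmt.Println(p.Sanitize(`<my-element-demo data-id="1">ok</my-element-demo><script>x()</script>`))
}
```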

436 vendor/github.com/microcosm-cc/bluemonday/sanitize.go (generated, vendored)

@ -33,9 +33,20 @@ import (
"bytes"
"io"
"net/url"
"regexp"
"strconv"
"strings"

"golang.org/x/net/html"

cssparser "github.com/chris-ramon/douceur/parser"
)

var (
dataAttribute = regexp.MustCompile("^data-.+")
dataAttributeXMLPrefix = regexp.MustCompile("^xml.+")
dataAttributeInvalidChars = regexp.MustCompile("[A-Z;]+")
cssUnicodeChar = regexp.MustCompile(`\\[0-9a-f]{1,6} ?`)
)

// Sanitize takes a string that contains a HTML fragment or document and applies

@ -75,6 +86,98 @@ func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
return p.sanitize(r)
}

const escapedURLChars = "'<>\"\r"

func escapeUrlComponent(val string) string {
w := bytes.NewBufferString("")
i := strings.IndexAny(val, escapedURLChars)
for i != -1 {
if _, err := w.WriteString(val[:i]); err != nil {
return w.String()
}
var esc string
switch val[i] {
case '\'':
// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
esc = "&#39;"
case '<':
esc = "&lt;"
case '>':
esc = "&gt;"
case '"':
// "&#34;" is shorter than "&quot;".
esc = "&#34;"
case '\r':
esc = "&#13;"
default:
panic("unrecognized escape character")
}
val = val[i+1:]
if _, err := w.WriteString(esc); err != nil {
return w.String()
}
i = strings.IndexAny(val, escapedURLChars)
}
w.WriteString(val)
return w.String()
}

func sanitizedUrl(val string) (string, error) {
u, err := url.Parse(val)
if err != nil {
return "", err
}
// sanitize the url query params
sanitizedQueryValues := make(url.Values, 0)
queryValues := u.Query()
for k, vals := range queryValues {
sk := html.EscapeString(k)
for _, v := range vals {
sv := escapeUrlComponent(v)
sanitizedQueryValues.Set(sk, sv)
}
}
u.RawQuery = sanitizedQueryValues.Encode()
// u.String() will also sanitize host/scheme/user/pass
return u.String(), nil
}

func (p *Policy) writeLinkableBuf(buff *bytes.Buffer, token *html.Token) {
// do not escape multiple query parameters
tokenBuff := bytes.NewBufferString("")
tokenBuff.WriteString("<")
tokenBuff.WriteString(token.Data)
for _, attr := range token.Attr {
tokenBuff.WriteByte(' ')
tokenBuff.WriteString(attr.Key)
tokenBuff.WriteString(`="`)
switch attr.Key {
case "href", "src":
u, ok := p.validURL(attr.Val)
if !ok {
tokenBuff.WriteString(html.EscapeString(attr.Val))
continue
}
u, err := sanitizedUrl(u)
if err == nil {
tokenBuff.WriteString(u)
} else {
// fallthrough
tokenBuff.WriteString(html.EscapeString(attr.Val))
}
default:
// re-apply
tokenBuff.WriteString(html.EscapeString(attr.Val))
}
tokenBuff.WriteByte('"')
}
if token.Type == html.SelfClosingTagToken {
tokenBuff.WriteString("/")
}
tokenBuff.WriteString(">")
buff.WriteString(tokenBuff.String())
}

// Performs the actual sanitization process.
func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {

@ -112,9 +215,13 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
switch token.Type {
case html.DoctypeToken:

if p.allowDocType {
buff.WriteString(token.String())
}
// DocType is not handled as there is no safe parsing mechanism
// provided by golang.org/x/net/html for the content, and this can
// be misused to insert HTML tags that are not then sanitized
//
// One might wish to recursively sanitize here using the same policy
// but I will need to do some further testing before considering
// this.

case html.CommentToken:

@ -122,20 +229,23 @@

case html.StartTagToken:

mostRecentlyStartedToken = token.Data
mostRecentlyStartedToken = strings.ToLower(token.Data)

aps, ok := p.elsAndAttrs[token.Data]
if !ok {
if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
skipElementContent = true
skippingElementsCount++
aa, matched := p.matchRegex(token.Data)
if !matched {
if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
skipElementContent = true
skippingElementsCount++
}
if p.addSpaces {
buff.WriteString(" ")
}
break
}
if p.addSpaces {
buff.WriteString(" ")
}
break
aps = aa
}

if len(token.Attr) != 0 {
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
}

@ -152,11 +262,20 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}

if !skipElementContent {
buff.WriteString(token.String())
// do not escape multiple query parameters
if linkable(token.Data) {
p.writeLinkableBuf(&buff, &token)
} else {
buff.WriteString(token.String())
}
}

case html.EndTagToken:

if mostRecentlyStartedToken == strings.ToLower(token.Data) {
mostRecentlyStartedToken = ""
}

if skipClosingTag && closingTagToSkipStack[len(closingTagToSkipStack)-1] == token.Data {
closingTagToSkipStack = closingTagToSkipStack[:len(closingTagToSkipStack)-1]
if len(closingTagToSkipStack) == 0 {

@ -167,18 +286,27 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}
break
}

if _, ok := p.elsAndAttrs[token.Data]; !ok {
if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
match := false
for regex := range p.elsMatchingAndAttrs {
if regex.MatchString(token.Data) {
skipElementContent = false
match = true
break
}
}
if _, ok := p.setOfElementsToSkipContent[token.Data]; ok && !match {
skippingElementsCount--
if skippingElementsCount == 0 {
skipElementContent = false
}
}
if p.addSpaces {
buff.WriteString(" ")
if !match {
if p.addSpaces {
buff.WriteString(" ")
}
break
}
break
}

if !skipElementContent {

@ -189,10 +317,14 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {

aps, ok := p.elsAndAttrs[token.Data]
if !ok {
if p.addSpaces {
buff.WriteString(" ")
aa, matched := p.matchRegex(token.Data)
if !matched {
if p.addSpaces && !matched {
buff.WriteString(" ")
}
break
}
break
aps = aa
}

if len(token.Attr) != 0 {

@ -202,19 +334,23 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
if p.addSpaces {
buff.WriteString(" ")
break
}
break
}

if !skipElementContent {
buff.WriteString(token.String())
// do not escape multiple query parameters
if linkable(token.Data) {
p.writeLinkableBuf(&buff, &token)
} else {
buff.WriteString(token.String())
}
}

case html.TextToken:

if !skipElementContent {
switch strings.ToLower(mostRecentlyStartedToken) {
case "javascript":
switch mostRecentlyStartedToken {
case "script":
// not encouraged, but if a policy allows JavaScript we
// should not HTML escape it as that would break the output
buff.WriteString(token.Data)

@ -248,10 +384,47 @@ func (p *Policy) sanitizeAttrs(
return attrs
}

hasStylePolicies := false
sps, elementHasStylePolicies := p.elsAndStyles[elementName]
if len(p.globalStyles) > 0 || (elementHasStylePolicies && len(sps) > 0) {
hasStylePolicies = true
}
// no specific element policy found, look for a pattern match
if !hasStylePolicies{
for k, v := range p.elsMatchingAndStyles{
if k.MatchString(elementName) {
if len(v) > 0{
hasStylePolicies = true
break
}
}
}
}

// Builds a new attribute slice based on the whether the attribute has been
// whitelisted explicitly or globally.
cleanAttrs := []html.Attribute{}
for _, htmlAttr := range attrs {
if p.allowDataAttributes {
// If we see a data attribute, let it through.
if isDataAttribute(htmlAttr.Key) {
cleanAttrs = append(cleanAttrs, htmlAttr)
continue
}
}
// Is this a "style" attribute, and if so, do we need to sanitize it?
if htmlAttr.Key == "style" && hasStylePolicies {
htmlAttr = p.sanitizeStyles(htmlAttr, elementName)
if htmlAttr.Val == "" {
// We've sanitized away any and all styles; don't bother to
// output the style attribute (even if it's allowed)
continue
} else {
cleanAttrs = append(cleanAttrs, htmlAttr)
continue
}
}

// Is there an element specific attribute policy that applies?
if ap, ok := aps[htmlAttr.Key]; ok {
if ap.regexp != nil {

@ -267,6 +440,7 @@ func (p *Policy) sanitizeAttrs(

// Is there a global attribute policy that applies?
if ap, ok := p.globalAttrs[htmlAttr.Key]; ok {

if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
cleanAttrs = append(cleanAttrs, htmlAttr)

@ -332,6 +506,8 @@ func (p *Policy) sanitizeAttrs(

if (p.requireNoFollow ||
p.requireNoFollowFullyQualifiedLinks ||
p.requireNoReferrer ||
p.requireNoReferrerFullyQualifiedLinks ||
p.addTargetBlankToFullyQualifiedLinks) &&
len(cleanAttrs) > 0 {

@ -359,12 +535,16 @@ func (p *Policy) sanitizeAttrs(
if hrefFound {
var (
noFollowFound bool
noReferrerFound bool
targetBlankFound bool
)

addNoFollow := (p.requireNoFollow ||
externalLink && p.requireNoFollowFullyQualifiedLinks)

addNoReferrer := (p.requireNoReferrer ||
externalLink && p.requireNoReferrerFullyQualifiedLinks)

addTargetBlank := (externalLink &&
p.addTargetBlankToFullyQualifiedLinks)

@ -372,18 +552,18 @@ func (p *Policy) sanitizeAttrs(
for _, htmlAttr := range cleanAttrs {

var appended bool
if htmlAttr.Key == "rel" && addNoFollow {
if htmlAttr.Key == "rel" && (addNoFollow || addNoReferrer) {

if strings.Contains(htmlAttr.Val, "nofollow") {
noFollowFound = true
tmpAttrs = append(tmpAttrs, htmlAttr)
appended = true
} else {
if addNoFollow && !strings.Contains(htmlAttr.Val, "nofollow") {
htmlAttr.Val += " nofollow"
noFollowFound = true
tmpAttrs = append(tmpAttrs, htmlAttr)
appended = true
}
if addNoReferrer && !strings.Contains(htmlAttr.Val, "noreferrer") {
htmlAttr.Val += " noreferrer"
}
noFollowFound = addNoFollow
noReferrerFound = addNoReferrer
tmpAttrs = append(tmpAttrs, htmlAttr)
appended = true
}

if elementName == "a" && htmlAttr.Key == "target" {

@ -402,14 +582,22 @@ func (p *Policy) sanitizeAttrs(
tmpAttrs = append(tmpAttrs, htmlAttr)
}
}
if noFollowFound || targetBlankFound {
if noFollowFound || noReferrerFound || targetBlankFound {
cleanAttrs = tmpAttrs
}

if addNoFollow && !noFollowFound {
if (addNoFollow && !noFollowFound) || (addNoReferrer && !noReferrerFound) {
rel := html.Attribute{}
rel.Key = "rel"
rel.Val = "nofollow"
if addNoFollow {
rel.Val = "nofollow"
}
if addNoReferrer {
if rel.Val != "" {
rel.Val += " "
}
rel.Val += "noreferrer"
}
cleanAttrs = append(cleanAttrs, rel)
}

@ -479,20 +667,112 @@ func (p *Policy) sanitizeAttrs(
return cleanAttrs
}

func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute {
sps := p.elsAndStyles[elementName]
if len(sps) == 0{
sps = map[string]stylePolicy{}
// check for any matching elements, if we don't already have a policy found
// if multiple matches are found they will be overwritten, it's best
// to not have overlapping matchers
for regex, policies :=range p.elsMatchingAndStyles{
if regex.MatchString(elementName){
for k, v := range policies{
sps[k] = v
}
}
}
}

//Add semi-colon to end to fix parsing issue
if len(attr.Val) > 0 && attr.Val[len(attr.Val)-1] != ';' {
attr.Val = attr.Val + ";"
}
decs, err := cssparser.ParseDeclarations(attr.Val)
if err != nil {
attr.Val = ""
return attr
}
clean := []string{}
prefixes := []string{"-webkit-", "-moz-", "-ms-", "-o-", "mso-", "-xv-", "-atsc-", "-wap-", "-khtml-", "prince-", "-ah-", "-hp-", "-ro-", "-rim-", "-tc-"}

for _, dec := range decs {
addedProperty := false
tempProperty := strings.ToLower(dec.Property)
tempValue := removeUnicode(strings.ToLower(dec.Value))
for _, i := range prefixes {
tempProperty = strings.TrimPrefix(tempProperty, i)
}
if sp, ok := sps[tempProperty]; ok {
if sp.handler != nil {
if sp.handler(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
addedProperty = true
}
} else if len(sp.enum) > 0 {
if stringInSlice(tempValue, sp.enum) {
clean = append(clean, dec.Property+": "+dec.Value)
addedProperty = true
}
} else if sp.regexp != nil {
if sp.regexp.MatchString(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
addedProperty = true
}
continue
}
}
if sp, ok := p.globalStyles[tempProperty]; ok && !addedProperty {
if sp.handler != nil {
if sp.handler(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
}
} else if len(sp.enum) > 0 {
if stringInSlice(tempValue, sp.enum) {
clean = append(clean, dec.Property+": "+dec.Value)
}
} else if sp.regexp != nil {
if sp.regexp.MatchString(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
}
continue
}
}
}
if len(clean) > 0 {
attr.Val = strings.Join(clean, "; ")
} else {
attr.Val = ""
}
return attr
}

func (p *Policy) allowNoAttrs(elementName string) bool {
_, ok := p.setOfElementsAllowedWithoutAttrs[elementName]
if !ok {
for _, r := range p.setOfElementsMatchingAllowedWithoutAttrs {
if r.MatchString(elementName) {
ok = true
break
}
}
}
return ok
}

func (p *Policy) validURL(rawurl string) (string, bool) {
if p.requireParseableURLs {
// URLs do not contain whitespace
if strings.Contains(rawurl, " ") ||
// URLs are valid if when space is trimmed the URL is valid
rawurl = strings.TrimSpace(rawurl)

// URLs cannot contain whitespace, unless it is a data-uri
if (strings.Contains(rawurl, " ") ||
strings.Contains(rawurl, "\t") ||
strings.Contains(rawurl, "\n") {
strings.Contains(rawurl, "\n")) &&
!strings.HasPrefix(rawurl, `data:`) {
return "", false
}

// URLs are valid if they parse
u, err := url.Parse(rawurl)
if err != nil {
return "", false

@ -533,3 +813,77 @@ func linkable(elementName string) bool {
return false
}
}

// stringInSlice returns true if needle exists in haystack
func stringInSlice(needle string, haystack []string) bool {
for _, straw := range haystack {
if strings.ToLower(straw) == strings.ToLower(needle) {
return true
}
}
return false
}

func isDataAttribute(val string) bool {
if !dataAttribute.MatchString(val) {
return false
}
rest := strings.Split(val, "data-")
if len(rest) == 1 {
return false
}
// data-xml* is invalid.
if dataAttributeXMLPrefix.MatchString(rest[1]) {
return false
}
// no uppercase or semi-colons allowed.
if dataAttributeInvalidChars.MatchString(rest[1]) {
return false
}
return true
}

func removeUnicode(value string) string {
substitutedValue := value
currentLoc := cssUnicodeChar.FindStringIndex(substitutedValue)
for currentLoc != nil {

character := substitutedValue[currentLoc[0]+1 : currentLoc[1]]
character = strings.TrimSpace(character)
if len(character) < 4 {
character = strings.Repeat("0", 4-len(character)) + character
} else {
for len(character) > 4 {
if character[0] != '0' {
character = ""
break
} else {
character = character[1:]
}
}
}
character = "\\u" + character
translatedChar, err := strconv.Unquote(`"` + character + `"`)
translatedChar = strings.TrimSpace(translatedChar)
if err != nil {
return ""
}
substitutedValue = substitutedValue[0:currentLoc[0]] + translatedChar + substitutedValue[currentLoc[1]:]
currentLoc = cssUnicodeChar.FindStringIndex(substitutedValue)
}
return substitutedValue
}

func (p *Policy) matchRegex(elementName string ) (map[string]attrPolicy, bool) {
aps := make(map[string]attrPolicy, 0)
matched := false
for regex, attrs := range p.elsMatchingAndAttrs {
if regex.MatchString(elementName) {
matched = true
for k, v := range attrs {
aps[k] = v
}
}
}
return aps, matched
}
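
[Editor's note] To make the behavioural change in sanitize.go concrete, here is a small, hedged sketch of the kind of input the new code paths handle: style declarations are filtered per property by sanitizeStyles, and link URLs are written via the new writeLinkableBuf path with rel handling extended to "noreferrer". The input HTML is made up, and the comment describes the expected shape of the output rather than its exact byte-for-byte form.

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.UGCPolicy()

	// Exercise the extended rel handling in sanitizeAttrs: external links get
	// rel="nofollow" (UGCPolicy default) plus "noreferrer" with the call below.
	p.RequireNoReferrerOnLinks(true)

	// Exercise sanitizeStyles: only whitelisted declarations survive in the
	// style attribute, everything else is dropped.
	p.AllowAttrs("style").OnElements("span")
	p.AllowStyles("text-decoration").MatchingEnum("underline", "none").OnElements("span")

	in := `<a href="http://example.org/?q=1&r=2">link</a>` +
		`<span style="text-decoration: underline; position: fixed">text</span>`
	fmt.Println(p.Sanitize(in))
	// Roughly: the href is written through the new linkable-token path rather
	// than being entity-escaped wholesale, the anchor gains
	// rel="nofollow noreferrer", and only "text-decoration: underline" remains
	// in the span's style attribute.
}
```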