1
0
Fork 0
forked from forgejo/forgejo

upgrade to most recent bluemonday (#11007)

* upgrade to most recent bluemonday

* make vendor

* update tests for bluemonday

* update tests for bluemonday

* update tests for bluemonday
This commit is contained in:
techknowlogick 2020-04-07 16:08:47 -04:00 committed by GitHub
parent 4c54477bb5
commit d00ebf445b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
50 changed files with 4977 additions and 300 deletions

27
vendor/github.com/gorilla/css/LICENSE generated vendored Normal file
View file

@ -0,0 +1,27 @@
Copyright (c) 2013, Gorilla web toolkit
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
Neither the name of the {organization} nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

33
vendor/github.com/gorilla/css/scanner/doc.go generated vendored Normal file
View file

@ -0,0 +1,33 @@
// Copyright 2012 The Gorilla Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package gorilla/css/scanner generates tokens for a CSS3 input.
It follows the CSS3 specification located at:
http://www.w3.org/TR/css3-syntax/
To use it, create a new scanner for a given CSS string and call Next() until
the token returned has type TokenEOF or TokenError:
s := scanner.New(myCSS)
for {
token := s.Next()
if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError {
break
}
// Do something with the token...
}
Following the CSS3 specification, an error can only occur when the scanner
finds an unclosed quote or unclosed comment. In these cases the text becomes
"untokenizable". Everything else is tokenizable and it is up to a parser
to make sense of the token stream (or ignore nonsensical token sequences).
Note: the scanner doesn't perform lexical analysis or, in other words, it
doesn't care about the token context. It is intended to be used by a
lexer or parser.
*/
package scanner

356
vendor/github.com/gorilla/css/scanner/scanner.go generated vendored Normal file
View file

@ -0,0 +1,356 @@
// Copyright 2012 The Gorilla Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package scanner
import (
"fmt"
"regexp"
"strings"
"unicode"
"unicode/utf8"
)
// tokenType identifies the type of lexical tokens.
type tokenType int
// String returns a string representation of the token type.
func (t tokenType) String() string {
return tokenNames[t]
}
// Token represents a token and the corresponding string.
type Token struct {
Type tokenType
Value string
Line int
Column int
}
// String returns a string representation of the token.
func (t *Token) String() string {
if len(t.Value) > 10 {
return fmt.Sprintf("%s (line: %d, column: %d): %.10q...",
t.Type, t.Line, t.Column, t.Value)
}
return fmt.Sprintf("%s (line: %d, column: %d): %q",
t.Type, t.Line, t.Column, t.Value)
}
// All tokens -----------------------------------------------------------------
// The complete list of tokens in CSS3.
const (
// Scanner flags.
TokenError tokenType = iota
TokenEOF
// From now on, only tokens from the CSS specification.
TokenIdent
TokenAtKeyword
TokenString
TokenHash
TokenNumber
TokenPercentage
TokenDimension
TokenURI
TokenUnicodeRange
TokenCDO
TokenCDC
TokenS
TokenComment
TokenFunction
TokenIncludes
TokenDashMatch
TokenPrefixMatch
TokenSuffixMatch
TokenSubstringMatch
TokenChar
TokenBOM
)
// tokenNames maps tokenType's to their names. Used for conversion to string.
var tokenNames = map[tokenType]string{
TokenError: "error",
TokenEOF: "EOF",
TokenIdent: "IDENT",
TokenAtKeyword: "ATKEYWORD",
TokenString: "STRING",
TokenHash: "HASH",
TokenNumber: "NUMBER",
TokenPercentage: "PERCENTAGE",
TokenDimension: "DIMENSION",
TokenURI: "URI",
TokenUnicodeRange: "UNICODE-RANGE",
TokenCDO: "CDO",
TokenCDC: "CDC",
TokenS: "S",
TokenComment: "COMMENT",
TokenFunction: "FUNCTION",
TokenIncludes: "INCLUDES",
TokenDashMatch: "DASHMATCH",
TokenPrefixMatch: "PREFIXMATCH",
TokenSuffixMatch: "SUFFIXMATCH",
TokenSubstringMatch: "SUBSTRINGMATCH",
TokenChar: "CHAR",
TokenBOM: "BOM",
}
// Macros and productions -----------------------------------------------------
// http://www.w3.org/TR/css3-syntax/#tokenization
var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`)
// macros maps macro names to patterns to be expanded.
var macros = map[string]string{
// must be escaped: `\.+*?()|[]{}^$`
"ident": `-?{nmstart}{nmchar}*`,
"name": `{nmchar}+`,
"nmstart": `[a-zA-Z_]|{nonascii}|{escape}`,
"nonascii": "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
"unicode": `\\[0-9a-fA-F]{1,6}{wc}?`,
"escape": "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]",
"nmchar": `[a-zA-Z0-9_-]|{nonascii}|{escape}`,
"num": `[0-9]*\.[0-9]+|[0-9]+`,
"string": `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
"stringchar": `{urlchar}|[ ]|\\{nl}`,
"nl": `[\n\r\f]|\r\n`,
"w": `{wc}*`,
"wc": `[\t\n\f\r ]`,
// urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
// ASCII characters range = `[\u0020-\u007e]`
// Skip space \u0020 = `[\u0021-\u007e]`
// Skip quotation mark \0022 = `[\u0021\u0023-\u007e]`
// Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
// Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d\u007e]`
// Finally, the left square bracket (\u005b) and right (\u005d) needs escaping themselves
"urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
}
// productions maps the list of tokens to patterns to be expanded.
var productions = map[tokenType]string{
// Unused regexps (matched using other methods) are commented out.
TokenIdent: `{ident}`,
TokenAtKeyword: `@{ident}`,
TokenString: `{string}`,
TokenHash: `#{name}`,
TokenNumber: `{num}`,
TokenPercentage: `{num}%`,
TokenDimension: `{num}{ident}`,
TokenURI: `url\({w}(?:{string}|{urlchar}*?){w}\)`,
TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`,
//TokenCDO: `<!--`,
TokenCDC: `-->`,
TokenS: `{wc}+`,
TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`,
TokenFunction: `{ident}\(`,
//TokenIncludes: `~=`,
//TokenDashMatch: `\|=`,
//TokenPrefixMatch: `\^=`,
//TokenSuffixMatch: `\$=`,
//TokenSubstringMatch: `\*=`,
//TokenChar: `[^"']`,
//TokenBOM: "\uFEFF",
}
// matchers maps the list of tokens to compiled regular expressions.
//
// The map is filled on init() using the macros and productions defined in
// the CSS specification.
var matchers = map[tokenType]*regexp.Regexp{}
// matchOrder is the order to test regexps when first-char shortcuts
// can't be used.
var matchOrder = []tokenType{
TokenURI,
TokenFunction,
TokenUnicodeRange,
TokenIdent,
TokenDimension,
TokenPercentage,
TokenNumber,
TokenCDC,
}
func init() {
// replace macros and compile regexps for productions.
replaceMacro := func(s string) string {
return "(?:" + macros[s[1:len(s)-1]] + ")"
}
for t, s := range productions {
for macroRegexp.MatchString(s) {
s = macroRegexp.ReplaceAllStringFunc(s, replaceMacro)
}
matchers[t] = regexp.MustCompile("^(?:" + s + ")")
}
}
// Scanner --------------------------------------------------------------------
// New returns a new CSS scanner for the given input.
func New(input string) *Scanner {
// Normalize newlines.
input = strings.Replace(input, "\r\n", "\n", -1)
return &Scanner{
input: input,
row: 1,
col: 1,
}
}
// Scanner scans an input and emits tokens following the CSS3 specification.
type Scanner struct {
input string
pos int
row int
col int
err *Token
}
// Next returns the next token from the input.
//
// At the end of the input the token type is TokenEOF.
//
// If the input can't be tokenized the token type is TokenError. This occurs
// in case of unclosed quotation marks or comments.
func (s *Scanner) Next() *Token {
if s.err != nil {
return s.err
}
if s.pos >= len(s.input) {
s.err = &Token{TokenEOF, "", s.row, s.col}
return s.err
}
if s.pos == 0 {
// Test BOM only once, at the beginning of the file.
if strings.HasPrefix(s.input, "\uFEFF") {
return s.emitSimple(TokenBOM, "\uFEFF")
}
}
// There's a lot we can guess based on the first byte so we'll take a
// shortcut before testing multiple regexps.
input := s.input[s.pos:]
switch input[0] {
case '\t', '\n', '\f', '\r', ' ':
// Whitespace.
return s.emitToken(TokenS, matchers[TokenS].FindString(input))
case '.':
// Dot is too common to not have a quick check.
// We'll test if this is a Char; if it is followed by a number it is a
// dimension/percentage/number, and this will be matched later.
if len(input) > 1 && !unicode.IsDigit(rune(input[1])) {
return s.emitSimple(TokenChar, ".")
}
case '#':
// Another common one: Hash or Char.
if match := matchers[TokenHash].FindString(input); match != "" {
return s.emitToken(TokenHash, match)
}
return s.emitSimple(TokenChar, "#")
case '@':
// Another common one: AtKeyword or Char.
if match := matchers[TokenAtKeyword].FindString(input); match != "" {
return s.emitSimple(TokenAtKeyword, match)
}
return s.emitSimple(TokenChar, "@")
case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}':
// More common chars.
return s.emitSimple(TokenChar, string(input[0]))
case '"', '\'':
// String or error.
match := matchers[TokenString].FindString(input)
if match != "" {
return s.emitToken(TokenString, match)
}
s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
return s.err
case '/':
// Comment, error or Char.
if len(input) > 1 && input[1] == '*' {
match := matchers[TokenComment].FindString(input)
if match != "" {
return s.emitToken(TokenComment, match)
} else {
s.err = &Token{TokenError, "unclosed comment", s.row, s.col}
return s.err
}
}
return s.emitSimple(TokenChar, "/")
case '~':
// Includes or Char.
return s.emitPrefixOrChar(TokenIncludes, "~=")
case '|':
// DashMatch or Char.
return s.emitPrefixOrChar(TokenDashMatch, "|=")
case '^':
// PrefixMatch or Char.
return s.emitPrefixOrChar(TokenPrefixMatch, "^=")
case '$':
// SuffixMatch or Char.
return s.emitPrefixOrChar(TokenSuffixMatch, "$=")
case '*':
// SubstringMatch or Char.
return s.emitPrefixOrChar(TokenSubstringMatch, "*=")
case '<':
// CDO or Char.
return s.emitPrefixOrChar(TokenCDO, "<!--")
}
// Test all regexps, in order.
for _, token := range matchOrder {
if match := matchers[token].FindString(input); match != "" {
return s.emitToken(token, match)
}
}
// We already handled unclosed quotation marks and comments,
// so this can only be a Char.
r, width := utf8.DecodeRuneInString(input)
token := &Token{TokenChar, string(r), s.row, s.col}
s.col += width
s.pos += width
return token
}
// updatePosition updates input coordinates based on the consumed text.
func (s *Scanner) updatePosition(text string) {
width := utf8.RuneCountInString(text)
lines := strings.Count(text, "\n")
s.row += lines
if lines == 0 {
s.col += width
} else {
s.col = utf8.RuneCountInString(text[strings.LastIndex(text, "\n"):])
}
s.pos += len(text) // while col is a rune index, pos is a byte index
}
// emitToken returns a Token for the string v and updates the scanner position.
func (s *Scanner) emitToken(t tokenType, v string) *Token {
token := &Token{t, v, s.row, s.col}
s.updatePosition(v)
return token
}
// emitSimple returns a Token for the string v and updates the scanner
// position in a simplified manner.
//
// The string is known to have only ASCII characters and to not have a newline.
func (s *Scanner) emitSimple(t tokenType, v string) *Token {
token := &Token{t, v, s.row, s.col}
s.col += len(v)
s.pos += len(v)
return token
}
// emitPrefixOrChar returns a Token for type t if the current position
// matches the given prefix. Otherwise it returns a Char token using the
// first character from the prefix.
//
// The prefix is known to have only ASCII characters and to not have a newline.
func (s *Scanner) emitPrefixOrChar(t tokenType, prefix string) *Token {
if strings.HasPrefix(s.input[s.pos:], prefix) {
return s.emitSimple(t, prefix)
}
return s.emitSimple(TokenChar, string(prefix[0]))
}