Browse Source

Make parser compatible with Go 1.12

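See the changes to strings.Map() in Go 1.12, which now always returns valid UTF-8 (earlier releases could return invalid UTF-8 input unmodified): https://golang.org/doc/go1.12#strings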
Frédéric Guillot 3 months ago
parent
commit
6764a420b0

+ 1 - 1
go.mod

@@ -10,7 +10,7 @@ require (
 	github.com/lib/pq v1.0.0
 	github.com/tdewolff/minify/v2 v2.3.8 // indirect
 	golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9
-	golang.org/x/net v0.0.0-20181207154023-610586996380
+	golang.org/x/net v0.0.0-20190228165749-92fc7df08ae7
 	golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890
 	golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f // indirect
 	golang.org/x/sys v0.0.0-20181208175041-ad97f365e150 // indirect

+ 2 - 0
go.sum

@@ -26,6 +26,8 @@ golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73r
 golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20181207154023-610586996380 h1:zPQexyRtNYBc7bcHmehl1dH6TB3qn8zytv8cBGLDNY0=
 golang.org/x/net v0.0.0-20181207154023-610586996380/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190228165749-92fc7df08ae7 h1:Qe/u+eY379X4He4GBMFZYu3pmh1ML5yT1aL1ndNM1zQ=
+golang.org/x/net v0.0.0-20190228165749-92fc7df08ae7/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890 h1:uESlIz09WIHT2I+pasSXcpLYqYK8wHcdCetU3VuMBJE=
 golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA=

+ 0 - 26
reader/parser/parser.go

@@ -8,7 +8,6 @@ import (
 	"strings"
 
 	"miniflux.app/errors"
-	"miniflux.app/logger"
 	"miniflux.app/model"
 	"miniflux.app/reader/atom"
 	"miniflux.app/reader/json"
@@ -18,8 +17,6 @@ import (
 
 // ParseFeed analyzes the input data and returns a normalized feed object.
 func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
-	data = stripInvalidXMLCharacters(data)
-
 	switch DetectFeedFormat(data) {
 	case FormatAtom:
 		return atom.Parse(strings.NewReader(data))
@@ -33,26 +30,3 @@ func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
 		return nil, errors.NewLocalizedError("Unsupported feed format")
 	}
 }
-
-func stripInvalidXMLCharacters(input string) string {
-	return strings.Map(func(r rune) rune {
-		if isInCharacterRange(r) {
-			return r
-		}
-
-		logger.Debug("Strip invalid XML characters: %U", r)
-		return -1
-	}, input)
-}
-
-// Decide whether the given rune is in the XML Character Range, per
-// the Char production of http://www.xml.com/axml/testaxml.htm,
-// Section 2.2 Characters.
-func isInCharacterRange(r rune) (inrange bool) {
-	return r == 0x09 ||
-		r == 0x0A ||
-		r == 0x0D ||
-		r >= 0x20 && r <= 0xDF77 ||
-		r >= 0xE000 && r <= 0xFFFD ||
-		r >= 0x10000 && r <= 0x10FFFF
-}

+ 4 - 1
reader/parser/parser_test.go

@@ -187,7 +187,10 @@ func TestDifferentEncodingWithResponse(t *testing.T) {
 		}
 
 		r := &client.Response{Body: bytes.NewReader(content), ContentType: tc.contentType}
-		r.EnsureUnicodeBody()
+		if encodingErr := r.EnsureUnicodeBody(); encodingErr != nil {
+			t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
+		}
+
 		feed, parseErr := ParseFeed(r.String())
 		if parseErr != nil {
 			t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)

+ 1 - 1
vendor/golang.org/x/net/html/node.go

@@ -177,7 +177,7 @@ func (s *nodeStack) index(n *Node) int {
 // contains returns whether a is within s.
 func (s *nodeStack) contains(a atom.Atom) bool {
 	for _, n := range *s {
-		if n.DataAtom == a {
+		if n.DataAtom == a && n.Namespace == "" {
 			return true
 		}
 	}

+ 22 - 9
vendor/golang.org/x/net/html/parse.go

@@ -439,9 +439,6 @@ func (p *parser) resetInsertionMode() {
 		case a.Select:
 			if !last {
 				for ancestor, first := n, p.oe[0]; ancestor != first; {
-					if ancestor == first {
-						break
-					}
 					ancestor = p.oe[p.oe.index(ancestor)-1]
 					switch ancestor.DataAtom {
 					case a.Template:
@@ -1719,8 +1716,12 @@ func inSelectIM(p *parser) bool {
 			}
 			p.addElement()
 		case a.Select:
-			p.tok.Type = EndTagToken
-			return false
+			if p.popUntil(selectScope, a.Select) {
+				p.resetInsertionMode()
+			} else {
+				// Ignore the token.
+				return true
+			}
 		case a.Input, a.Keygen, a.Textarea:
 			if p.elementInScope(selectScope, a.Select) {
 				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@@ -1750,6 +1751,9 @@ func inSelectIM(p *parser) bool {
 		case a.Select:
 			if p.popUntil(selectScope, a.Select) {
 				p.resetInsertionMode()
+			} else {
+				// Ignore the token.
+				return true
 			}
 		case a.Template:
 			return inHeadIM(p)
@@ -1775,13 +1779,22 @@ func inSelectInTableIM(p *parser) bool {
 	case StartTagToken, EndTagToken:
 		switch p.tok.DataAtom {
 		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
-			if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
-				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
-				return false
-			} else {
+			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
 				// Ignore the token.
 				return true
 			}
+			// This is like p.popUntil(selectScope, a.Select), but it also
+			// matches <math select>, not just <select>. Matching the MathML
+			// tag is arguably incorrect (conceptually), but it mimics what
+			// Chromium does.
+			for i := len(p.oe) - 1; i >= 0; i-- {
+				if n := p.oe[i]; n.DataAtom == a.Select {
+					p.oe = p.oe[:i]
+					break
+				}
+			}
+			p.resetInsertionMode()
+			return false
 		}
 	}
 	return inSelectIM(p)

+ 1 - 1
vendor/modules.txt

@@ -18,7 +18,7 @@ golang.org/x/crypto/acme/autocert
 golang.org/x/crypto/bcrypt
 golang.org/x/crypto/acme
 golang.org/x/crypto/blowfish
-# golang.org/x/net v0.0.0-20181207154023-610586996380
+# golang.org/x/net v0.0.0-20190228165749-92fc7df08ae7
 golang.org/x/net/html/charset
 golang.org/x/net/html
 golang.org/x/net/html/atom