Godeps: bump github.com/huin/goupnp to c57ae84

release/1.0.1
Felix Lange 10 years ago
parent 663d4e0aff
commit e7c7b54b82
  1. 14
      Godeps/Godeps.json
  2. 3
      Godeps/_workspace/src/github.com/huin/goupnp/goupnp.go
  3. 78
      Godeps/_workspace/src/golang.org/x/net/html/atom/atom.go
  4. 109
      Godeps/_workspace/src/golang.org/x/net/html/atom/atom_test.go
  5. 648
      Godeps/_workspace/src/golang.org/x/net/html/atom/gen.go
  6. 713
      Godeps/_workspace/src/golang.org/x/net/html/atom/table.go
  7. 351
      Godeps/_workspace/src/golang.org/x/net/html/atom/table_test.go
  8. 244
      Godeps/_workspace/src/golang.org/x/net/html/charset/charset.go
  9. 236
      Godeps/_workspace/src/golang.org/x/net/html/charset/charset_test.go
  10. 111
      Godeps/_workspace/src/golang.org/x/net/html/charset/gen.go
  11. 235
      Godeps/_workspace/src/golang.org/x/net/html/charset/table.go
  12. 48
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-charset.html
  13. 48
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html
  14. 49
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html
  15. 49
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html
  16. 47
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html
  17. 9
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/README
  18. BIN
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html
  19. BIN
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html
  20. 49
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html
  21. 48
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html
  22. 48
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html
  23. 48
      Godeps/_workspace/src/golang.org/x/net/html/charset/testdata/meta-content-attribute.html
  24. 102
      Godeps/_workspace/src/golang.org/x/net/html/const.go
  25. 106
      Godeps/_workspace/src/golang.org/x/net/html/doc.go
  26. 156
      Godeps/_workspace/src/golang.org/x/net/html/doctype.go
  27. 2253
      Godeps/_workspace/src/golang.org/x/net/html/entity.go
  28. 29
      Godeps/_workspace/src/golang.org/x/net/html/entity_test.go
  29. 258
      Godeps/_workspace/src/golang.org/x/net/html/escape.go
  30. 97
      Godeps/_workspace/src/golang.org/x/net/html/escape_test.go
  31. 40
      Godeps/_workspace/src/golang.org/x/net/html/example_test.go
  32. 226
      Godeps/_workspace/src/golang.org/x/net/html/foreign.go
  33. 193
      Godeps/_workspace/src/golang.org/x/net/html/node.go
  34. 146
      Godeps/_workspace/src/golang.org/x/net/html/node_test.go
  35. 2094
      Godeps/_workspace/src/golang.org/x/net/html/parse.go
  36. 388
      Godeps/_workspace/src/golang.org/x/net/html/parse_test.go
  37. 271
      Godeps/_workspace/src/golang.org/x/net/html/render.go
  38. 156
      Godeps/_workspace/src/golang.org/x/net/html/render_test.go
  39. 2237
      Godeps/_workspace/src/golang.org/x/net/html/testdata/go1.html
  40. 28
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/README
  41. 194
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/adoption01.dat
  42. 31
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/adoption02.dat
  43. 135
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/comments01.dat
  44. 370
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/doctype01.dat
  45. 603
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/entities01.dat
  46. 249
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/entities02.dat
  47. 246
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/html5test-com.dat
  48. 43
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/inbody01.dat
  49. 40
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/isindex.dat
  50. BIN
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat
  51. 52
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/pending-spec-changes.dat
  52. BIN
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/plain-text-unsafe.dat
  53. 308
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/scriptdata01.dat
  54. 15
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/scripted/adoption01.dat
  55. 28
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/scripted/webkit01.dat
  56. 212
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tables01.dat
  57. 1952
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests1.dat
  58. 799
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests10.dat
  59. 482
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests11.dat
  60. 62
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests12.dat
  61. 74
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests14.dat
  62. 208
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests15.dat
  63. 2299
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests16.dat
  64. 153
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests17.dat
  65. 269
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests18.dat
  66. 1237
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests19.dat
  67. 763
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests2.dat
  68. 455
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests20.dat
  69. 221
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests21.dat
  70. 157
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests22.dat
  71. 155
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests23.dat
  72. 79
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests24.dat
  73. 219
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests25.dat
  74. 313
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests26.dat
  75. 305
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests3.dat
  76. 59
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests4.dat
  77. 191
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests5.dat
  78. 663
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests6.dat
  79. 390
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests7.dat
  80. 148
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests8.dat
  81. 457
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests9.dat
  82. 741
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tests_innerHTML_1.dat
  83. 261
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/tricky01.dat
  84. 610
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/webkit01.dat
  85. 159
      Godeps/_workspace/src/golang.org/x/net/html/testdata/webkit/webkit02.dat
  86. 1219
      Godeps/_workspace/src/golang.org/x/net/html/token.go
  87. 748
      Godeps/_workspace/src/golang.org/x/net/html/token_test.go
  88. 209
      Godeps/_workspace/src/golang.org/x/text/encoding/charmap/charmap.go
  89. 415
      Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go
  90. 5116
      Godeps/_workspace/src/golang.org/x/text/encoding/charmap/tables.go
  91. 179
      Godeps/_workspace/src/golang.org/x/text/encoding/encoding.go
  92. 1033
      Godeps/_workspace/src/golang.org/x/text/encoding/encoding_test.go
  93. 40
      Godeps/_workspace/src/golang.org/x/text/encoding/example_test.go
  94. 45
      Godeps/_workspace/src/golang.org/x/text/encoding/htmlindex/htmlindex.go
  95. 26
      Godeps/_workspace/src/golang.org/x/text/encoding/ianaindex/example_test.go
  96. 64
      Godeps/_workspace/src/golang.org/x/text/encoding/ianaindex/ianaindex.go
  97. 137
      Godeps/_workspace/src/golang.org/x/text/encoding/internal/identifier/gen.go
  98. 80
      Godeps/_workspace/src/golang.org/x/text/encoding/internal/identifier/identifier.go
  99. 1621
      Godeps/_workspace/src/golang.org/x/text/encoding/internal/identifier/mib.go
  100. 60
      Godeps/_workspace/src/golang.org/x/text/encoding/internal/internal.go
  101. Some files were not shown because too many files have changed in this diff Show More

14
Godeps/Godeps.json generated vendored

@ -27,7 +27,7 @@
},
{
"ImportPath": "github.com/huin/goupnp",
"Rev": "4191d8a85005844ea202fde52799681971b12dfe"
"Rev": "c57ae84388ab59076fd547f1abeab71c2edb0a21"
},
{
"ImportPath": "github.com/jackpal/go-nat-pmp",
@ -89,6 +89,18 @@
"ImportPath": "golang.org/x/crypto/scrypt",
"Rev": "4ed45ec682102c643324fae5dff8dab085b6c300"
},
{
"ImportPath": "golang.org/x/net/html",
"Rev": "e0403b4e005737430c05a57aac078479844f919c"
},
{
"ImportPath": "golang.org/x/text/encoding",
"Rev": "c93e7c9fff19fb9139b5ab04ce041833add0134e"
},
{
"ImportPath": "golang.org/x/text/transform",
"Rev": "c93e7c9fff19fb9139b5ab04ce041833add0134e"
},
{
"ImportPath": "gopkg.in/check.v1",
"Rev": "64131543e7896d5bcc6bd5a76287eb75ea96c673"

@ -20,6 +20,8 @@ import (
"net/http"
"net/url"
"golang.org/x/net/html/charset"
"github.com/huin/goupnp/httpu"
"github.com/huin/goupnp/ssdp"
)
@ -104,6 +106,7 @@ func requestXml(url string, defaultSpace string, doc interface{}) error {
decoder := xml.NewDecoder(resp.Body)
decoder.DefaultSpace = defaultSpace
decoder.CharsetReader = charset.NewReaderLabel
return decoder.Decode(doc)
}

@ -0,0 +1,78 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package atom provides integer codes (also known as atoms) for a fixed set of
// frequently occurring HTML strings: tag names and attribute keys such as "p"
// and "id".
//
// Sharing an atom's name between all elements with the same tag can result in
// fewer string allocations when tokenizing and parsing HTML. Integer
// comparisons are also generally faster than string comparisons.
//
// The value of an atom's particular code is not guaranteed to stay the same
// between versions of this package. Neither is any ordering guaranteed:
// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
// be dense. The only guarantees are that e.g. looking up "div" will yield
// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
package atom
// Atom is an integer code for a string. The zero value maps to "".
type Atom uint32
// String returns the atom's name.
func (a Atom) String() string {
start := uint32(a >> 8)
n := uint32(a & 0xff)
if start+n > uint32(len(atomText)) {
return ""
}
return atomText[start : start+n]
}
func (a Atom) string() string {
return atomText[a>>8 : a>>8+a&0xff]
}
// fnv computes the FNV hash with an arbitrary starting value h.
func fnv(h uint32, s []byte) uint32 {
for i := range s {
h ^= uint32(s[i])
h *= 16777619
}
return h
}
func match(s string, t []byte) bool {
for i, c := range t {
if s[i] != c {
return false
}
}
return true
}
// Lookup returns the atom whose name is s. It returns zero if there is no
// such atom. The lookup is case sensitive.
func Lookup(s []byte) Atom {
if len(s) == 0 || len(s) > maxAtomLen {
return 0
}
h := fnv(hash0, s)
if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
return a
}
if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
return a
}
return 0
}
// String returns a string whose contents are equal to s. In that sense, it is
// equivalent to string(s) but may be more efficient.
func String(s []byte) string {
if a := Lookup(s); a != 0 {
return a.String()
}
return string(s)
}

@ -0,0 +1,109 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package atom
import (
"sort"
"testing"
)
func TestKnown(t *testing.T) {
for _, s := range testAtomList {
if atom := Lookup([]byte(s)); atom.String() != s {
t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String())
}
}
}
func TestHits(t *testing.T) {
for _, a := range table {
if a == 0 {
continue
}
got := Lookup([]byte(a.String()))
if got != a {
t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a))
}
}
}
func TestMisses(t *testing.T) {
testCases := []string{
"",
"\x00",
"\xff",
"A",
"DIV",
"Div",
"dIV",
"aa",
"a\x00",
"ab",
"abb",
"abbr0",
"abbr ",
" abbr",
" a",
"acceptcharset",
"acceptCharset",
"accept_charset",
"h0",
"h1h2",
"h7",
"onClick",
"λ",
// The following string has the same hash (0xa1d7fab7) as "onmouseover".
"\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
}
for _, tc := range testCases {
got := Lookup([]byte(tc))
if got != 0 {
t.Errorf("Lookup(%q): got %d, want 0", tc, got)
}
}
}
func TestForeignObject(t *testing.T) {
const (
afo = Foreignobject
afO = ForeignObject
sfo = "foreignobject"
sfO = "foreignObject"
)
if got := Lookup([]byte(sfo)); got != afo {
t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo)
}
if got := Lookup([]byte(sfO)); got != afO {
t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO)
}
if got := afo.String(); got != sfo {
t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo)
}
if got := afO.String(); got != sfO {
t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO)
}
}
func BenchmarkLookup(b *testing.B) {
sortedTable := make([]string, 0, len(table))
for _, a := range table {
if a != 0 {
sortedTable = append(sortedTable, a.String())
}
}
sort.Strings(sortedTable)
x := make([][]byte, 1000)
for i := range x {
x[i] = []byte(sortedTable[i%len(sortedTable)])
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, s := range x {
Lookup(s)
}
}
}

@ -0,0 +1,648 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates table.go and table_test.go.
// Invoke as
//
// go run gen.go |gofmt >table.go
// go run gen.go -test |gofmt >table_test.go
import (
"flag"
"fmt"
"math/rand"
"os"
"sort"
"strings"
)
// identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
func identifier(s string) string {
b := make([]byte, 0, len(s))
cap := true
for _, c := range s {
if c == '-' {
cap = true
continue
}
if cap && 'a' <= c && c <= 'z' {
c -= 'a' - 'A'
}
cap = false
b = append(b, byte(c))
}
return string(b)
}
var test = flag.Bool("test", false, "generate table_test.go")
func main() {
flag.Parse()
var all []string
all = append(all, elements...)
all = append(all, attributes...)
all = append(all, eventHandlers...)
all = append(all, extra...)
sort.Strings(all)
if *test {
fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\n")
fmt.Printf("var testAtomList = []string{\n")
for _, s := range all {
fmt.Printf("\t%q,\n", s)
}
fmt.Printf("}\n")
return
}
// uniq - lists have dups
// compute max len too
maxLen := 0
w := 0
for _, s := range all {
if w == 0 || all[w-1] != s {
if maxLen < len(s) {
maxLen = len(s)
}
all[w] = s
w++
}
}
all = all[:w]
// Find hash that minimizes table size.
var best *table
for i := 0; i < 1000000; i++ {
if best != nil && 1<<(best.k-1) < len(all) {
break
}
h := rand.Uint32()
for k := uint(0); k <= 16; k++ {
if best != nil && k >= best.k {
break
}
var t table
if t.init(h, k, all) {
best = &t
break
}
}
}
if best == nil {
fmt.Fprintf(os.Stderr, "failed to construct string table\n")
os.Exit(1)
}
// Lay out strings, using overlaps when possible.
layout := append([]string{}, all...)
// Remove strings that are substrings of other strings
for changed := true; changed; {
changed = false
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i != j && t != "" && strings.Contains(s, t) {
changed = true
layout[j] = ""
}
}
}
}
// Join strings where one suffix matches another prefix.
for {
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
// maximizing overlap length k.
besti := -1
bestj := -1
bestk := 0
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i == j {
continue
}
for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
if s[len(s)-k:] == t[:k] {
besti = i
bestj = j
bestk = k
}
}
}
}
if bestk > 0 {
layout[besti] += layout[bestj][bestk:]
layout[bestj] = ""
continue
}
break
}
text := strings.Join(layout, "")
atom := map[string]uint32{}
for _, s := range all {
off := strings.Index(text, s)
if off < 0 {
panic("lost string " + s)
}
atom[s] = uint32(off<<8 | len(s))
}
// Generate the Go code.
fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\nconst (\n")
for _, s := range all {
fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
}
fmt.Printf(")\n\n")
fmt.Printf("const hash0 = %#x\n\n", best.h0)
fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
for i, s := range best.tab {
if s == "" {
continue
}
fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
}
fmt.Printf("}\n")
datasize := (1 << best.k) * 4
fmt.Printf("const atomText =\n")
textsize := len(text)
for len(text) > 60 {
fmt.Printf("\t%q +\n", text[:60])
text = text[60:]
}
fmt.Printf("\t%q\n\n", text)
fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
}
type byLen []string
func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byLen) Len() int { return len(x) }
// fnv computes the FNV hash with an arbitrary starting value h.
func fnv(h uint32, s string) uint32 {
for i := 0; i < len(s); i++ {
h ^= uint32(s[i])
h *= 16777619
}
return h
}
// A table represents an attempt at constructing the lookup table.
// The lookup table uses cuckoo hashing, meaning that each string
// can be found in one of two positions.
type table struct {
h0 uint32
k uint
mask uint32
tab []string
}
// hash returns the two hashes for s.
func (t *table) hash(s string) (h1, h2 uint32) {
h := fnv(t.h0, s)
h1 = h & t.mask
h2 = (h >> 16) & t.mask
return
}
// init initializes the table with the given parameters.
// h0 is the initial hash value,
// k is the number of bits of hash value to use, and
// x is the list of strings to store in the table.
// init returns false if the table cannot be constructed.
func (t *table) init(h0 uint32, k uint, x []string) bool {
t.h0 = h0
t.k = k
t.tab = make([]string, 1<<k)
t.mask = 1<<k - 1
for _, s := range x {
if !t.insert(s) {
return false
}
}
return true
}
// insert inserts s in the table.
func (t *table) insert(s string) bool {
h1, h2 := t.hash(s)
if t.tab[h1] == "" {
t.tab[h1] = s
return true
}
if t.tab[h2] == "" {
t.tab[h2] = s
return true
}
if t.push(h1, 0) {
t.tab[h1] = s
return true
}
if t.push(h2, 0) {
t.tab[h2] = s
return true
}
return false
}
// push attempts to push aside the entry in slot i.
func (t *table) push(i uint32, depth int) bool {
if depth > len(t.tab) {
return false
}
s := t.tab[i]
h1, h2 := t.hash(s)
j := h1 + h2 - i
if t.tab[j] != "" && !t.push(j, depth+1) {
return false
}
t.tab[j] = s
return true
}
// The lists of element names and attribute keys were taken from
// https://html.spec.whatwg.org/multipage/indices.html#index
// as of the "HTML Living Standard - Last Updated 21 February 2015" version.
var elements = []string{
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"command",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"map",
"mark",
"menu",
"menuitem",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
}
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
var attributes = []string{
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"alt",
"async",
"autocomplete",
"autofocus",
"autoplay",
"challenge",
"charset",
"checked",
"cite",
"class",
"cols",
"colspan",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"datetime",
"default",
"defer",
"dir",
"dirname",
"disabled",
"download",
"draggable",
"dropzone",
"enctype",
"for",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"headers",
"height",
"hidden",
"high",
"href",
"hreflang",
"http-equiv",
"icon",
"id",
"inputmode",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"keytype",
"kind",
"label",
"lang",
"list",
"loop",
"low",
"manifest",
"max",
"maxlength",
"media",
"mediagroup",
"method",
"min",
"minlength",
"multiple",
"muted",
"name",
"novalidate",
"open",
"optimum",
"pattern",
"ping",
"placeholder",
"poster",
"preload",
"radiogroup",
"readonly",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"sandbox",
"spellcheck",
"scope",
"scoped",
"seamless",
"selected",
"shape",
"size",
"sizes",
"sortable",
"sorted",
"span",
"src",
"srcdoc",
"srclang",
"start",
"step",
"style",
"tabindex",
"target",
"title",
"translate",
"type",
"typemustmatch",
"usemap",
"value",
"width",
"wrap",
}
var eventHandlers = []string{
"onabort",
"onautocomplete",
"onautocompleteerror",
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmessage",
"onmousedown",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunload",
"onvolumechange",
"onwaiting",
}
// extra are ad-hoc values not covered by any of the lists above.
var extra = []string{
"align",
"annotation",
"annotation-xml",
"applet",
"basefont",
"bgsound",
"big",
"blink",
"center",
"color",
"desc",
"face",
"font",
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
"foreignobject",
"frame",
"frameset",
"image",
"isindex",
"listing",
"malignmark",
"marquee",
"math",
"mglyph",
"mi",
"mn",
"mo",
"ms",
"mtext",
"nobr",
"noembed",
"noframes",
"plaintext",
"prompt",
"public",
"spacer",
"strike",
"svg",
"system",
"tt",
"xmp",
}

@ -0,0 +1,713 @@
// generated by go run gen.go; DO NOT EDIT
package atom
const (
A Atom = 0x1
Abbr Atom = 0x4
Accept Atom = 0x2106
AcceptCharset Atom = 0x210e
Accesskey Atom = 0x3309
Action Atom = 0x1f606
Address Atom = 0x4f307
Align Atom = 0x1105
Alt Atom = 0x4503
Annotation Atom = 0x1670a
AnnotationXml Atom = 0x1670e
Applet Atom = 0x2b306
Area Atom = 0x2fa04
Article Atom = 0x38807
Aside Atom = 0x8305
Async Atom = 0x7b05
Audio Atom = 0xa605
Autocomplete Atom = 0x1fc0c
Autofocus Atom = 0xb309
Autoplay Atom = 0xce08
B Atom = 0x101
Base Atom = 0xd604
Basefont Atom = 0xd608
Bdi Atom = 0x1a03
Bdo Atom = 0xe703
Bgsound Atom = 0x11807
Big Atom = 0x12403
Blink Atom = 0x12705
Blockquote Atom = 0x12c0a
Body Atom = 0x2f04
Br Atom = 0x202
Button Atom = 0x13606
Canvas Atom = 0x7f06
Caption Atom = 0x1bb07
Center Atom = 0x5b506
Challenge Atom = 0x21f09
Charset Atom = 0x2807
Checked Atom = 0x32807
Cite Atom = 0x3c804
Class Atom = 0x4de05
Code Atom = 0x14904
Col Atom = 0x15003
Colgroup Atom = 0x15008
Color Atom = 0x15d05
Cols Atom = 0x16204
Colspan Atom = 0x16207
Command Atom = 0x17507
Content Atom = 0x42307
Contenteditable Atom = 0x4230f
Contextmenu Atom = 0x3310b
Controls Atom = 0x18808
Coords Atom = 0x19406
Crossorigin Atom = 0x19f0b
Data Atom = 0x44a04
Datalist Atom = 0x44a08
Datetime Atom = 0x23c08
Dd Atom = 0x26702
Default Atom = 0x8607
Defer Atom = 0x14b05
Del Atom = 0x3ef03
Desc Atom = 0x4db04
Details Atom = 0x4807
Dfn Atom = 0x6103
Dialog Atom = 0x1b06
Dir Atom = 0x6903
Dirname Atom = 0x6907
Disabled Atom = 0x10c08
Div Atom = 0x11303
Dl Atom = 0x11e02
Download Atom = 0x40008
Draggable Atom = 0x17b09
Dropzone Atom = 0x39108
Dt Atom = 0x50902
Em Atom = 0x6502
Embed Atom = 0x6505
Enctype Atom = 0x21107
Face Atom = 0x5b304
Fieldset Atom = 0x1b008
Figcaption Atom = 0x1b80a
Figure Atom = 0x1cc06
Font Atom = 0xda04
Footer Atom = 0x8d06
For Atom = 0x1d803
ForeignObject Atom = 0x1d80d
Foreignobject Atom = 0x1e50d
Form Atom = 0x1f204
Formaction Atom = 0x1f20a
Formenctype Atom = 0x20d0b
Formmethod Atom = 0x2280a
Formnovalidate Atom = 0x2320e
Formtarget Atom = 0x2470a
Frame Atom = 0x9a05
Frameset Atom = 0x9a08
H1 Atom = 0x26e02
H2 Atom = 0x29402
H3 Atom = 0x2a702
H4 Atom = 0x2e902
H5 Atom = 0x2f302
H6 Atom = 0x50b02
Head Atom = 0x2d504
Header Atom = 0x2d506
Headers Atom = 0x2d507
Height Atom = 0x25106
Hgroup Atom = 0x25906
Hidden Atom = 0x26506
High Atom = 0x26b04
Hr Atom = 0x27002
Href Atom = 0x27004
Hreflang Atom = 0x27008
Html Atom = 0x25504
HttpEquiv Atom = 0x2780a
I Atom = 0x601
Icon Atom = 0x42204
Id Atom = 0x8502
Iframe Atom = 0x29606
Image Atom = 0x29c05
Img Atom = 0x2a103
Input Atom = 0x3e805
Inputmode Atom = 0x3e809
Ins Atom = 0x1a803
Isindex Atom = 0x2a907
Ismap Atom = 0x2b005
Itemid Atom = 0x33c06
Itemprop Atom = 0x3c908
Itemref Atom = 0x5ad07
Itemscope Atom = 0x2b909
Itemtype Atom = 0x2c308
Kbd Atom = 0x1903
Keygen Atom = 0x3906
Keytype Atom = 0x53707
Kind Atom = 0x10904
Label Atom = 0xf005
Lang Atom = 0x27404
Legend Atom = 0x18206
Li Atom = 0x1202
Link Atom = 0x12804
List Atom = 0x44e04
Listing Atom = 0x44e07
Loop Atom = 0xf404
Low Atom = 0x11f03
Malignmark Atom = 0x100a
Manifest Atom = 0x5f108
Map Atom = 0x2b203
Mark Atom = 0x1604
Marquee Atom = 0x2cb07
Math Atom = 0x2d204
Max Atom = 0x2e103
Maxlength Atom = 0x2e109
Media Atom = 0x6e05
Mediagroup Atom = 0x6e0a
Menu Atom = 0x33804
Menuitem Atom = 0x33808
Meta Atom = 0x45d04
Meter Atom = 0x24205
Method Atom = 0x22c06
Mglyph Atom = 0x2a206
Mi Atom = 0x2eb02
Min Atom = 0x2eb03
Minlength Atom = 0x2eb09
Mn Atom = 0x23502
Mo Atom = 0x3ed02
Ms Atom = 0x2bc02
Mtext Atom = 0x2f505
Multiple Atom = 0x30308
Muted Atom = 0x30b05
Name Atom = 0x6c04
Nav Atom = 0x3e03
Nobr Atom = 0x5704
Noembed Atom = 0x6307
Noframes Atom = 0x9808
Noscript Atom = 0x3d208
Novalidate Atom = 0x2360a
Object Atom = 0x1ec06
Ol Atom = 0xc902
Onabort Atom = 0x13a07
Onafterprint Atom = 0x1c00c
Onautocomplete Atom = 0x1fa0e
Onautocompleteerror Atom = 0x1fa13
Onbeforeprint Atom = 0x6040d
Onbeforeunload Atom = 0x4e70e
Onblur Atom = 0xaa06
Oncancel Atom = 0xe908
Oncanplay Atom = 0x28509
Oncanplaythrough Atom = 0x28510
Onchange Atom = 0x3a708
Onclick Atom = 0x31007
Onclose Atom = 0x31707
Oncontextmenu Atom = 0x32f0d
Oncuechange Atom = 0x3420b
Ondblclick Atom = 0x34d0a
Ondrag Atom = 0x35706
Ondragend Atom = 0x35709
Ondragenter Atom = 0x3600b
Ondragleave Atom = 0x36b0b
Ondragover Atom = 0x3760a
Ondragstart Atom = 0x3800b
Ondrop Atom = 0x38f06
Ondurationchange Atom = 0x39f10
Onemptied Atom = 0x39609
Onended Atom = 0x3af07
Onerror Atom = 0x3b607
Onfocus Atom = 0x3bd07
Onhashchange Atom = 0x3da0c
Oninput Atom = 0x3e607
Oninvalid Atom = 0x3f209
Onkeydown Atom = 0x3fb09
Onkeypress Atom = 0x4080a
Onkeyup Atom = 0x41807
Onlanguagechange Atom = 0x43210
Onload Atom = 0x44206
Onloadeddata Atom = 0x4420c
Onloadedmetadata Atom = 0x45510
Onloadstart Atom = 0x46b0b
Onmessage Atom = 0x47609
Onmousedown Atom = 0x47f0b
Onmousemove Atom = 0x48a0b
Onmouseout Atom = 0x4950a
Onmouseover Atom = 0x4a20b
Onmouseup Atom = 0x4ad09
Onmousewheel Atom = 0x4b60c
Onoffline Atom = 0x4c209
Ononline Atom = 0x4cb08
Onpagehide Atom = 0x4d30a
Onpageshow Atom = 0x4fe0a
Onpause Atom = 0x50d07
Onplay Atom = 0x51706
Onplaying Atom = 0x51709
Onpopstate Atom = 0x5200a
Onprogress Atom = 0x52a0a
Onratechange Atom = 0x53e0c
Onreset Atom = 0x54a07
Onresize Atom = 0x55108
Onscroll Atom = 0x55f08
Onseeked Atom = 0x56708
Onseeking Atom = 0x56f09
Onselect Atom = 0x57808
Onshow Atom = 0x58206
Onsort Atom = 0x58b06
Onstalled Atom = 0x59509
Onstorage Atom = 0x59e09
Onsubmit Atom = 0x5a708
Onsuspend Atom = 0x5bb09
Ontimeupdate Atom = 0xdb0c
Ontoggle Atom = 0x5c408
Onunload Atom = 0x5cc08
Onvolumechange Atom = 0x5d40e
Onwaiting Atom = 0x5e209
Open Atom = 0x3cf04
Optgroup Atom = 0xf608
Optimum Atom = 0x5eb07
Option Atom = 0x60006
Output Atom = 0x49c06
P Atom = 0xc01
Param Atom = 0xc05
Pattern Atom = 0x5107
Ping Atom = 0x7704
Placeholder Atom = 0xc30b
Plaintext Atom = 0xfd09
Poster Atom = 0x15706
Pre Atom = 0x25e03
Preload Atom = 0x25e07
Progress Atom = 0x52c08
Prompt Atom = 0x5fa06
Public Atom = 0x41e06
Q Atom = 0x13101
Radiogroup Atom = 0x30a
Readonly Atom = 0x2fb08
Rel Atom = 0x25f03
Required Atom = 0x1d008
Reversed Atom = 0x5a08
Rows Atom = 0x9204
Rowspan Atom = 0x9207
Rp Atom = 0x1c602
Rt Atom = 0x13f02
Ruby Atom = 0xaf04
S Atom = 0x2c01
Samp Atom = 0x4e04
Sandbox Atom = 0xbb07
Scope Atom = 0x2bd05
Scoped Atom = 0x2bd06
Script Atom = 0x3d406
Seamless Atom = 0x31c08
Section Atom = 0x4e207
Select Atom = 0x57a06
Selected Atom = 0x57a08
Shape Atom = 0x4f905
Size Atom = 0x55504
Sizes Atom = 0x55505
Small Atom = 0x18f05
Sortable Atom = 0x58d08
Sorted Atom = 0x19906
Source Atom = 0x1aa06
Spacer Atom = 0x2db06
Span Atom = 0x9504
Spellcheck Atom = 0x3230a
Src Atom = 0x3c303
Srcdoc Atom = 0x3c306
Srclang Atom = 0x41107
Start Atom = 0x38605
Step Atom = 0x5f704
Strike Atom = 0x53306
Strong Atom = 0x55906
Style Atom = 0x61105
Sub Atom = 0x5a903
Summary Atom = 0x61607
Sup Atom = 0x61d03
Svg Atom = 0x62003
System Atom = 0x62306
Tabindex Atom = 0x46308
Table Atom = 0x42d05
Target Atom = 0x24b06
Tbody Atom = 0x2e05
Td Atom = 0x4702
Template Atom = 0x62608
Textarea Atom = 0x2f608
Tfoot Atom = 0x8c05
Th Atom = 0x22e02
Thead Atom = 0x2d405
Time Atom = 0xdd04
Title Atom = 0xa105
Tr Atom = 0x10502
Track Atom = 0x10505
Translate Atom = 0x14009
Tt Atom = 0x5302
Type Atom = 0x21404
Typemustmatch Atom = 0x2140d
U Atom = 0xb01
Ul Atom = 0x8a02
Usemap Atom = 0x51106
Value Atom = 0x4005
Var Atom = 0x11503
Video Atom = 0x28105
Wbr Atom = 0x12103
Width Atom = 0x50705
Wrap Atom = 0x58704
Xmp Atom = 0xc103
)
const hash0 = 0xc17da63e
const maxAtomLen = 19
var table = [1 << 9]Atom{
0x1: 0x48a0b, // onmousemove
0x2: 0x5e209, // onwaiting
0x3: 0x1fa13, // onautocompleteerror
0x4: 0x5fa06, // prompt
0x7: 0x5eb07, // optimum
0x8: 0x1604, // mark
0xa: 0x5ad07, // itemref
0xb: 0x4fe0a, // onpageshow
0xc: 0x57a06, // select
0xd: 0x17b09, // draggable
0xe: 0x3e03, // nav
0xf: 0x17507, // command
0x11: 0xb01, // u
0x14: 0x2d507, // headers
0x15: 0x44a08, // datalist
0x17: 0x4e04, // samp
0x1a: 0x3fb09, // onkeydown
0x1b: 0x55f08, // onscroll
0x1c: 0x15003, // col
0x20: 0x3c908, // itemprop
0x21: 0x2780a, // http-equiv
0x22: 0x61d03, // sup
0x24: 0x1d008, // required
0x2b: 0x25e07, // preload
0x2c: 0x6040d, // onbeforeprint
0x2d: 0x3600b, // ondragenter
0x2e: 0x50902, // dt
0x2f: 0x5a708, // onsubmit
0x30: 0x27002, // hr
0x31: 0x32f0d, // oncontextmenu
0x33: 0x29c05, // image
0x34: 0x50d07, // onpause
0x35: 0x25906, // hgroup
0x36: 0x7704, // ping
0x37: 0x57808, // onselect
0x3a: 0x11303, // div
0x3b: 0x1fa0e, // onautocomplete
0x40: 0x2eb02, // mi
0x41: 0x31c08, // seamless
0x42: 0x2807, // charset
0x43: 0x8502, // id
0x44: 0x5200a, // onpopstate
0x45: 0x3ef03, // del
0x46: 0x2cb07, // marquee
0x47: 0x3309, // accesskey
0x49: 0x8d06, // footer
0x4a: 0x44e04, // list
0x4b: 0x2b005, // ismap
0x51: 0x33804, // menu
0x52: 0x2f04, // body
0x55: 0x9a08, // frameset
0x56: 0x54a07, // onreset
0x57: 0x12705, // blink
0x58: 0xa105, // title
0x59: 0x38807, // article
0x5b: 0x22e02, // th
0x5d: 0x13101, // q
0x5e: 0x3cf04, // open
0x5f: 0x2fa04, // area
0x61: 0x44206, // onload
0x62: 0xda04, // font
0x63: 0xd604, // base
0x64: 0x16207, // colspan
0x65: 0x53707, // keytype
0x66: 0x11e02, // dl
0x68: 0x1b008, // fieldset
0x6a: 0x2eb03, // min
0x6b: 0x11503, // var
0x6f: 0x2d506, // header
0x70: 0x13f02, // rt
0x71: 0x15008, // colgroup
0x72: 0x23502, // mn
0x74: 0x13a07, // onabort
0x75: 0x3906, // keygen
0x76: 0x4c209, // onoffline
0x77: 0x21f09, // challenge
0x78: 0x2b203, // map
0x7a: 0x2e902, // h4
0x7b: 0x3b607, // onerror
0x7c: 0x2e109, // maxlength
0x7d: 0x2f505, // mtext
0x7e: 0xbb07, // sandbox
0x7f: 0x58b06, // onsort
0x80: 0x100a, // malignmark
0x81: 0x45d04, // meta
0x82: 0x7b05, // async
0x83: 0x2a702, // h3
0x84: 0x26702, // dd
0x85: 0x27004, // href
0x86: 0x6e0a, // mediagroup
0x87: 0x19406, // coords
0x88: 0x41107, // srclang
0x89: 0x34d0a, // ondblclick
0x8a: 0x4005, // value
0x8c: 0xe908, // oncancel
0x8e: 0x3230a, // spellcheck
0x8f: 0x9a05, // frame
0x91: 0x12403, // big
0x94: 0x1f606, // action
0x95: 0x6903, // dir
0x97: 0x2fb08, // readonly
0x99: 0x42d05, // table
0x9a: 0x61607, // summary
0x9b: 0x12103, // wbr
0x9c: 0x30a, // radiogroup
0x9d: 0x6c04, // name
0x9f: 0x62306, // system
0xa1: 0x15d05, // color
0xa2: 0x7f06, // canvas
0xa3: 0x25504, // html
0xa5: 0x56f09, // onseeking
0xac: 0x4f905, // shape
0xad: 0x25f03, // rel
0xae: 0x28510, // oncanplaythrough
0xaf: 0x3760a, // ondragover
0xb0: 0x62608, // template
0xb1: 0x1d80d, // foreignObject
0xb3: 0x9204, // rows
0xb6: 0x44e07, // listing
0xb7: 0x49c06, // output
0xb9: 0x3310b, // contextmenu
0xbb: 0x11f03, // low
0xbc: 0x1c602, // rp
0xbd: 0x5bb09, // onsuspend
0xbe: 0x13606, // button
0xbf: 0x4db04, // desc
0xc1: 0x4e207, // section
0xc2: 0x52a0a, // onprogress
0xc3: 0x59e09, // onstorage
0xc4: 0x2d204, // math
0xc5: 0x4503, // alt
0xc7: 0x8a02, // ul
0xc8: 0x5107, // pattern
0xc9: 0x4b60c, // onmousewheel
0xca: 0x35709, // ondragend
0xcb: 0xaf04, // ruby
0xcc: 0xc01, // p
0xcd: 0x31707, // onclose
0xce: 0x24205, // meter
0xcf: 0x11807, // bgsound
0xd2: 0x25106, // height
0xd4: 0x101, // b
0xd5: 0x2c308, // itemtype
0xd8: 0x1bb07, // caption
0xd9: 0x10c08, // disabled
0xdb: 0x33808, // menuitem
0xdc: 0x62003, // svg
0xdd: 0x18f05, // small
0xde: 0x44a04, // data
0xe0: 0x4cb08, // ononline
0xe1: 0x2a206, // mglyph
0xe3: 0x6505, // embed
0xe4: 0x10502, // tr
0xe5: 0x46b0b, // onloadstart
0xe7: 0x3c306, // srcdoc
0xeb: 0x5c408, // ontoggle
0xed: 0xe703, // bdo
0xee: 0x4702, // td
0xef: 0x8305, // aside
0xf0: 0x29402, // h2
0xf1: 0x52c08, // progress
0xf2: 0x12c0a, // blockquote
0xf4: 0xf005, // label
0xf5: 0x601, // i
0xf7: 0x9207, // rowspan
0xfb: 0x51709, // onplaying
0xfd: 0x2a103, // img
0xfe: 0xf608, // optgroup
0xff: 0x42307, // content
0x101: 0x53e0c, // onratechange
0x103: 0x3da0c, // onhashchange
0x104: 0x4807, // details
0x106: 0x40008, // download
0x109: 0x14009, // translate
0x10b: 0x4230f, // contenteditable
0x10d: 0x36b0b, // ondragleave
0x10e: 0x2106, // accept
0x10f: 0x57a08, // selected
0x112: 0x1f20a, // formaction
0x113: 0x5b506, // center
0x115: 0x45510, // onloadedmetadata
0x116: 0x12804, // link
0x117: 0xdd04, // time
0x118: 0x19f0b, // crossorigin
0x119: 0x3bd07, // onfocus
0x11a: 0x58704, // wrap
0x11b: 0x42204, // icon
0x11d: 0x28105, // video
0x11e: 0x4de05, // class
0x121: 0x5d40e, // onvolumechange
0x122: 0xaa06, // onblur
0x123: 0x2b909, // itemscope
0x124: 0x61105, // style
0x127: 0x41e06, // public
0x129: 0x2320e, // formnovalidate
0x12a: 0x58206, // onshow
0x12c: 0x51706, // onplay
0x12d: 0x3c804, // cite
0x12e: 0x2bc02, // ms
0x12f: 0xdb0c, // ontimeupdate
0x130: 0x10904, // kind
0x131: 0x2470a, // formtarget
0x135: 0x3af07, // onended
0x136: 0x26506, // hidden
0x137: 0x2c01, // s
0x139: 0x2280a, // formmethod
0x13a: 0x3e805, // input
0x13c: 0x50b02, // h6
0x13d: 0xc902, // ol
0x13e: 0x3420b, // oncuechange
0x13f: 0x1e50d, // foreignobject
0x143: 0x4e70e, // onbeforeunload
0x144: 0x2bd05, // scope
0x145: 0x39609, // onemptied
0x146: 0x14b05, // defer
0x147: 0xc103, // xmp
0x148: 0x39f10, // ondurationchange
0x149: 0x1903, // kbd
0x14c: 0x47609, // onmessage
0x14d: 0x60006, // option
0x14e: 0x2eb09, // minlength
0x14f: 0x32807, // checked
0x150: 0xce08, // autoplay
0x152: 0x202, // br
0x153: 0x2360a, // novalidate
0x156: 0x6307, // noembed
0x159: 0x31007, // onclick
0x15a: 0x47f0b, // onmousedown
0x15b: 0x3a708, // onchange
0x15e: 0x3f209, // oninvalid
0x15f: 0x2bd06, // scoped
0x160: 0x18808, // controls
0x161: 0x30b05, // muted
0x162: 0x58d08, // sortable
0x163: 0x51106, // usemap
0x164: 0x1b80a, // figcaption
0x165: 0x35706, // ondrag
0x166: 0x26b04, // high
0x168: 0x3c303, // src
0x169: 0x15706, // poster
0x16b: 0x1670e, // annotation-xml
0x16c: 0x5f704, // step
0x16d: 0x4, // abbr
0x16e: 0x1b06, // dialog
0x170: 0x1202, // li
0x172: 0x3ed02, // mo
0x175: 0x1d803, // for
0x176: 0x1a803, // ins
0x178: 0x55504, // size
0x179: 0x43210, // onlanguagechange
0x17a: 0x8607, // default
0x17b: 0x1a03, // bdi
0x17c: 0x4d30a, // onpagehide
0x17d: 0x6907, // dirname
0x17e: 0x21404, // type
0x17f: 0x1f204, // form
0x181: 0x28509, // oncanplay
0x182: 0x6103, // dfn
0x183: 0x46308, // tabindex
0x186: 0x6502, // em
0x187: 0x27404, // lang
0x189: 0x39108, // dropzone
0x18a: 0x4080a, // onkeypress
0x18b: 0x23c08, // datetime
0x18c: 0x16204, // cols
0x18d: 0x1, // a
0x18e: 0x4420c, // onloadeddata
0x190: 0xa605, // audio
0x192: 0x2e05, // tbody
0x193: 0x22c06, // method
0x195: 0xf404, // loop
0x196: 0x29606, // iframe
0x198: 0x2d504, // head
0x19e: 0x5f108, // manifest
0x19f: 0xb309, // autofocus
0x1a0: 0x14904, // code
0x1a1: 0x55906, // strong
0x1a2: 0x30308, // multiple
0x1a3: 0xc05, // param
0x1a6: 0x21107, // enctype
0x1a7: 0x5b304, // face
0x1a8: 0xfd09, // plaintext
0x1a9: 0x26e02, // h1
0x1aa: 0x59509, // onstalled
0x1ad: 0x3d406, // script
0x1ae: 0x2db06, // spacer
0x1af: 0x55108, // onresize
0x1b0: 0x4a20b, // onmouseover
0x1b1: 0x5cc08, // onunload
0x1b2: 0x56708, // onseeked
0x1b4: 0x2140d, // typemustmatch
0x1b5: 0x1cc06, // figure
0x1b6: 0x4950a, // onmouseout
0x1b7: 0x25e03, // pre
0x1b8: 0x50705, // width
0x1b9: 0x19906, // sorted
0x1bb: 0x5704, // nobr
0x1be: 0x5302, // tt
0x1bf: 0x1105, // align
0x1c0: 0x3e607, // oninput
0x1c3: 0x41807, // onkeyup
0x1c6: 0x1c00c, // onafterprint
0x1c7: 0x210e, // accept-charset
0x1c8: 0x33c06, // itemid
0x1c9: 0x3e809, // inputmode
0x1cb: 0x53306, // strike
0x1cc: 0x5a903, // sub
0x1cd: 0x10505, // track
0x1ce: 0x38605, // start
0x1d0: 0xd608, // basefont
0x1d6: 0x1aa06, // source
0x1d7: 0x18206, // legend
0x1d8: 0x2d405, // thead
0x1da: 0x8c05, // tfoot
0x1dd: 0x1ec06, // object
0x1de: 0x6e05, // media
0x1df: 0x1670a, // annotation
0x1e0: 0x20d0b, // formenctype
0x1e2: 0x3d208, // noscript
0x1e4: 0x55505, // sizes
0x1e5: 0x1fc0c, // autocomplete
0x1e6: 0x9504, // span
0x1e7: 0x9808, // noframes
0x1e8: 0x24b06, // target
0x1e9: 0x38f06, // ondrop
0x1ea: 0x2b306, // applet
0x1ec: 0x5a08, // reversed
0x1f0: 0x2a907, // isindex
0x1f3: 0x27008, // hreflang
0x1f5: 0x2f302, // h5
0x1f6: 0x4f307, // address
0x1fa: 0x2e103, // max
0x1fb: 0xc30b, // placeholder
0x1fc: 0x2f608, // textarea
0x1fe: 0x4ad09, // onmouseup
0x1ff: 0x3800b, // ondragstart
}
const atomText = "abbradiogrouparamalignmarkbdialogaccept-charsetbodyaccesskey" +
"genavaluealtdetailsampatternobreversedfnoembedirnamediagroup" +
"ingasyncanvasidefaultfooterowspanoframesetitleaudionblurubya" +
"utofocusandboxmplaceholderautoplaybasefontimeupdatebdoncance" +
"labelooptgrouplaintextrackindisabledivarbgsoundlowbrbigblink" +
"blockquotebuttonabortranslatecodefercolgroupostercolorcolspa" +
"nnotation-xmlcommandraggablegendcontrolsmallcoordsortedcross" +
"originsourcefieldsetfigcaptionafterprintfigurequiredforeignO" +
"bjectforeignobjectformactionautocompleteerrorformenctypemust" +
"matchallengeformmethodformnovalidatetimeterformtargetheightm" +
"lhgroupreloadhiddenhigh1hreflanghttp-equivideoncanplaythroug" +
"h2iframeimageimglyph3isindexismappletitemscopeditemtypemarqu" +
"eematheaderspacermaxlength4minlength5mtextareadonlymultiplem" +
"utedonclickoncloseamlesspellcheckedoncontextmenuitemidoncuec" +
"hangeondblclickondragendondragenterondragleaveondragoverondr" +
"agstarticleondropzonemptiedondurationchangeonendedonerroronf" +
"ocusrcdocitempropenoscriptonhashchangeoninputmodeloninvalido" +
"nkeydownloadonkeypressrclangonkeyupublicontenteditableonlang" +
"uagechangeonloadeddatalistingonloadedmetadatabindexonloadsta" +
"rtonmessageonmousedownonmousemoveonmouseoutputonmouseoveronm" +
"ouseuponmousewheelonofflineononlineonpagehidesclassectionbef" +
"oreunloaddresshapeonpageshowidth6onpausemaponplayingonpopsta" +
"teonprogresstrikeytypeonratechangeonresetonresizestrongonscr" +
"ollonseekedonseekingonselectedonshowraponsortableonstalledon" +
"storageonsubmitemrefacenteronsuspendontoggleonunloadonvolume" +
"changeonwaitingoptimumanifestepromptoptionbeforeprintstylesu" +
"mmarysupsvgsystemplate"

@ -0,0 +1,351 @@
// generated by go run gen.go -test; DO NOT EDIT
package atom
var testAtomList = []string{
"a",
"abbr",
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"address",
"align",
"alt",
"annotation",
"annotation-xml",
"applet",
"area",
"article",
"aside",
"async",
"audio",
"autocomplete",
"autofocus",
"autoplay",
"b",
"base",
"basefont",
"bdi",
"bdo",
"bgsound",
"big",
"blink",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"center",
"challenge",
"charset",
"checked",
"cite",
"cite",
"class",
"code",
"col",
"colgroup",
"color",
"cols",
"colspan",
"command",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"data",
"datalist",
"datetime",
"dd",
"default",
"defer",
"del",
"desc",
"details",
"dfn",
"dialog",
"dir",
"dirname",
"disabled",
"div",
"dl",
"download",
"draggable",
"dropzone",
"dt",
"em",
"embed",
"enctype",
"face",
"fieldset",
"figcaption",
"figure",
"font",
"footer",
"for",
"foreignObject",
"foreignobject",
"form",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"frame",
"frameset",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"headers",
"height",
"hgroup",
"hidden",
"high",
"hr",
"href",
"hreflang",
"html",
"http-equiv",
"i",
"icon",
"id",
"iframe",
"image",
"img",
"input",
"inputmode",
"ins",
"isindex",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"kbd",
"keygen",
"keytype",
"kind",
"label",
"label",
"lang",
"legend",
"li",
"link",
"list",
"listing",
"loop",
"low",
"malignmark",
"manifest",
"map",
"mark",
"marquee",
"math",
"max",
"maxlength",
"media",
"mediagroup",
"menu",
"menuitem",
"meta",
"meter",
"method",
"mglyph",
"mi",
"min",
"minlength",
"mn",
"mo",
"ms",
"mtext",
"multiple",
"muted",
"name",
"nav",
"nobr",
"noembed",
"noframes",
"noscript",
"novalidate",
"object",
"ol",
"onabort",
"onafterprint",
"onautocomplete",
"onautocompleteerror",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmessage",
"onmousedown",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunload",
"onvolumechange",
"onwaiting",
"open",
"optgroup",
"optimum",
"option",
"output",
"p",
"param",
"pattern",
"ping",
"placeholder",
"plaintext",
"poster",
"pre",
"preload",
"progress",
"prompt",
"public",
"q",
"radiogroup",
"readonly",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"rp",
"rt",
"ruby",
"s",
"samp",
"sandbox",
"scope",
"scoped",
"script",
"seamless",
"section",
"select",
"selected",
"shape",
"size",
"sizes",
"small",
"sortable",
"sorted",
"source",
"spacer",
"span",
"span",
"spellcheck",
"src",
"srcdoc",
"srclang",
"start",
"step",
"strike",
"strong",
"style",
"style",
"sub",
"summary",
"sup",
"svg",
"system",
"tabindex",
"table",
"target",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"title",
"tr",
"track",
"translate",
"tt",
"type",
"typemustmatch",
"u",
"ul",
"usemap",
"value",
"var",
"video",
"wbr",
"width",
"wrap",
"xmp",
}

@ -0,0 +1,244 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package charset provides common text encodings for HTML documents.
//
// The mapping from encoding labels to encodings is defined at
// https://encoding.spec.whatwg.org/.
package charset
import (
"bytes"
"fmt"
"io"
"mime"
"strings"
"unicode/utf8"
"golang.org/x/net/html"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/transform"
)
// Lookup returns the encoding with the specified label, and its canonical
// name. It returns nil and the empty string if label is not one of the
// standard encodings for HTML. Matching is case-insensitive and ignores
// leading and trailing whitespace.
func Lookup(label string) (e encoding.Encoding, name string) {
label = strings.ToLower(strings.Trim(label, "\t\n\r\f "))
enc := encodings[label]
return enc.e, enc.name
}
// DetermineEncoding determines the encoding of an HTML document by examining
// up to the first 1024 bytes of content and the declared Content-Type.
//
// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) {
if len(content) > 1024 {
content = content[:1024]
}
for _, b := range boms {
if bytes.HasPrefix(content, b.bom) {
e, name = Lookup(b.enc)
return e, name, true
}
}
if _, params, err := mime.ParseMediaType(contentType); err == nil {
if cs, ok := params["charset"]; ok {
if e, name = Lookup(cs); e != nil {
return e, name, true
}
}
}
if len(content) > 0 {
e, name = prescan(content)
if e != nil {
return e, name, false
}
}
// Try to detect UTF-8.
// First eliminate any partial rune at the end.
for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
b := content[i]
if b < 0x80 {
break
}
if utf8.RuneStart(b) {
content = content[:i]
break
}
}
hasHighBit := false
for _, c := range content {
if c >= 0x80 {
hasHighBit = true
break
}
}
if hasHighBit && utf8.Valid(content) {
return encoding.Nop, "utf-8", false
}
// TODO: change default depending on user's locale?
return charmap.Windows1252, "windows-1252", false
}
// NewReader returns an io.Reader that converts the content of r to UTF-8.
// It calls DetermineEncoding to find out what r's encoding is.
func NewReader(r io.Reader, contentType string) (io.Reader, error) {
preview := make([]byte, 1024)
n, err := io.ReadFull(r, preview)
switch {
case err == io.ErrUnexpectedEOF:
preview = preview[:n]
r = bytes.NewReader(preview)
case err != nil:
return nil, err
default:
r = io.MultiReader(bytes.NewReader(preview), r)
}
if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
r = transform.NewReader(r, e.NewDecoder())
}
return r, nil
}
// NewReaderLabel returns a reader that converts from the specified charset to
// UTF-8. It uses Lookup to find the encoding that corresponds to label, and
// returns an error if Lookup returns nil. It is suitable for use as
// encoding/xml.Decoder's CharsetReader function.
func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
e, _ := Lookup(label)
if e == nil {
return nil, fmt.Errorf("unsupported charset: %q", label)
}
return transform.NewReader(input, e.NewDecoder()), nil
}
func prescan(content []byte) (e encoding.Encoding, name string) {
z := html.NewTokenizer(bytes.NewReader(content))
for {
switch z.Next() {
case html.ErrorToken:
return nil, ""
case html.StartTagToken, html.SelfClosingTagToken:
tagName, hasAttr := z.TagName()
if !bytes.Equal(tagName, []byte("meta")) {
continue
}
attrList := make(map[string]bool)
gotPragma := false
const (
dontKnow = iota
doNeedPragma
doNotNeedPragma
)
needPragma := dontKnow
name = ""
e = nil
for hasAttr {
var key, val []byte
key, val, hasAttr = z.TagAttr()
ks := string(key)
if attrList[ks] {
continue
}
attrList[ks] = true
for i, c := range val {
if 'A' <= c && c <= 'Z' {
val[i] = c + 0x20
}
}
switch ks {
case "http-equiv":
if bytes.Equal(val, []byte("content-type")) {
gotPragma = true
}
case "content":
if e == nil {
name = fromMetaElement(string(val))
if name != "" {
e, name = Lookup(name)
if e != nil {
needPragma = doNeedPragma
}
}
}
case "charset":
e, name = Lookup(string(val))
needPragma = doNotNeedPragma
}
}
if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
continue
}
if strings.HasPrefix(name, "utf-16") {
name = "utf-8"
e = encoding.Nop
}
if e != nil {
return e, name
}
}
}
}
func fromMetaElement(s string) string {
for s != "" {
csLoc := strings.Index(s, "charset")
if csLoc == -1 {
return ""
}
s = s[csLoc+len("charset"):]
s = strings.TrimLeft(s, " \t\n\f\r")
if !strings.HasPrefix(s, "=") {
continue
}
s = s[1:]
s = strings.TrimLeft(s, " \t\n\f\r")
if s == "" {
return ""
}
if q := s[0]; q == '"' || q == '\'' {
s = s[1:]
closeQuote := strings.IndexRune(s, rune(q))
if closeQuote == -1 {
return ""
}
return s[:closeQuote]
}
end := strings.IndexAny(s, "; \t\n\f\r")
if end == -1 {
end = len(s)
}
return s[:end]
}
return ""
}
var boms = []struct {
bom []byte
enc string
}{
{[]byte{0xfe, 0xff}, "utf-16be"},
{[]byte{0xff, 0xfe}, "utf-16le"},
{[]byte{0xef, 0xbb, 0xbf}, "utf-8"},
}

@ -0,0 +1,236 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package charset
import (
"bytes"
"encoding/xml"
"io/ioutil"
"runtime"
"strings"
"testing"
"golang.org/x/text/transform"
)
func transformString(t transform.Transformer, s string) (string, error) {
r := transform.NewReader(strings.NewReader(s), t)
b, err := ioutil.ReadAll(r)
return string(b), err
}
var testCases = []struct {
utf8, other, otherEncoding string
}{
{"Résumé", "Résumé", "utf8"},
{"Résumé", "R\xe9sum\xe9", "latin1"},
{"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
{"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"},
{"Hello, world", "Hello, world", "ASCII"},
{"Gdańsk", "Gda\xf1sk", "ISO-8859-2"},
{"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"},
{"สำหรบ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"},
{"latviešu", "latvie\xf0u", "ISO-8859-13"},
{"Seònaid", "Se\xf2naid", "ISO-8859-14"},
{"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"},
{"românește", "rom\xe2ne\xbate", "ISO-8859-16"},
{"nutraĵo", "nutra\xbco", "ISO-8859-3"},
{"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"},
{"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"},
{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"},
{"Kağan", "Ka\xf0an", "ISO-8859-9"},
{"Résumé", "R\x8esum\x8e", "macintosh"},
{"Gdańsk", "Gda\xf1sk", "windows-1250"},
{"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"},
{"Résumé", "R\xe9sum\xe9", "windows-1252"},
{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"},
{"Kağan", "Ka\xf0an", "windows-1254"},
{ִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"},
{"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"},
{"latviešu", "latvie\xf0u", "windows-1257"},
{"Việt", "Vi\xea\xf2t", "windows-1258"},
{"สำหรบ", "\xca\xd3\xcb\xc3\u047a", "windows-874"},
{"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"},
{"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"},
{"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"},
{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"},
{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"},
{ִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"},
{"㧯", "\x82\x31\x89\x38", "gb18030"},
{"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"},
{"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"},
{"イウエオカ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"},
{"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"},
{"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"},
{"네이트 | 즐거움의 시작, 슈파스(Spaβ) NATE", "\xb3\xd7\xc0\xcc\xc6\xae | \xc1\xf1\xb0\xc5\xbf\xf2\xc0\xc7 \xbd\xc3\xc0\xdb, \xbd\xb4\xc6\xc4\xbd\xba(Spa\xa5\xe2) NATE", "EUC-KR"},
}
func TestDecode(t *testing.T) {
for _, tc := range testCases {
e, _ := Lookup(tc.otherEncoding)
if e == nil {
t.Errorf("%s: not found", tc.otherEncoding)
continue
}
s, err := transformString(e.NewDecoder(), tc.other)
if err != nil {
t.Errorf("%s: decode %q: %v", tc.otherEncoding, tc.other, err)
continue
}
if s != tc.utf8 {
t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.utf8)
}
}
}
func TestEncode(t *testing.T) {
for _, tc := range testCases {
e, _ := Lookup(tc.otherEncoding)
if e == nil {
t.Errorf("%s: not found", tc.otherEncoding)
continue
}
s, err := transformString(e.NewEncoder(), tc.utf8)
if err != nil {
t.Errorf("%s: encode %q: %s", tc.otherEncoding, tc.utf8, err)
continue
}
if s != tc.other {
t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.other)
}
}
}
// TestNames verifies that you can pass an encoding's name to Lookup and get
// the same encoding back (except for "replacement").
func TestNames(t *testing.T) {
for _, e := range encodings {
if e.name == "replacement" {
continue
}
_, got := Lookup(e.name)
if got != e.name {
t.Errorf("got %q, want %q", got, e.name)
continue
}
}
}
var sniffTestCases = []struct {
filename, declared, want string
}{
{"HTTP-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
{"UTF-16LE-BOM.html", "", "utf-16le"},
{"UTF-16BE-BOM.html", "", "utf-16be"},
{"meta-content-attribute.html", "text/html", "iso-8859-15"},
{"meta-charset-attribute.html", "text/html", "iso-8859-15"},
{"No-encoding-declaration.html", "text/html", "utf-8"},
{"HTTP-vs-UTF-8-BOM.html", "text/html; charset=iso-8859-15", "utf-8"},
{"HTTP-vs-meta-content.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
{"HTTP-vs-meta-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
{"UTF-8-BOM-vs-meta-content.html", "text/html", "utf-8"},
{"UTF-8-BOM-vs-meta-charset.html", "text/html", "utf-8"},
}
func TestSniff(t *testing.T) {
switch runtime.GOOS {
case "nacl": // platforms that don't permit direct file system access
t.Skipf("not supported on %q", runtime.GOOS)
}
for _, tc := range sniffTestCases {
content, err := ioutil.ReadFile("testdata/" + tc.filename)
if err != nil {
t.Errorf("%s: error reading file: %v", tc.filename, err)
continue
}
_, name, _ := DetermineEncoding(content, tc.declared)
if name != tc.want {
t.Errorf("%s: got %q, want %q", tc.filename, name, tc.want)
continue
}
}
}
func TestReader(t *testing.T) {
switch runtime.GOOS {
case "nacl": // platforms that don't permit direct file system access
t.Skipf("not supported on %q", runtime.GOOS)
}
for _, tc := range sniffTestCases {
content, err := ioutil.ReadFile("testdata/" + tc.filename)
if err != nil {
t.Errorf("%s: error reading file: %v", tc.filename, err)
continue
}
r, err := NewReader(bytes.NewReader(content), tc.declared)
if err != nil {
t.Errorf("%s: error creating reader: %v", tc.filename, err)
continue
}
got, err := ioutil.ReadAll(r)
if err != nil {
t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err)
continue
}
e, _ := Lookup(tc.want)
want, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder()))
if err != nil {
t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err)
continue
}
if !bytes.Equal(got, want) {
t.Errorf("%s: got %q, want %q", tc.filename, got, want)
continue
}
}
}
var metaTestCases = []struct {
meta, want string
}{
{"", ""},
{"text/html", ""},
{"text/html; charset utf-8", ""},
{"text/html; charset=latin-2", "latin-2"},
{"text/html; charset; charset = utf-8", "utf-8"},
{`charset="big5"`, "big5"},
{"charset='shift_jis'", "shift_jis"},
}
func TestFromMeta(t *testing.T) {
for _, tc := range metaTestCases {
got := fromMetaElement(tc.meta)
if got != tc.want {
t.Errorf("%q: got %q, want %q", tc.meta, got, tc.want)
}
}
}
func TestXML(t *testing.T) {
const s = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
d := xml.NewDecoder(strings.NewReader(s))
d.CharsetReader = NewReaderLabel
var a struct {
Word string
}
err := d.Decode(&a)
if err != nil {
t.Fatalf("Decode: %v", err)
}
want := "résumé"
if a.Word != want {
t.Errorf("got %q, want %q", a.Word, want)
}
}

@ -0,0 +1,111 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// Download https://encoding.spec.whatwg.org/encodings.json and use it to
// generate table.go.
import (
"encoding/json"
"fmt"
"log"
"net/http"
"strings"
)
type enc struct {
Name string
Labels []string
}
type group struct {
Encodings []enc
Heading string
}
const specURL = "https://encoding.spec.whatwg.org/encodings.json"
func main() {
resp, err := http.Get(specURL)
if err != nil {
log.Fatalf("error fetching %s: %s", specURL, err)
}
if resp.StatusCode != 200 {
log.Fatalf("error fetching %s: HTTP status %s", specURL, resp.Status)
}
defer resp.Body.Close()
var groups []group
d := json.NewDecoder(resp.Body)
err = d.Decode(&groups)
if err != nil {
log.Fatalf("error reading encodings.json: %s", err)
}
fmt.Println("// generated by go run gen.go; DO NOT EDIT")
fmt.Println()
fmt.Println("package charset")
fmt.Println()
fmt.Println("import (")
fmt.Println(`"golang.org/x/text/encoding"`)
for _, pkg := range []string{"charmap", "japanese", "korean", "simplifiedchinese", "traditionalchinese", "unicode"} {
fmt.Printf("\"golang.org/x/text/encoding/%s\"\n", pkg)
}
fmt.Println(")")
fmt.Println()
fmt.Println("var encodings = map[string]struct{e encoding.Encoding; name string} {")
for _, g := range groups {
for _, e := range g.Encodings {
goName, ok := miscNames[e.Name]
if !ok {
for k, v := range prefixes {
if strings.HasPrefix(e.Name, k) {
goName = v + e.Name[len(k):]
break
}
}
if goName == "" {
log.Fatalf("unrecognized encoding name: %s", e.Name)
}
}
for _, label := range e.Labels {
fmt.Printf("%q: {%s, %q},\n", label, goName, e.Name)
}
}
}
fmt.Println("}")
}
var prefixes = map[string]string{
"iso-8859-": "charmap.ISO8859_",
"windows-": "charmap.Windows",
}
var miscNames = map[string]string{
"utf-8": "encoding.Nop",
"ibm866": "charmap.CodePage866",
"iso-8859-8-i": "charmap.ISO8859_8",
"koi8-r": "charmap.KOI8R",
"koi8-u": "charmap.KOI8U",
"macintosh": "charmap.Macintosh",
"x-mac-cyrillic": "charmap.MacintoshCyrillic",
"gbk": "simplifiedchinese.GBK",
"gb18030": "simplifiedchinese.GB18030",
"hz-gb-2312": "simplifiedchinese.HZGB2312",
"big5": "traditionalchinese.Big5",
"euc-jp": "japanese.EUCJP",
"iso-2022-jp": "japanese.ISO2022JP",
"shift_jis": "japanese.ShiftJIS",
"euc-kr": "korean.EUCKR",
"replacement": "encoding.Replacement",
"utf-16be": "unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)",
"utf-16le": "unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)",
"x-user-defined": "charmap.XUserDefined",
}

@ -0,0 +1,235 @@
// generated by go run gen.go; DO NOT EDIT
package charset
import (
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
)
var encodings = map[string]struct {
e encoding.Encoding
name string
}{
"unicode-1-1-utf-8": {encoding.Nop, "utf-8"},
"utf-8": {encoding.Nop, "utf-8"},
"utf8": {encoding.Nop, "utf-8"},
"866": {charmap.CodePage866, "ibm866"},
"cp866": {charmap.CodePage866, "ibm866"},
"csibm866": {charmap.CodePage866, "ibm866"},
"ibm866": {charmap.CodePage866, "ibm866"},
"csisolatin2": {charmap.ISO8859_2, "iso-8859-2"},
"iso-8859-2": {charmap.ISO8859_2, "iso-8859-2"},
"iso-ir-101": {charmap.ISO8859_2, "iso-8859-2"},
"iso8859-2": {charmap.ISO8859_2, "iso-8859-2"},
"iso88592": {charmap.ISO8859_2, "iso-8859-2"},
"iso_8859-2": {charmap.ISO8859_2, "iso-8859-2"},
"iso_8859-2:1987": {charmap.ISO8859_2, "iso-8859-2"},
"l2": {charmap.ISO8859_2, "iso-8859-2"},
"latin2": {charmap.ISO8859_2, "iso-8859-2"},
"csisolatin3": {charmap.ISO8859_3, "iso-8859-3"},
"iso-8859-3": {charmap.ISO8859_3, "iso-8859-3"},
"iso-ir-109": {charmap.ISO8859_3, "iso-8859-3"},
"iso8859-3": {charmap.ISO8859_3, "iso-8859-3"},
"iso88593": {charmap.ISO8859_3, "iso-8859-3"},
"iso_8859-3": {charmap.ISO8859_3, "iso-8859-3"},
"iso_8859-3:1988": {charmap.ISO8859_3, "iso-8859-3"},
"l3": {charmap.ISO8859_3, "iso-8859-3"},
"latin3": {charmap.ISO8859_3, "iso-8859-3"},
"csisolatin4": {charmap.ISO8859_4, "iso-8859-4"},
"iso-8859-4": {charmap.ISO8859_4, "iso-8859-4"},
"iso-ir-110": {charmap.ISO8859_4, "iso-8859-4"},
"iso8859-4": {charmap.ISO8859_4, "iso-8859-4"},
"iso88594": {charmap.ISO8859_4, "iso-8859-4"},
"iso_8859-4": {charmap.ISO8859_4, "iso-8859-4"},
"iso_8859-4:1988": {charmap.ISO8859_4, "iso-8859-4"},
"l4": {charmap.ISO8859_4, "iso-8859-4"},
"latin4": {charmap.ISO8859_4, "iso-8859-4"},
"csisolatincyrillic": {charmap.ISO8859_5, "iso-8859-5"},
"cyrillic": {charmap.ISO8859_5, "iso-8859-5"},
"iso-8859-5": {charmap.ISO8859_5, "iso-8859-5"},
"iso-ir-144": {charmap.ISO8859_5, "iso-8859-5"},
"iso8859-5": {charmap.ISO8859_5, "iso-8859-5"},
"iso88595": {charmap.ISO8859_5, "iso-8859-5"},
"iso_8859-5": {charmap.ISO8859_5, "iso-8859-5"},
"iso_8859-5:1988": {charmap.ISO8859_5, "iso-8859-5"},
"arabic": {charmap.ISO8859_6, "iso-8859-6"},
"asmo-708": {charmap.ISO8859_6, "iso-8859-6"},
"csiso88596e": {charmap.ISO8859_6, "iso-8859-6"},
"csiso88596i": {charmap.ISO8859_6, "iso-8859-6"},
"csisolatinarabic": {charmap.ISO8859_6, "iso-8859-6"},
"ecma-114": {charmap.ISO8859_6, "iso-8859-6"},
"iso-8859-6": {charmap.ISO8859_6, "iso-8859-6"},
"iso-8859-6-e": {charmap.ISO8859_6, "iso-8859-6"},
"iso-8859-6-i": {charmap.ISO8859_6, "iso-8859-6"},
"iso-ir-127": {charmap.ISO8859_6, "iso-8859-6"},
"iso8859-6": {charmap.ISO8859_6, "iso-8859-6"},
"iso88596": {charmap.ISO8859_6, "iso-8859-6"},
"iso_8859-6": {charmap.ISO8859_6, "iso-8859-6"},
"iso_8859-6:1987": {charmap.ISO8859_6, "iso-8859-6"},
"csisolatingreek": {charmap.ISO8859_7, "iso-8859-7"},
"ecma-118": {charmap.ISO8859_7, "iso-8859-7"},
"elot_928": {charmap.ISO8859_7, "iso-8859-7"},
"greek": {charmap.ISO8859_7, "iso-8859-7"},
"greek8": {charmap.ISO8859_7, "iso-8859-7"},
"iso-8859-7": {charmap.ISO8859_7, "iso-8859-7"},
"iso-ir-126": {charmap.ISO8859_7, "iso-8859-7"},
"iso8859-7": {charmap.ISO8859_7, "iso-8859-7"},
"iso88597": {charmap.ISO8859_7, "iso-8859-7"},
"iso_8859-7": {charmap.ISO8859_7, "iso-8859-7"},
"iso_8859-7:1987": {charmap.ISO8859_7, "iso-8859-7"},
"sun_eu_greek": {charmap.ISO8859_7, "iso-8859-7"},
"csiso88598e": {charmap.ISO8859_8, "iso-8859-8"},
"csisolatinhebrew": {charmap.ISO8859_8, "iso-8859-8"},
"hebrew": {charmap.ISO8859_8, "iso-8859-8"},
"iso-8859-8": {charmap.ISO8859_8, "iso-8859-8"},
"iso-8859-8-e": {charmap.ISO8859_8, "iso-8859-8"},
"iso-ir-138": {charmap.ISO8859_8, "iso-8859-8"},
"iso8859-8": {charmap.ISO8859_8, "iso-8859-8"},
"iso88598": {charmap.ISO8859_8, "iso-8859-8"},
"iso_8859-8": {charmap.ISO8859_8, "iso-8859-8"},
"iso_8859-8:1988": {charmap.ISO8859_8, "iso-8859-8"},
"visual": {charmap.ISO8859_8, "iso-8859-8"},
"csiso88598i": {charmap.ISO8859_8, "iso-8859-8-i"},
"iso-8859-8-i": {charmap.ISO8859_8, "iso-8859-8-i"},
"logical": {charmap.ISO8859_8, "iso-8859-8-i"},
"csisolatin6": {charmap.ISO8859_10, "iso-8859-10"},
"iso-8859-10": {charmap.ISO8859_10, "iso-8859-10"},
"iso-ir-157": {charmap.ISO8859_10, "iso-8859-10"},
"iso8859-10": {charmap.ISO8859_10, "iso-8859-10"},
"iso885910": {charmap.ISO8859_10, "iso-8859-10"},
"l6": {charmap.ISO8859_10, "iso-8859-10"},
"latin6": {charmap.ISO8859_10, "iso-8859-10"},
"iso-8859-13": {charmap.ISO8859_13, "iso-8859-13"},
"iso8859-13": {charmap.ISO8859_13, "iso-8859-13"},
"iso885913": {charmap.ISO8859_13, "iso-8859-13"},
"iso-8859-14": {charmap.ISO8859_14, "iso-8859-14"},
"iso8859-14": {charmap.ISO8859_14, "iso-8859-14"},
"iso885914": {charmap.ISO8859_14, "iso-8859-14"},
"csisolatin9": {charmap.ISO8859_15, "iso-8859-15"},
"iso-8859-15": {charmap.ISO8859_15, "iso-8859-15"},
"iso8859-15": {charmap.ISO8859_15, "iso-8859-15"},
"iso885915": {charmap.ISO8859_15, "iso-8859-15"},
"iso_8859-15": {charmap.ISO8859_15, "iso-8859-15"},
"l9": {charmap.ISO8859_15, "iso-8859-15"},
"iso-8859-16": {charmap.ISO8859_16, "iso-8859-16"},
"cskoi8r": {charmap.KOI8R, "koi8-r"},
"koi": {charmap.KOI8R, "koi8-r"},
"koi8": {charmap.KOI8R, "koi8-r"},
"koi8-r": {charmap.KOI8R, "koi8-r"},
"koi8_r": {charmap.KOI8R, "koi8-r"},
"koi8-u": {charmap.KOI8U, "koi8-u"},
"csmacintosh": {charmap.Macintosh, "macintosh"},
"mac": {charmap.Macintosh, "macintosh"},
"macintosh": {charmap.Macintosh, "macintosh"},
"x-mac-roman": {charmap.Macintosh, "macintosh"},
"dos-874": {charmap.Windows874, "windows-874"},
"iso-8859-11": {charmap.Windows874, "windows-874"},
"iso8859-11": {charmap.Windows874, "windows-874"},
"iso885911": {charmap.Windows874, "windows-874"},
"tis-620": {charmap.Windows874, "windows-874"},
"windows-874": {charmap.Windows874, "windows-874"},
"cp1250": {charmap.Windows1250, "windows-1250"},
"windows-1250": {charmap.Windows1250, "windows-1250"},
"x-cp1250": {charmap.Windows1250, "windows-1250"},
"cp1251": {charmap.Windows1251, "windows-1251"},
"windows-1251": {charmap.Windows1251, "windows-1251"},
"x-cp1251": {charmap.Windows1251, "windows-1251"},
"ansi_x3.4-1968": {charmap.Windows1252, "windows-1252"},
"ascii": {charmap.Windows1252, "windows-1252"},
"cp1252": {charmap.Windows1252, "windows-1252"},
"cp819": {charmap.Windows1252, "windows-1252"},
"csisolatin1": {charmap.Windows1252, "windows-1252"},
"ibm819": {charmap.Windows1252, "windows-1252"},
"iso-8859-1": {charmap.Windows1252, "windows-1252"},
"iso-ir-100": {charmap.Windows1252, "windows-1252"},
"iso8859-1": {charmap.Windows1252, "windows-1252"},
"iso88591": {charmap.Windows1252, "windows-1252"},
"iso_8859-1": {charmap.Windows1252, "windows-1252"},
"iso_8859-1:1987": {charmap.Windows1252, "windows-1252"},
"l1": {charmap.Windows1252, "windows-1252"},
"latin1": {charmap.Windows1252, "windows-1252"},
"us-ascii": {charmap.Windows1252, "windows-1252"},
"windows-1252": {charmap.Windows1252, "windows-1252"},
"x-cp1252": {charmap.Windows1252, "windows-1252"},
"cp1253": {charmap.Windows1253, "windows-1253"},
"windows-1253": {charmap.Windows1253, "windows-1253"},
"x-cp1253": {charmap.Windows1253, "windows-1253"},
"cp1254": {charmap.Windows1254, "windows-1254"},
"csisolatin5": {charmap.Windows1254, "windows-1254"},
"iso-8859-9": {charmap.Windows1254, "windows-1254"},
"iso-ir-148": {charmap.Windows1254, "windows-1254"},
"iso8859-9": {charmap.Windows1254, "windows-1254"},
"iso88599": {charmap.Windows1254, "windows-1254"},
"iso_8859-9": {charmap.Windows1254, "windows-1254"},
"iso_8859-9:1989": {charmap.Windows1254, "windows-1254"},
"l5": {charmap.Windows1254, "windows-1254"},
"latin5": {charmap.Windows1254, "windows-1254"},
"windows-1254": {charmap.Windows1254, "windows-1254"},
"x-cp1254": {charmap.Windows1254, "windows-1254"},
"cp1255": {charmap.Windows1255, "windows-1255"},
"windows-1255": {charmap.Windows1255, "windows-1255"},
"x-cp1255": {charmap.Windows1255, "windows-1255"},
"cp1256": {charmap.Windows1256, "windows-1256"},
"windows-1256": {charmap.Windows1256, "windows-1256"},
"x-cp1256": {charmap.Windows1256, "windows-1256"},
"cp1257": {charmap.Windows1257, "windows-1257"},
"windows-1257": {charmap.Windows1257, "windows-1257"},
"x-cp1257": {charmap.Windows1257, "windows-1257"},
"cp1258": {charmap.Windows1258, "windows-1258"},
"windows-1258": {charmap.Windows1258, "windows-1258"},
"x-cp1258": {charmap.Windows1258, "windows-1258"},
"x-mac-cyrillic": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
"x-mac-ukrainian": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
"chinese": {simplifiedchinese.GBK, "gbk"},
"csgb2312": {simplifiedchinese.GBK, "gbk"},
"csiso58gb231280": {simplifiedchinese.GBK, "gbk"},
"gb2312": {simplifiedchinese.GBK, "gbk"},
"gb_2312": {simplifiedchinese.GBK, "gbk"},
"gb_2312-80": {simplifiedchinese.GBK, "gbk"},
"gbk": {simplifiedchinese.GBK, "gbk"},
"iso-ir-58": {simplifiedchinese.GBK, "gbk"},
"x-gbk": {simplifiedchinese.GBK, "gbk"},
"gb18030": {simplifiedchinese.GB18030, "gb18030"},
"hz-gb-2312": {simplifiedchinese.HZGB2312, "hz-gb-2312"},
"big5": {traditionalchinese.Big5, "big5"},
"big5-hkscs": {traditionalchinese.Big5, "big5"},
"cn-big5": {traditionalchinese.Big5, "big5"},
"csbig5": {traditionalchinese.Big5, "big5"},
"x-x-big5": {traditionalchinese.Big5, "big5"},
"cseucpkdfmtjapanese": {japanese.EUCJP, "euc-jp"},
"euc-jp": {japanese.EUCJP, "euc-jp"},
"x-euc-jp": {japanese.EUCJP, "euc-jp"},
"csiso2022jp": {japanese.ISO2022JP, "iso-2022-jp"},
"iso-2022-jp": {japanese.ISO2022JP, "iso-2022-jp"},
"csshiftjis": {japanese.ShiftJIS, "shift_jis"},
"ms_kanji": {japanese.ShiftJIS, "shift_jis"},
"shift-jis": {japanese.ShiftJIS, "shift_jis"},
"shift_jis": {japanese.ShiftJIS, "shift_jis"},
"sjis": {japanese.ShiftJIS, "shift_jis"},
"windows-31j": {japanese.ShiftJIS, "shift_jis"},
"x-sjis": {japanese.ShiftJIS, "shift_jis"},
"cseuckr": {korean.EUCKR, "euc-kr"},
"csksc56011987": {korean.EUCKR, "euc-kr"},
"euc-kr": {korean.EUCKR, "euc-kr"},
"iso-ir-149": {korean.EUCKR, "euc-kr"},
"korean": {korean.EUCKR, "euc-kr"},
"ks_c_5601-1987": {korean.EUCKR, "euc-kr"},
"ks_c_5601-1989": {korean.EUCKR, "euc-kr"},
"ksc5601": {korean.EUCKR, "euc-kr"},
"ksc_5601": {korean.EUCKR, "euc-kr"},
"windows-949": {korean.EUCKR, "euc-kr"},
"csiso2022kr": {encoding.Replacement, "replacement"},
"iso-2022-kr": {encoding.Replacement, "replacement"},
"iso-2022-cn": {encoding.Replacement, "replacement"},
"iso-2022-cn-ext": {encoding.Replacement, "replacement"},
"utf-16be": {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be"},
"utf-16": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
"utf-16le": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
"x-user-defined": {charmap.XUserDefined, "x-user-defined"},
}

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<title>HTTP charset</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The character encoding of a page can be set using the HTTP header charset declaration.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>HTTP charset</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The character encoding of a page can be set using the HTTP header charset declaration.</p>
<div class="notes"><p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p><p>The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-003">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-001<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-001" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<title>HTTP vs UTF-8 BOM</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>HTTP vs UTF-8 BOM</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.</p>
<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p><p>If the test is unsuccessful, the characters &#x00EF;&#x00BB;&#x00BF; should appear at the top of the page. These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-022">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-034<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-034" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="iso-8859-1" > <title>HTTP vs meta charset</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.">
<style type='text/css'>
.test div { width: 50px; }.test div { width: 90px; }
</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>HTTP vs meta charset</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.</p>
<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-037">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-018<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-018" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta http-equiv="content-type" content="text/html;charset=iso-8859-1" > <title>HTTP vs meta content</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.">
<style type='text/css'>
.test div { width: 50px; }.test div { width: 90px; }
</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>HTTP vs meta content</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.</p>
<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-018">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-016<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-016" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,47 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<title>No encoding declaration</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>No encoding declaration</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.</p>
<div class="notes"><p><p>The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-034">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-015<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-015" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,9 @@
These test cases come from
http://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics
Distributed under both the W3C Test Suite License
(http://www.w3.org/Consortium/Legal/2008/04-testsuite-license)
and the W3C 3-clause BSD License
(http://www.w3.org/Consortium/Legal/2008/03-bsd-license).
To contribute to a W3C Test Suite, see the policies and contribution
forms (http://www.w3.org/2004/10/27-testcases).

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="iso-8859-15"> <title>UTF-8 BOM vs meta charset</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.">
<style type='text/css'>
.test div { width: 50px; }.test div { width: 90px; }
</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>UTF-8 BOM vs meta charset</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.</p>
<div class="notes"><p><p>The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-024">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-038<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-038" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>UTF-8 BOM vs meta content</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>UTF-8 BOM vs meta content</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.</p>
<div class="notes"><p><p>The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-038">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-037<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-037" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="iso-8859-15"> <title>meta charset attribute</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The character encoding of the page can be set by a meta element with charset attribute.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>meta charset attribute</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with charset attribute.</p>
<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-015">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-009<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-009" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>meta content attribute</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The character encoding of the page can be set by a meta element with http-equiv and content attributes.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>meta content attribute</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with http-equiv and content attributes.</p>
<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-009">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-007<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-007" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

@ -0,0 +1,102 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// Section 12.2.3.2 of the HTML5 specification says "The following elements
// have varying levels of special parsing rules".
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
var isSpecialElementMap = map[string]bool{
"address": true,
"applet": true,
"area": true,
"article": true,
"aside": true,
"base": true,
"basefont": true,
"bgsound": true,
"blockquote": true,
"body": true,
"br": true,
"button": true,
"caption": true,
"center": true,
"col": true,
"colgroup": true,
"dd": true,
"details": true,
"dir": true,
"div": true,
"dl": true,
"dt": true,
"embed": true,
"fieldset": true,
"figcaption": true,
"figure": true,
"footer": true,
"form": true,
"frame": true,
"frameset": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"header": true,
"hgroup": true,
"hr": true,
"html": true,
"iframe": true,
"img": true,
"input": true,
"isindex": true,
"li": true,
"link": true,
"listing": true,
"marquee": true,
"menu": true,
"meta": true,
"nav": true,
"noembed": true,
"noframes": true,
"noscript": true,
"object": true,
"ol": true,
"p": true,
"param": true,
"plaintext": true,
"pre": true,
"script": true,
"section": true,
"select": true,
"source": true,
"style": true,
"summary": true,
"table": true,
"tbody": true,
"td": true,
"template": true,
"textarea": true,
"tfoot": true,
"th": true,
"thead": true,
"title": true,
"tr": true,
"track": true,
"ul": true,
"wbr": true,
"xmp": true,
}
func isSpecialElement(element *Node) bool {
switch element.Namespace {
case "", "html":
return isSpecialElementMap[element.Data]
case "svg":
return element.Data == "foreignObject"
}
return false
}

@ -0,0 +1,106 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package html implements an HTML5-compliant tokenizer and parser.
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
z := html.NewTokenizer(r)
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
which parses the next token and returns its type, or an error:
for {
tt := z.Next()
if tt == html.ErrorToken {
// ...
return ...
}
// Process the current token.
}
There are two APIs for retrieving the current token. The high-level API is to
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
allow optionally calling Raw after Next but before Token, Text, TagName, or
TagAttr. In EBNF notation, the valid call sequence per token is:
Next {Raw} [ Token | Text | TagName {TagAttr} ]
Token returns an independent data structure that completely describes a token.
Entities (such as "&lt;") are unescaped, tag names and attribute keys are
lower-cased, and attributes are collected into a []Attribute. For example:
for {
if z.Next() == html.ErrorToken {
// Returning io.EOF indicates success.
return z.Err()
}
emitToken(z.Token())
}
The low-level API performs fewer allocations and copies, but the contents of
the []byte values returned by Text, TagName and TagAttr may change on the next
call to Next. For example, to extract an HTML page's anchor text:
depth := 0
for {
tt := z.Next()
switch tt {
case ErrorToken:
return z.Err()
case TextToken:
if depth > 0 {
// emitBytes should copy the []byte it receives,
// if it doesn't process it immediately.
emitBytes(z.Text())
}
case StartTagToken, EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == StartTagToken {
depth++
} else {
depth--
}
}
}
}
Parsing is done by calling Parse with an io.Reader, which returns the root of
the parse tree (the document element) as a *Node. It is the caller's
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
example, to process each anchor node in depth-first order:
doc, err := html.Parse(r)
if err != nil {
// ...
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
// Do something with n...
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
The relevant specifications include:
https://html.spec.whatwg.org/multipage/syntax.html and
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
*/
package html
// The tokenization algorithm implemented by this package is not a line-by-line
// transliteration of the relatively verbose state-machine in the WHATWG
// specification. A more direct approach is used instead, where the program
// counter implies the state, such as whether it is tokenizing a tag or a text
// node. Specification compliance is verified by checking expected and actual
// outputs over a test suite rather than aiming for algorithmic fidelity.
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
// TODO(nigeltao): How does parsing interact with a JavaScript engine?

@ -0,0 +1,156 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
// parseDoctype parses the data from a DoctypeToken into a name,
// public identifier, and system identifier. It returns a Node whose Type
// is DoctypeNode, whose Data is the name, and which has attributes
// named "system" and "public" for the two identifiers if they were present.
// quirks is whether the document should be parsed in "quirks mode".
func parseDoctype(s string) (n *Node, quirks bool) {
n = &Node{Type: DoctypeNode}
// Find the name.
space := strings.IndexAny(s, whitespace)
if space == -1 {
space = len(s)
}
n.Data = s[:space]
// The comparison to "html" is case-sensitive.
if n.Data != "html" {
quirks = true
}
n.Data = strings.ToLower(n.Data)
s = strings.TrimLeft(s[space:], whitespace)
if len(s) < 6 {
// It can't start with "PUBLIC" or "SYSTEM".
// Ignore the rest of the string.
return n, quirks || s != ""
}
key := strings.ToLower(s[:6])
s = s[6:]
for key == "public" || key == "system" {
s = strings.TrimLeft(s, whitespace)
if s == "" {
break
}
quote := s[0]
if quote != '"' && quote != '\'' {
break
}
s = s[1:]
q := strings.IndexRune(s, rune(quote))
var id string
if q == -1 {
id = s
s = ""
} else {
id = s[:q]
s = s[q+1:]
}
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
if key == "public" {
key = "system"
} else {
key = ""
}
}
if key != "" || s != "" {
quirks = true
} else if len(n.Attr) > 0 {
if n.Attr[0].Key == "public" {
public := strings.ToLower(n.Attr[0].Val)
switch public {
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
quirks = true
default:
for _, q := range quirkyIDs {
if strings.HasPrefix(public, q) {
quirks = true
break
}
}
}
// The following two public IDs only cause quirks mode if there is no system ID.
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
quirks = true
}
}
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
quirks = true
}
}
return n, quirks
}
// quirkyIDs is a list of public doctype identifiers that cause a document
// to be interpreted in quirks mode. The identifiers should be in lower case.
var quirkyIDs = []string{
"+//silmaril//dtd html pro v0r11 19970101//",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
"-//as//dtd html 3.0 aswedit + extensions//",
"-//ietf//dtd html 2.0 level 1//",
"-//ietf//dtd html 2.0 level 2//",
"-//ietf//dtd html 2.0 strict level 1//",
"-//ietf//dtd html 2.0 strict level 2//",
"-//ietf//dtd html 2.0 strict//",
"-//ietf//dtd html 2.0//",
"-//ietf//dtd html 2.1e//",
"-//ietf//dtd html 3.0//",
"-//ietf//dtd html 3.2 final//",
"-//ietf//dtd html 3.2//",
"-//ietf//dtd html 3//",
"-//ietf//dtd html level 0//",
"-//ietf//dtd html level 1//",
"-//ietf//dtd html level 2//",
"-//ietf//dtd html level 3//",
"-//ietf//dtd html strict level 0//",
"-//ietf//dtd html strict level 1//",
"-//ietf//dtd html strict level 2//",
"-//ietf//dtd html strict level 3//",
"-//ietf//dtd html strict//",
"-//ietf//dtd html//",
"-//metrius//dtd metrius presentational//",
"-//microsoft//dtd internet explorer 2.0 html strict//",
"-//microsoft//dtd internet explorer 2.0 html//",
"-//microsoft//dtd internet explorer 2.0 tables//",
"-//microsoft//dtd internet explorer 3.0 html strict//",
"-//microsoft//dtd internet explorer 3.0 html//",
"-//microsoft//dtd internet explorer 3.0 tables//",
"-//netscape comm. corp.//dtd html//",
"-//netscape comm. corp.//dtd strict html//",
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
"-//spyglass//dtd html 2.0 extended//",
"-//sq//dtd html 2.0 hotmetal + extensions//",
"-//sun microsystems corp.//dtd hotjava html//",
"-//sun microsystems corp.//dtd hotjava strict html//",
"-//w3c//dtd html 3 1995-03-24//",
"-//w3c//dtd html 3.2 draft//",
"-//w3c//dtd html 3.2 final//",
"-//w3c//dtd html 3.2//",
"-//w3c//dtd html 3.2s draft//",
"-//w3c//dtd html 4.0 frameset//",
"-//w3c//dtd html 4.0 transitional//",
"-//w3c//dtd html experimental 19960712//",
"-//w3c//dtd html experimental 970421//",
"-//w3c//dtd w3 html//",
"-//w3o//dtd w3 html 3.0//",
"-//webtechs//dtd mozilla html 2.0//",
"-//webtechs//dtd mozilla html//",
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,29 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"testing"
"unicode/utf8"
)
func TestEntityLength(t *testing.T) {
// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
// The +1 comes from the leading "&". This property implies that the length of
// unescaped text is <= the length of escaped text.
for k, v := range entity {
if 1+len(k) < utf8.RuneLen(v) {
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
}
if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
}
}
for k, v := range entity2 {
if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
}
}
}

@ -0,0 +1,258 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"strings"
"unicode/utf8"
)
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
var replacementTable = [...]rune{
'\u20AC', // First entry is what 0x80 should be replaced with.
'\u0081',
'\u201A',
'\u0192',
'\u201E',
'\u2026',
'\u2020',
'\u2021',
'\u02C6',
'\u2030',
'\u0160',
'\u2039',
'\u0152',
'\u008D',
'\u017D',
'\u008F',
'\u0090',
'\u2018',
'\u2019',
'\u201C',
'\u201D',
'\u2022',
'\u2013',
'\u2014',
'\u02DC',
'\u2122',
'\u0161',
'\u203A',
'\u0153',
'\u009D',
'\u017E',
'\u0178', // Last entry is 0x9F.
// 0x00->'\uFFFD' is handled programmatically.
// 0x0D->'\u000D' is a no-op.
}
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
// attribute should be true if parsing an attribute value.
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
// i starts at 1 because we already know that s[0] == '&'.
i, s := 1, b[src:]
if len(s) <= 1 {
b[dst] = b[src]
return dst + 1, src + 1
}
if s[i] == '#' {
if len(s) <= 3 { // We need to have at least "&#.".
b[dst] = b[src]
return dst + 1, src + 1
}
i++
c := s[i]
hex := false
if c == 'x' || c == 'X' {
hex = true
i++
}
x := '\x00'
for i < len(s) {
c = s[i]
i++
if hex {
if '0' <= c && c <= '9' {
x = 16*x + rune(c) - '0'
continue
} else if 'a' <= c && c <= 'f' {
x = 16*x + rune(c) - 'a' + 10
continue
} else if 'A' <= c && c <= 'F' {
x = 16*x + rune(c) - 'A' + 10
continue
}
} else if '0' <= c && c <= '9' {
x = 10*x + rune(c) - '0'
continue
}
if c != ';' {
i--
}
break
}
if i <= 3 { // No characters matched.
b[dst] = b[src]
return dst + 1, src + 1
}
if 0x80 <= x && x <= 0x9F {
// Replace characters from Windows-1252 with UTF-8 equivalents.
x = replacementTable[x-0x80]
} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
// Replace invalid characters with the replacement character.
x = '\uFFFD'
}
return dst + utf8.EncodeRune(b[dst:], x), src + i
}
// Consume the maximum number of characters possible, with the
// consumed characters matching one of the named references.
for i < len(s) {
c := s[i]
i++
// Lower-cased characters are more common in entities, so we check for them first.
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
continue
}
if c != ';' {
i--
}
break
}
entityName := string(s[1:i])
if entityName == "" {
// No-op.
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
// No-op.
} else if x := entity[entityName]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + i
} else if x := entity2[entityName]; x[0] != 0 {
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
} else if !attribute {
maxLen := len(entityName) - 1
if maxLen > longestEntityWithoutSemicolon {
maxLen = longestEntityWithoutSemicolon
}
for j := maxLen; j > 1; j-- {
if x := entity[entityName[:j]]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
}
}
}
dst1, src1 = dst+i, src+i
copy(b[dst:dst1], b[src:src1])
return dst1, src1
}
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
// attribute should be true if parsing an attribute value.
func unescape(b []byte, attribute bool) []byte {
for i, c := range b {
if c == '&' {
dst, src := unescapeEntity(b, i, i, attribute)
for src < len(b) {
c := b[src]
if c == '&' {
dst, src = unescapeEntity(b, dst, src, attribute)
} else {
b[dst] = c
dst, src = dst+1, src+1
}
}
return b[0:dst]
}
}
return b
}
// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
func lower(b []byte) []byte {
for i, c := range b {
if 'A' <= c && c <= 'Z' {
b[i] = c + 'a' - 'A'
}
}
return b
}
const escapedChars = "&'<>\"\r"
func escape(w writer, s string) error {
i := strings.IndexAny(s, escapedChars)
for i != -1 {
if _, err := w.WriteString(s[:i]); err != nil {
return err
}
var esc string
switch s[i] {
case '&':
esc = "&amp;"
case '\'':
// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
esc = "&#39;"
case '<':
esc = "&lt;"
case '>':
esc = "&gt;"
case '"':
// "&#34;" is shorter than "&quot;".
esc = "&#34;"
case '\r':
esc = "&#13;"
default:
panic("unrecognized escape character")
}
s = s[i+1:]
if _, err := w.WriteString(esc); err != nil {
return err
}
i = strings.IndexAny(s, escapedChars)
}
_, err := w.WriteString(s)
return err
}
// EscapeString escapes special characters like "<" to become "&lt;". It
// escapes only five such characters: <, >, &, ' and ".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func EscapeString(s string) string {
if strings.IndexAny(s, escapedChars) == -1 {
return s
}
var buf bytes.Buffer
escape(&buf, s)
return buf.String()
}
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
// larger range of entities than EscapeString escapes. For example, "&aacute;"
// unescapes to "á", as does "&#225;" and "&xE1;".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func UnescapeString(s string) string {
for _, c := range s {
if c == '&' {
return string(unescape([]byte(s), false))
}
}
return s
}

@ -0,0 +1,97 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import "testing"
type unescapeTest struct {
// A short description of the test case.
desc string
// The HTML text.
html string
// The unescaped text.
unescaped string
}
var unescapeTests = []unescapeTest{
// Handle no entities.
{
"copy",
"A\ttext\nstring",
"A\ttext\nstring",
},
// Handle simple named entities.
{
"simple",
"&amp; &gt; &lt;",
"& > <",
},
// Handle hitting the end of the string.
{
"stringEnd",
"&amp &amp",
"& &",
},
// Handle entities with two codepoints.
{
"multiCodepoint",
"text &gesl; blah",
"text \u22db\ufe00 blah",
},
// Handle decimal numeric entities.
{
"decimalEntity",
"Delta = &#916; ",
"Delta = Δ ",
},
// Handle hexadecimal numeric entities.
{
"hexadecimalEntity",
"Lambda = &#x3bb; = &#X3Bb ",
"Lambda = λ = λ ",
},
// Handle numeric early termination.
{
"numericEnds",
"&# &#x &#128;43 &copy = &#169f = &#xa9",
"&# &#x €43 © = ©f = ©",
},
// Handle numeric ISO-8859-1 entity replacements.
{
"numericReplacements",
"Footnote&#x87;",
"Footnote‡",
},
}
func TestUnescape(t *testing.T) {
for _, tt := range unescapeTests {
unescaped := UnescapeString(tt.html)
if unescaped != tt.unescaped {
t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
}
}
}
func TestUnescapeEscape(t *testing.T) {
ss := []string{
``,
`abc def`,
`a & b`,
`a&amp;b`,
`a &amp b`,
`&quot;`,
`"`,
`"<&>"`,
`&quot;&lt;&amp;&gt;&quot;`,
`3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
`The special characters are: <, >, &, ' and "`,
}
for _, s := range ss {
if got := UnescapeString(EscapeString(s)); got != s {
t.Errorf("got %q want %q", got, s)
}
}
}

@ -0,0 +1,40 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This example demonstrates parsing HTML data and walking the resulting tree.
package html_test
import (
"fmt"
"log"
"strings"
"golang.org/x/net/html"
)
func ExampleParse() {
s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
doc, err := html.Parse(strings.NewReader(s))
if err != nil {
log.Fatal(err)
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key == "href" {
fmt.Println(a.Val)
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
// Output:
// foo
// /bar/baz
}

@ -0,0 +1,226 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
for i := range aa {
if newName, ok := nameMap[aa[i].Key]; ok {
aa[i].Key = newName
}
}
}
func adjustForeignAttributes(aa []Attribute) {
for i, a := range aa {
if a.Key == "" || a.Key[0] != 'x' {
continue
}
switch a.Key {
case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
j := strings.Index(a.Key, ":")
aa[i].Namespace = a.Key[:j]
aa[i].Key = a.Key[j+1:]
}
}
}
func htmlIntegrationPoint(n *Node) bool {
if n.Type != ElementNode {
return false
}
switch n.Namespace {
case "math":
if n.Data == "annotation-xml" {
for _, a := range n.Attr {
if a.Key == "encoding" {
val := strings.ToLower(a.Val)
if val == "text/html" || val == "application/xhtml+xml" {
return true
}
}
}
}
case "svg":
switch n.Data {
case "desc", "foreignObject", "title":
return true
}
}
return false
}
func mathMLTextIntegrationPoint(n *Node) bool {
if n.Namespace != "math" {
return false
}
switch n.Data {
case "mi", "mo", "mn", "ms", "mtext":
return true
}
return false
}
// Section 12.2.5.5.
var breakout = map[string]bool{
"b": true,
"big": true,
"blockquote": true,
"body": true,
"br": true,
"center": true,
"code": true,
"dd": true,
"div": true,
"dl": true,
"dt": true,
"em": true,
"embed": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"hr": true,
"i": true,
"img": true,
"li": true,
"listing": true,
"menu": true,
"meta": true,
"nobr": true,
"ol": true,
"p": true,
"pre": true,
"ruby": true,
"s": true,
"small": true,
"span": true,
"strong": true,
"strike": true,
"sub": true,
"sup": true,
"table": true,
"tt": true,
"u": true,
"ul": true,
"var": true,
}
// Section 12.2.5.5.
var svgTagNameAdjustments = map[string]string{
"altglyph": "altGlyph",
"altglyphdef": "altGlyphDef",
"altglyphitem": "altGlyphItem",
"animatecolor": "animateColor",
"animatemotion": "animateMotion",
"animatetransform": "animateTransform",
"clippath": "clipPath",
"feblend": "feBlend",
"fecolormatrix": "feColorMatrix",
"fecomponenttransfer": "feComponentTransfer",
"fecomposite": "feComposite",
"feconvolvematrix": "feConvolveMatrix",
"fediffuselighting": "feDiffuseLighting",
"fedisplacementmap": "feDisplacementMap",
"fedistantlight": "feDistantLight",
"feflood": "feFlood",
"fefunca": "feFuncA",
"fefuncb": "feFuncB",
"fefuncg": "feFuncG",
"fefuncr": "feFuncR",
"fegaussianblur": "feGaussianBlur",
"feimage": "feImage",
"femerge": "feMerge",
"femergenode": "feMergeNode",
"femorphology": "feMorphology",
"feoffset": "feOffset",
"fepointlight": "fePointLight",
"fespecularlighting": "feSpecularLighting",
"fespotlight": "feSpotLight",
"fetile": "feTile",
"feturbulence": "feTurbulence",
"foreignobject": "foreignObject",
"glyphref": "glyphRef",
"lineargradient": "linearGradient",
"radialgradient": "radialGradient",
"textpath": "textPath",
}
// Section 12.2.5.1
var mathMLAttributeAdjustments = map[string]string{
"definitionurl": "definitionURL",
}
var svgAttributeAdjustments = map[string]string{
"attributename": "attributeName",
"attributetype": "attributeType",
"basefrequency": "baseFrequency",
"baseprofile": "baseProfile",
"calcmode": "calcMode",
"clippathunits": "clipPathUnits",
"contentscripttype": "contentScriptType",
"contentstyletype": "contentStyleType",
"diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits",
"glyphref": "glyphRef",
"gradienttransform": "gradientTransform",
"gradientunits": "gradientUnits",
"kernelmatrix": "kernelMatrix",
"kernelunitlength": "kernelUnitLength",
"keypoints": "keyPoints",
"keysplines": "keySplines",
"keytimes": "keyTimes",
"lengthadjust": "lengthAdjust",
"limitingconeangle": "limitingConeAngle",
"markerheight": "markerHeight",
"markerunits": "markerUnits",
"markerwidth": "markerWidth",
"maskcontentunits": "maskContentUnits",
"maskunits": "maskUnits",
"numoctaves": "numOctaves",
"pathlength": "pathLength",
"patterncontentunits": "patternContentUnits",
"patterntransform": "patternTransform",
"patternunits": "patternUnits",
"pointsatx": "pointsAtX",
"pointsaty": "pointsAtY",
"pointsatz": "pointsAtZ",
"preservealpha": "preserveAlpha",
"preserveaspectratio": "preserveAspectRatio",
"primitiveunits": "primitiveUnits",
"refx": "refX",
"refy": "refY",
"repeatcount": "repeatCount",
"repeatdur": "repeatDur",
"requiredextensions": "requiredExtensions",
"requiredfeatures": "requiredFeatures",
"specularconstant": "specularConstant",
"specularexponent": "specularExponent",
"spreadmethod": "spreadMethod",
"startoffset": "startOffset",
"stddeviation": "stdDeviation",
"stitchtiles": "stitchTiles",
"surfacescale": "surfaceScale",
"systemlanguage": "systemLanguage",
"tablevalues": "tableValues",
"targetx": "targetX",
"targety": "targetY",
"textlength": "textLength",
"viewbox": "viewBox",
"viewtarget": "viewTarget",
"xchannelselector": "xChannelSelector",
"ychannelselector": "yChannelSelector",
"zoomandpan": "zoomAndPan",
}

@ -0,0 +1,193 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"golang.org/x/net/html/atom"
)
// A NodeType is the type of a Node.
type NodeType uint32
const (
ErrorNode NodeType = iota
TextNode
DocumentNode
ElementNode
CommentNode
DoctypeNode
scopeMarkerNode
)
// Section 12.2.3.3 says "scope markers are inserted when entering applet
// elements, buttons, object elements, marquees, table cells, and table
// captions, and are used to prevent formatting from 'leaking'".
var scopeMarker = Node{Type: scopeMarkerNode}
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
// is the atom for Data, or zero if Data is not a known tag name.
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
Type NodeType
DataAtom atom.Atom
Data string
Namespace string
Attr []Attribute
}
// InsertBefore inserts newChild as a child of n, immediately before oldChild
// in the sequence of n's children. oldChild may be nil, in which case newChild
// is appended to the end of n's children.
//
// It will panic if newChild already has a parent or siblings.
func (n *Node) InsertBefore(newChild, oldChild *Node) {
if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
panic("html: InsertBefore called for an attached child Node")
}
var prev, next *Node
if oldChild != nil {
prev, next = oldChild.PrevSibling, oldChild
} else {
prev = n.LastChild
}
if prev != nil {
prev.NextSibling = newChild
} else {
n.FirstChild = newChild
}
if next != nil {
next.PrevSibling = newChild
} else {
n.LastChild = newChild
}
newChild.Parent = n
newChild.PrevSibling = prev
newChild.NextSibling = next
}
// AppendChild adds a node c as a child of n.
//
// It will panic if c already has a parent or siblings.
func (n *Node) AppendChild(c *Node) {
if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
panic("html: AppendChild called for an attached child Node")
}
last := n.LastChild
if last != nil {
last.NextSibling = c
} else {
n.FirstChild = c
}
n.LastChild = c
c.Parent = n
c.PrevSibling = last
}
// RemoveChild removes a node c that is a child of n. Afterwards, c will have
// no parent and no siblings.
//
// It will panic if c's parent is not n.
func (n *Node) RemoveChild(c *Node) {
if c.Parent != n {
panic("html: RemoveChild called for a non-child Node")
}
if n.FirstChild == c {
n.FirstChild = c.NextSibling
}
if c.NextSibling != nil {
c.NextSibling.PrevSibling = c.PrevSibling
}
if n.LastChild == c {
n.LastChild = c.PrevSibling
}
if c.PrevSibling != nil {
c.PrevSibling.NextSibling = c.NextSibling
}
c.Parent = nil
c.PrevSibling = nil
c.NextSibling = nil
}
// reparentChildren reparents all of src's child nodes to dst.
func reparentChildren(dst, src *Node) {
for {
child := src.FirstChild
if child == nil {
break
}
src.RemoveChild(child)
dst.AppendChild(child)
}
}
// clone returns a new node with the same type, data and attributes.
// The clone has no parent, no siblings and no children.
func (n *Node) clone() *Node {
m := &Node{
Type: n.Type,
DataAtom: n.DataAtom,
Data: n.Data,
Attr: make([]Attribute, len(n.Attr)),
}
copy(m.Attr, n.Attr)
return m
}
// nodeStack is a stack of nodes.
type nodeStack []*Node
// pop pops the stack. It will panic if s is empty.
func (s *nodeStack) pop() *Node {
i := len(*s)
n := (*s)[i-1]
*s = (*s)[:i-1]
return n
}
// top returns the most recently pushed node, or nil if s is empty.
func (s *nodeStack) top() *Node {
if i := len(*s); i > 0 {
return (*s)[i-1]
}
return nil
}
// index returns the index of the top-most occurrence of n in the stack, or -1
// if n is not present.
func (s *nodeStack) index(n *Node) int {
for i := len(*s) - 1; i >= 0; i-- {
if (*s)[i] == n {
return i
}
}
return -1
}
// insert inserts a node at the given index.
func (s *nodeStack) insert(i int, n *Node) {
(*s) = append(*s, nil)
copy((*s)[i+1:], (*s)[i:])
(*s)[i] = n
}
// remove removes a node from the stack. It is a no-op if n is not present.
func (s *nodeStack) remove(n *Node) {
i := s.index(n)
if i == -1 {
return
}
copy((*s)[i:], (*s)[i+1:])
j := len(*s) - 1
(*s)[j] = nil
*s = (*s)[:j]
}

@ -0,0 +1,146 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"fmt"
)
// checkTreeConsistency checks that a node and its descendants are all
// consistent in their parent/child/sibling relationships.
func checkTreeConsistency(n *Node) error {
return checkTreeConsistency1(n, 0)
}
func checkTreeConsistency1(n *Node, depth int) error {
if depth == 1e4 {
return fmt.Errorf("html: tree looks like it contains a cycle")
}
if err := checkNodeConsistency(n); err != nil {
return err
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := checkTreeConsistency1(c, depth+1); err != nil {
return err
}
}
return nil
}
// checkNodeConsistency checks that a node's parent/child/sibling relationships
// are consistent.
func checkNodeConsistency(n *Node) error {
if n == nil {
return nil
}
nParent := 0
for p := n.Parent; p != nil; p = p.Parent {
nParent++
if nParent == 1e4 {
return fmt.Errorf("html: parent list looks like an infinite loop")
}
}
nForward := 0
for c := n.FirstChild; c != nil; c = c.NextSibling {
nForward++
if nForward == 1e6 {
return fmt.Errorf("html: forward list of children looks like an infinite loop")
}
if c.Parent != n {
return fmt.Errorf("html: inconsistent child/parent relationship")
}
}
nBackward := 0
for c := n.LastChild; c != nil; c = c.PrevSibling {
nBackward++
if nBackward == 1e6 {
return fmt.Errorf("html: backward list of children looks like an infinite loop")
}
if c.Parent != n {
return fmt.Errorf("html: inconsistent child/parent relationship")
}
}
if n.Parent != nil {
if n.Parent == n {
return fmt.Errorf("html: inconsistent parent relationship")
}
if n.Parent == n.FirstChild {
return fmt.Errorf("html: inconsistent parent/first relationship")
}
if n.Parent == n.LastChild {
return fmt.Errorf("html: inconsistent parent/last relationship")
}
if n.Parent == n.PrevSibling {
return fmt.Errorf("html: inconsistent parent/prev relationship")
}
if n.Parent == n.NextSibling {
return fmt.Errorf("html: inconsistent parent/next relationship")
}
parentHasNAsAChild := false
for c := n.Parent.FirstChild; c != nil; c = c.NextSibling {
if c == n {
parentHasNAsAChild = true
break
}
}
if !parentHasNAsAChild {
return fmt.Errorf("html: inconsistent parent/child relationship")
}
}
if n.PrevSibling != nil && n.PrevSibling.NextSibling != n {
return fmt.Errorf("html: inconsistent prev/next relationship")
}
if n.NextSibling != nil && n.NextSibling.PrevSibling != n {
return fmt.Errorf("html: inconsistent next/prev relationship")
}
if (n.FirstChild == nil) != (n.LastChild == nil) {
return fmt.Errorf("html: inconsistent first/last relationship")
}
if n.FirstChild != nil && n.FirstChild == n.LastChild {
// We have a sole child.
if n.FirstChild.PrevSibling != nil || n.FirstChild.NextSibling != nil {
return fmt.Errorf("html: inconsistent sole child's sibling relationship")
}
}
seen := map[*Node]bool{}
var last *Node
for c := n.FirstChild; c != nil; c = c.NextSibling {
if seen[c] {
return fmt.Errorf("html: inconsistent repeated child")
}
seen[c] = true
last = c
}
if last != n.LastChild {
return fmt.Errorf("html: inconsistent last relationship")
}
var first *Node
for c := n.LastChild; c != nil; c = c.PrevSibling {
if !seen[c] {
return fmt.Errorf("html: inconsistent missing child")
}
delete(seen, c)
first = c
}
if first != n.FirstChild {
return fmt.Errorf("html: inconsistent first relationship")
}
if len(seen) != 0 {
return fmt.Errorf("html: inconsistent forwards/backwards child list")
}
return nil
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,388 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"testing"
"golang.org/x/net/html/atom"
)
// readParseTest reads a single test case from r.
func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
line, err := r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
var b []byte
// Read the HTML.
if string(line) != "#data\n" {
return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
}
for {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
if line[0] == '#' {
break
}
b = append(b, line...)
}
text = strings.TrimSuffix(string(b), "\n")
b = b[:0]
// Skip the error list.
if string(line) != "#errors\n" {
return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
}
for {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
if line[0] == '#' {
break
}
}
if string(line) == "#document-fragment\n" {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
context = strings.TrimSpace(string(line))
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
}
// Read the dump of what the parse tree should be.
if string(line) != "#document\n" {
return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
}
inQuote := false
for {
line, err = r.ReadSlice('\n')
if err != nil && err != io.EOF {
return "", "", "", err
}
trimmed := bytes.Trim(line, "| \n")
if len(trimmed) > 0 {
if line[0] == '|' && trimmed[0] == '"' {
inQuote = true
}
if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
inQuote = false
}
}
if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
break
}
b = append(b, line...)
}
return text, string(b), context, nil
}
func dumpIndent(w io.Writer, level int) {
io.WriteString(w, "| ")
for i := 0; i < level; i++ {
io.WriteString(w, " ")
}
}
type sortedAttributes []Attribute
func (a sortedAttributes) Len() int {
return len(a)
}
func (a sortedAttributes) Less(i, j int) bool {
if a[i].Namespace != a[j].Namespace {
return a[i].Namespace < a[j].Namespace
}
return a[i].Key < a[j].Key
}
func (a sortedAttributes) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
func dumpLevel(w io.Writer, n *Node, level int) error {
dumpIndent(w, level)
switch n.Type {
case ErrorNode:
return errors.New("unexpected ErrorNode")
case DocumentNode:
return errors.New("unexpected DocumentNode")
case ElementNode:
if n.Namespace != "" {
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
} else {
fmt.Fprintf(w, "<%s>", n.Data)
}
attr := sortedAttributes(n.Attr)
sort.Sort(attr)
for _, a := range attr {
io.WriteString(w, "\n")
dumpIndent(w, level+1)
if a.Namespace != "" {
fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
} else {
fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
}
}
case TextNode:
fmt.Fprintf(w, `"%s"`, n.Data)
case CommentNode:
fmt.Fprintf(w, "<!-- %s -->", n.Data)
case DoctypeNode:
fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
if n.Attr != nil {
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" || s != "" {
fmt.Fprintf(w, ` "%s"`, p)
fmt.Fprintf(w, ` "%s"`, s)
}
}
io.WriteString(w, ">")
case scopeMarkerNode:
return errors.New("unexpected scopeMarkerNode")
default:
return errors.New("unknown node type")
}
io.WriteString(w, "\n")
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := dumpLevel(w, c, level+1); err != nil {
return err
}
}
return nil
}
func dump(n *Node) (string, error) {
if n == nil || n.FirstChild == nil {
return "", nil
}
var b bytes.Buffer
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := dumpLevel(&b, c, 0); err != nil {
return "", err
}
}
return b.String(), nil
}
const testDataDir = "testdata/webkit/"
func TestParser(t *testing.T) {
testFiles, err := filepath.Glob(testDataDir + "*.dat")
if err != nil {
t.Fatal(err)
}
for _, tf := range testFiles {
f, err := os.Open(tf)
if err != nil {
t.Fatal(err)
}
defer f.Close()
r := bufio.NewReader(f)
for i := 0; ; i++ {
text, want, context, err := readParseTest(r)
if err == io.EOF {
break
}
if err != nil {
t.Fatal(err)
}
err = testParseCase(text, want, context)
if err != nil {
t.Errorf("%s test #%d %q, %s", tf, i, text, err)
}
}
}
}
// testParseCase tests one test case from the test files. If the test does not
// pass, it returns an error that explains the failure.
// text is the HTML to be parsed, want is a dump of the correct parse tree,
// and context is the name of the context node, if any.
func testParseCase(text, want, context string) (err error) {
defer func() {
if x := recover(); x != nil {
switch e := x.(type) {
case error:
err = e
default:
err = fmt.Errorf("%v", e)
}
}
}()
var doc *Node
if context == "" {
doc, err = Parse(strings.NewReader(text))
if err != nil {
return err
}
} else {
contextNode := &Node{
Type: ElementNode,
DataAtom: atom.Lookup([]byte(context)),
Data: context,
}
nodes, err := ParseFragment(strings.NewReader(text), contextNode)
if err != nil {
return err
}
doc = &Node{
Type: DocumentNode,
}
for _, n := range nodes {
doc.AppendChild(n)
}
}
if err := checkTreeConsistency(doc); err != nil {
return err
}
got, err := dump(doc)
if err != nil {
return err
}
// Compare the parsed tree to the #document section.
if got != want {
return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
}
if renderTestBlacklist[text] || context != "" {
return nil
}
// Check that rendering and re-parsing results in an identical tree.
pr, pw := io.Pipe()
go func() {
pw.CloseWithError(Render(pw, doc))
}()
doc1, err := Parse(pr)
if err != nil {
return err
}
got1, err := dump(doc1)
if err != nil {
return err
}
if got != got1 {
return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
}
return nil
}
// Some test input result in parse trees are not 'well-formed' despite
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
// tree will not result in an exact clone of that tree. We blacklist such
// inputs from the render test.
var renderTestBlacklist = map[string]bool{
// The second <a> will be reparented to the first <table>'s parent. This
// results in an <a> whose parent is an <a>, which is not 'well-formed'.
`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
// The same thing with a <p>:
`<p><table></p>`: true,
// More cases of <a> being reparented:
`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
`<a><table><a></table><p><a><div><a>`: true,
`<a><table><td><a><table></table><a></tr><a></table><a>`: true,
// A similar reparenting situation involving <nobr>:
`<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
// A <plaintext> element is reparented, putting it before a table.
// A <plaintext> element can't have anything after it in HTML.
`<table><plaintext><td>`: true,
`<!doctype html><table><plaintext></plaintext>`: true,
`<!doctype html><table><tbody><plaintext></plaintext>`: true,
`<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
// A form inside a table inside a form doesn't work either.
`<!doctype html><form><table></form><form></table></form>`: true,
// A script that ends at EOF may escape its own closing tag when rendered.
`<!doctype html><script><!--<script `: true,
`<!doctype html><script><!--<script <`: true,
`<!doctype html><script><!--<script <a`: true,
`<!doctype html><script><!--<script </`: true,
`<!doctype html><script><!--<script </s`: true,
`<!doctype html><script><!--<script </script`: true,
`<!doctype html><script><!--<script </scripta`: true,
`<!doctype html><script><!--<script -`: true,
`<!doctype html><script><!--<script -a`: true,
`<!doctype html><script><!--<script -<`: true,
`<!doctype html><script><!--<script --`: true,
`<!doctype html><script><!--<script --a`: true,
`<!doctype html><script><!--<script --<`: true,
`<script><!--<script `: true,
`<script><!--<script <a`: true,
`<script><!--<script </script`: true,
`<script><!--<script </scripta`: true,
`<script><!--<script -`: true,
`<script><!--<script -a`: true,
`<script><!--<script --`: true,
`<script><!--<script --a`: true,
`<script><!--<script <`: true,
`<script><!--<script </`: true,
`<script><!--<script </s`: true,
// Reconstructing the active formatting elements results in a <plaintext>
// element that contains an <a> element.
`<!doctype html><p><a><plaintext>b`: true,
}
func TestNodeConsistency(t *testing.T) {
// inconsistentNode is a Node whose DataAtom and Data do not agree.
inconsistentNode := &Node{
Type: ElementNode,
DataAtom: atom.Frameset,
Data: "table",
}
_, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode)
if err == nil {
t.Errorf("got nil error, want non-nil")
}
}
func BenchmarkParser(b *testing.B) {
buf, err := ioutil.ReadFile("testdata/go1.html")
if err != nil {
b.Fatalf("could not read testdata/go1.html: %v", err)
}
b.SetBytes(int64(len(buf)))
runtime.GC()
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
Parse(bytes.NewBuffer(buf))
}
}

@ -0,0 +1,271 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
)
type writer interface {
io.Writer
WriteByte(c byte) error // in Go 1.1, use io.ByteWriter
WriteString(string) (int, error)
}
// Render renders the parse tree n to the given writer.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
func Render(w io.Writer, n *Node) error {
if x, ok := w.(writer); ok {
return render(x, n)
}
buf := bufio.NewWriter(w)
if err := render(buf, n); err != nil {
return err
}
return buf.Flush()
}
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
func render(w writer, n *Node) error {
err := render1(w, n)
if err == plaintextAbort {
err = nil
}
return err
}
func render1(w writer, n *Node) error {
// Render non-element nodes; these are the easy cases.
switch n.Type {
case ErrorNode:
return errors.New("html: cannot render an ErrorNode node")
case TextNode:
return escape(w, n.Data)
case DocumentNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
return nil
case ElementNode:
// No-op.
case CommentNode:
if _, err := w.WriteString("<!--"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if _, err := w.WriteString("-->"); err != nil {
return err
}
return nil
case DoctypeNode:
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if n.Attr != nil {
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" {
if _, err := w.WriteString(" PUBLIC "); err != nil {
return err
}
if err := writeQuoted(w, p); err != nil {
return err
}
if s != "" {
if err := w.WriteByte(' '); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
} else if s != "" {
if _, err := w.WriteString(" SYSTEM "); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
}
return w.WriteByte('>')
default:
return errors.New("html: unknown node type")
}
// Render the <xxx> opening tag.
if err := w.WriteByte('<'); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
for _, a := range n.Attr {
if err := w.WriteByte(' '); err != nil {
return err
}
if a.Namespace != "" {
if _, err := w.WriteString(a.Namespace); err != nil {
return err
}
if err := w.WriteByte(':'); err != nil {
return err
}
}
if _, err := w.WriteString(a.Key); err != nil {
return err
}
if _, err := w.WriteString(`="`); err != nil {
return err
}
if err := escape(w, a.Val); err != nil {
return err
}
if err := w.WriteByte('"'); err != nil {
return err
}
}
if voidElements[n.Data] {
if n.FirstChild != nil {
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
}
_, err := w.WriteString("/>")
return err
}
if err := w.WriteByte('>'); err != nil {
return err
}
// Add initial newline where there is danger of a newline beging ignored.
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
switch n.Data {
case "pre", "listing", "textarea":
if err := w.WriteByte('\n'); err != nil {
return err
}
}
}
// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
return err
}
} else {
if err := render1(w, c); err != nil {
return err
}
}
}
if n.Data == "plaintext" {
// Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort
}
default:
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
}
// Render the </xxx> closing tag.
if _, err := w.WriteString("</"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
return w.WriteByte('>')
}
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.
// In valid HTML, they can't contain both types of quotes.
func writeQuoted(w writer, s string) error {
var q byte = '"'
if strings.Contains(s, `"`) {
q = '\''
}
if err := w.WriteByte(q); err != nil {
return err
}
if _, err := w.WriteString(s); err != nil {
return err
}
if err := w.WriteByte(q); err != nil {
return err
}
return nil
}
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
var voidElements = map[string]bool{
"area": true,
"base": true,
"br": true,
"col": true,
"command": true,
"embed": true,
"hr": true,
"img": true,
"input": true,
"keygen": true,
"link": true,
"meta": true,
"param": true,
"source": true,
"track": true,
"wbr": true,
}

@ -0,0 +1,156 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"testing"
)
func TestRenderer(t *testing.T) {
nodes := [...]*Node{
0: {
Type: ElementNode,
Data: "html",
},
1: {
Type: ElementNode,
Data: "head",
},
2: {
Type: ElementNode,
Data: "body",
},
3: {
Type: TextNode,
Data: "0<1",
},
4: {
Type: ElementNode,
Data: "p",
Attr: []Attribute{
{
Key: "id",
Val: "A",
},
{
Key: "foo",
Val: `abc"def`,
},
},
},
5: {
Type: TextNode,
Data: "2",
},
6: {
Type: ElementNode,
Data: "b",
Attr: []Attribute{
{
Key: "empty",
Val: "",
},
},
},
7: {
Type: TextNode,
Data: "3",
},
8: {
Type: ElementNode,
Data: "i",
Attr: []Attribute{
{
Key: "backslash",
Val: `\`,
},
},
},
9: {
Type: TextNode,
Data: "&4",
},
10: {
Type: TextNode,
Data: "5",
},
11: {
Type: ElementNode,
Data: "blockquote",
},
12: {
Type: ElementNode,
Data: "br",
},
13: {
Type: TextNode,
Data: "6",
},
}
// Build a tree out of those nodes, based on a textual representation.
// Only the ".\t"s are significant. The trailing HTML-like text is
// just commentary. The "0:" prefixes are for easy cross-reference with
// the nodes array.
treeAsText := [...]string{
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
10: `. . "5"`,
11: `. . <blockquote>`,
12: `. . <br>`,
13: `. . "6"`,
}
if len(nodes) != len(treeAsText) {
t.Fatal("len(nodes) != len(treeAsText)")
}
var stack [8]*Node
for i, line := range treeAsText {
level := 0
for line[0] == '.' {
// Strip a leading ".\t".
line = line[2:]
level++
}
n := nodes[i]
if level == 0 {
if stack[0] != nil {
t.Fatal("multiple root nodes")
}
stack[0] = n
} else {
stack[level-1].AppendChild(n)
stack[level] = n
for i := level + 1; i < len(stack); i++ {
stack[i] = nil
}
}
// At each stage of tree construction, we check all nodes for consistency.
for j, m := range nodes {
if err := checkNodeConsistency(m); err != nil {
t.Fatalf("i=%d, j=%d: %v", i, j, err)
}
}
}
want := `<html><head></head><body>0&lt;1<p id="A" foo="abc&#34;def">` +
`2<b empty="">3</b><i backslash="\">&amp;4</i></p>` +
`5<blockquote></blockquote><br/>6</body></html>`
b := new(bytes.Buffer)
if err := Render(b, nodes[0]); err != nil {
t.Fatal(err)
}
if got := b.String(); got != want {
t.Errorf("got vs want:\n%s\n%s\n", got, want)
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,28 @@
The *.dat files in this directory are copied from The WebKit Open Source
Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
WebKit is licensed under a BSD style license.
http://webkit.org/coding/bsd-license.html says:
Copyright (C) 2009 Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,194 @@
#data
<a><p></a></p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <p>
| <a>
#data
<a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
#data
<a>1<button>2</a>3</button>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <button>
| <a>
| "2"
| "3"
#data
<a>1<b>2</a>3</b>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <b>
| "2"
| <b>
| "3"
#data
<a>1<div>2<div>3</a>4</div>5</div>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <div>
| <a>
| "2"
| <div>
| <a>
| "3"
| "4"
| "5"
#data
<table><a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
| <table>
#data
<b><b><a><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <a>
| <p>
| <a>
#data
<b><a><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <a>
| <b>
| <b>
| <p>
| <a>
#data
<a><b><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <b>
| <b>
| <p>
| <a>
#data
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "1"
| <s>
| id="A"
| "2"
| <b>
| id="B"
| "3"
| <s>
| id="A"
| <b>
| id="B"
| "4"
| <b>
| id="B"
| "5"
#data
<table><a>1<td>2</td>3</table>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <a>
| "3"
| <table>
| <tbody>
| <tr>
| <td>
| "2"
#data
<table>A<td>B</td>C</table>
#errors
#document
| <html>
| <head>
| <body>
| "AC"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<a><svg><tr><input></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <svg svg>
| <svg tr>
| <svg input>

@ -0,0 +1,31 @@
#data
<b>1<i>2<p>3</b>4
#errors
#document
| <html>
| <head>
| <body>
| <b>
| "1"
| <i>
| "2"
| <i>
| <p>
| <b>
| "3"
| "4"
#data
<a><div><style></style><address><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <style>
| <address>
| <a>
| <a>

@ -0,0 +1,135 @@
#data
FOO<!-- BAR -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- >BAZ -->
#data
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
#data
FOO<!---->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!--->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!-->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
<?xml version="1.0">Hi
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
| "Hi"
#data
<?xml version="1.0">
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
#data
<?xml version
#errors
#document
| <!-- ?xml version -->
| <html>
| <head>
| <body>
#data
FOO<!----->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- - -->
| "BAZ"

@ -0,0 +1,370 @@
#data
<!DOCTYPE html>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!dOctYpE HtMl>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPEhtml>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE>Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE >Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco "ddd>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM ggg>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM taco >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM 'taco"'>Hello
#errors
#document
| <!DOCTYPE potato "" "taco"">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "taco">Hello
#errors
#document
| <!DOCTYPE potato "" "taco">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "tai'co">Hello
#errors
#document
| <!DOCTYPE potato "" "tai'co">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato grass SYSTEM taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIcgoof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC goof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "go'of">Hello
#errors
#document
| <!DOCTYPE potato "go'of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go'of'>Hello
#errors
#document
| <!DOCTYPE potato "go" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
#errors
#document
| <!DOCTYPE potato "go:hh of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
#errors
#document
| <!DOCTYPE potato "W3C-//dfdf" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">Hello
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE ...>Hello
#errors
#document
| <!DOCTYPE ...>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
<!-- internal declarations -->
]>
#errors
#document
| <!DOCTYPE root-element>
| <html>
| <head>
| <body>
| "]>"
#data
<!DOCTYPE html PUBLIC
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
#errors
#document
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
#errors
#document
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
| <html>
| <head>
| <body>
| <b>
| "Mine!"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>

@ -0,0 +1,603 @@
#data
FOO&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gtBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gt BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO> BAR"
#data
FOO&gt;;;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>;;BAR"
#data
I'm &notit; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ¬it; I tell you"
#data
I'm &notin; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ∉ I tell you"
#data
FOO& BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO& BAR"
#data
FOO&<BAR>
#errors
#document
| <html>
| <head>
| <body>
| "FOO&"
| <bar>
#data
FOO&&&&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&&&>BAR"
#data
FOO&#41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#X41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#BAR"
#data
FOO&#ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#ZOO"
#data
FOO&#xBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOºR"
#data
FOO&#xZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#xZOO"
#data
FOO&#XZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#XZOO"
#data
FOO&#41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO䆺R"
#data
FOO&#x41ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOAZOO"
#data
FOO&#x0000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<EFBFBD>ZOO"
#data
FOO&#x0078;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOxZOO"
#data
FOO&#x0079;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOyZOO"
#data
FOO&#x0080;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO€ZOO"
#data
FOO&#x0081;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0082;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‚ZOO"
#data
FOO&#x0083;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOƒZOO"
#data
FOO&#x0084;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO„ZOO"
#data
FOO&#x0085;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO…ZOO"
#data
FOO&#x0086;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO†ZOO"
#data
FOO&#x0087;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‡ZOO"
#data
FOO&#x0088;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOˆZOO"
#data
FOO&#x0089;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‰ZOO"
#data
FOO&#x008A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŠZOO"
#data
FOO&#x008B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‹ZOO"
#data
FOO&#x008C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŒZOO"
#data
FOO&#x008D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŽZOO"
#data
FOO&#x008F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0090;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0091;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‘ZOO"
#data
FOO&#x0092;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO’ZOO"
#data
FOO&#x0093;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO“ZOO"
#data
FOO&#x0094;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO”ZOO"
#data
FOO&#x0095;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO•ZOO"
#data
FOO&#x0096;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO–ZOO"
#data
FOO&#x0097;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO—ZOO"
#data
FOO&#x0098;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO˜ZOO"
#data
FOO&#x0099;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO™ZOO"
#data
FOO&#x009A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOšZOO"
#data
FOO&#x009B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO›ZOO"
#data
FOO&#x009C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOœZOO"
#data
FOO&#x009D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOžZOO"
#data
FOO&#x009F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŸZOO"
#data
FOO&#x00A0;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO ZOO"
#data
FOO&#xD7FF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO퟿ZOO"
#data
FOO&#xD800;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<EFBFBD>ZOO"
#data
FOO&#xD801;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<EFBFBD>ZOO"
#data
FOO&#xDFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<EFBFBD>ZOO"
#data
FOO&#xDFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<EFBFBD>ZOO"
#data
FOO&#xE000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x10FFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿾ZOO"
#data
FOO&#x1087D4;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􈟔ZOO"
#data
FOO&#x10FFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿿ZOO"
#data
FOO&#x110000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<EFBFBD>ZOO"
#data
FOO&#xFFFFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<EFBFBD>ZOO"

@ -0,0 +1,249 @@
#data
<div bar="ZZ&gt;YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>YY"
#data
<div bar="ZZ&"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar='ZZ&'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar=ZZ&></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar="ZZ&gt=YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt=YY"
#data
<div bar="ZZ&gt0YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt0YY"
#data
<div bar="ZZ&gt9YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt9YY"
#data
<div bar="ZZ&gtaYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtaYY"
#data
<div bar="ZZ&gtZYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtZYY"
#data
<div bar="ZZ&gt YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ> YY"
#data
<div bar="ZZ&gt"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar='ZZ&gt'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar=ZZ&gt></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar="ZZ&pound_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod_id=23"
#data
<div bar="ZZ&pound;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ∏_id=23"
#data
<div bar="ZZ&pound=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&pound=23"
#data
<div bar="ZZ&prod=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod=23"
#data
<div>ZZ&pound_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod_id=23"
#data
<div>ZZ&pound;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ∏_id=23"
#data
<div>ZZ&pound=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£=23"
#data
<div>ZZ&prod=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod=23"

@ -0,0 +1,246 @@
#data
<div<div>
#errors
#document
| <html>
| <head>
| <body>
| <div<div>
#data
<div foo<bar=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo<bar=""
#data
<div foo=`bar`>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="`bar`"
#data
<div \"foo=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| \"foo=""
#data
<a href='\nbar'></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| href="\nbar"
#data
<!DOCTYPE html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
&lang;&rang;
#errors
#document
| <html>
| <head>
| <body>
| "⟨⟩"
#data
&apos;
#errors
#document
| <html>
| <head>
| <body>
| "'"
#data
&ImaginaryI;
#errors
#document
| <html>
| <head>
| <body>
| "ⅈ"
#data
&Kopf;
#errors
#document
| <html>
| <head>
| <body>
| "𝕂"
#data
&notinva;
#errors
#document
| <html>
| <head>
| <body>
| "∉"
#data
<?import namespace="foo" implementation="#bar">
#errors
#document
| <!-- ?import namespace="foo" implementation="#bar" -->
| <html>
| <head>
| <body>
#data
<!--foo--bar-->
#errors
#document
| <!-- foo--bar -->
| <html>
| <head>
| <body>
#data
<![CDATA[x]]>
#errors
#document
| <!-- [CDATA[x]] -->
| <html>
| <head>
| <body>
#data
<textarea><!--</textarea>--></textarea>
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<textarea><!--</textarea>-->
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<style><!--</style>--></style>
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<style><!--</style>-->
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<ul><li>A </li> <li>B</li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| "A "
| " "
| <li>
| "B"
#data
<table><form><input type=hidden><input></form><div></div></table>
#errors
#document
| <html>
| <head>
| <body>
| <input>
| <div>
| <table>
| <form>
| <input>
| type="hidden"
#data
<i>A<b>B<p></i>C</b>D
#errors
#document
| <html>
| <head>
| <body>
| <i>
| "A"
| <b>
| "B"
| <b>
| <p>
| <b>
| <i>
| "C"
| "D"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<svg></svg>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<math></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>

@ -0,0 +1,43 @@
#data
<button>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <button>
| "1"
#data
<foo>1<p>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <p>
| "2"
#data
<dd>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| "1"
#data
<foo>1<dd>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <dd>
| "2"

@ -0,0 +1,40 @@
#data
<isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| <hr>
#data
<isindex name="A" action="B" prompt="C" foo="D">
#errors
#document
| <html>
| <head>
| <body>
| <form>
| action="B"
| <hr>
| <label>
| "C"
| <input>
| foo="D"
| name="isindex"
| <hr>
#data
<form><isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>

@ -0,0 +1,52 @@
#data
<input type="hidden"><frameset>
#errors
21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
31: “frameset” start tag seen.
31: End of file seen and there were open elements.
#document
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><table><caption><svg>foo</table>bar
#errors
47: End tag “table” did not match the name of the current open element (“svg”).
47: “table” closed but “caption” was still open.
47: End tag “table” seen, but there were open elements.
36: Unclosed element “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| "foo"
| "bar"
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
7: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
30: A table cell was implicitly closed, but there were open elements.
26: Unclosed element “desc”.
20: Unclosed element “svg”.
37: Stray end tag “desc”.
45: End of file seen and there were open elements.
45: Unclosed element “circle”.
7: Unclosed element “table”.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>
| <circle>

@ -0,0 +1,308 @@
#data
FOO<script>'Hello'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'Hello'"
| "BAR"
#data
FOO<script></script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/ >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script type="text/plain"></scriptx>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "</scriptx>BAR"
#data
FOO<script></script foo=">" dd>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script>'<'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<'"
| "BAR"
#data
FOO<script>'<!'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!'"
| "BAR"
#data
FOO<script>'<!-'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-'"
| "BAR"
#data
FOO<script>'<!--'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!--'"
| "BAR"
#data
FOO<script>'<!---'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!---'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-- potato'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- potato'"
| "BAR"
#data
FOO<script>'<!-- <sCrIpt'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --'</script>BAR"
#data
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt> -->'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --!>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -- >'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt '</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt\'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
| "QUX"

@ -0,0 +1,15 @@
#data
<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="B"
| <script>
| "document.getElementById("A").id = "B""
| <b>
| id="A"
| "TEXT"

@ -0,0 +1,28 @@
#data
1<script>document.write("2")</script>3
#errors
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("2")"
| "23"
#data
1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
#errors
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
| <script>
| "document.write('2')"
| "2"
| <script>
| "document.write('3')"
| "34"

@ -0,0 +1,212 @@
#data
<table><th>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
#data
<table><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><col foo='bar'>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
| <col>
| foo="bar"
#data
<table><colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
<table></table><p>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <p>
| "foo"
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><select><option>3</select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "3"
| <table>
#data
<table><select><table></table></select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
| <table>
#data
<table><select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
#data
<table><select><option>A<tr><td>B</td></tr></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<table><td></body></caption></col></colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td>A</table>B
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
| "B"
#data
<table><tr><caption>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <caption>
#data
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td><tr>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <tr>
#data
<table><td><button><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <button>
| <td>
#data
<table><tr><td><svg><desc><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>

File diff suppressed because it is too large Load Diff

@ -0,0 +1,799 @@
#data
<!DOCTYPE html><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg></svg><![CDATA[a]]>
#errors
29: Bogus comment
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <!-- [CDATA[a]] -->
#data
<!DOCTYPE html><body><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><body><select><svg></svg></select>
#errors
35: Stray “svg” start tag.
42: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><svg></svg></option></select>
#errors
43: Stray “svg” start tag.
50: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><svg></svg></table>
#errors
34: Start tag “svg” seen in “table”.
41: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
53: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
58: Stray end tag “g”.
65: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
#errors
41: Start tag “svg” seen in “table”.
53: Stray end tag “g”.
65: Stray end tag “g”.
72: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
#errors
45: Start tag “svg” seen in “table”.
57: Stray end tag “g”.
69: Stray end tag “g”.
76: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
70: HTML start tag “p” in a foreign namespace context.
81: “table” closed but “caption” was still open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
#errors
78: “table” closed but “caption” was still open.
78: Unclosed elements on stack.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
44: Start tag “svg” seen in “table”.
56: Stray end tag “g”.
68: Stray end tag “g”.
71: HTML start tag “p” in a foreign namespace context.
71: Start tag “p” seen in “table”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
50: Stray “svg” start tag.
54: Stray “g” start tag.
62: Stray end tag “g”
66: Stray “g” start tag.
74: Stray end tag “g”
77: Stray “p” start tag.
88: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
36: Start tag “select” seen in “table”.
42: Stray “svg” start tag.
46: Stray “g” start tag.
54: Stray end tag “g”
58: Stray “g” start tag.
66: Stray end tag “g”
69: Stray “p” start tag.
80: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
#errors
41: Stray “svg” start tag.
68: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
#errors
34: Stray “svg” start tag.
61: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
#errors
31: Stray “svg” start tag.
35: Stray “g” start tag.
40: Stray end tag “g”
44: Stray “g” start tag.
49: Stray end tag “g”
52: Stray “p” start tag.
58: Stray “span” start tag.
58: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
#errors
42: Stray “svg” start tag.
46: Stray “g” start tag.
51: Stray end tag “g”
55: Stray “g” start tag.
60: Stray end tag “g”
63: Stray “p” start tag.
69: Stray “span” start tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <svg svg>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
| "bar"
#data
<svg></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<div><svg></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| "a"
#data
<div><svg><path></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| "a"
#data
<div><svg><path></svg><path>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <path>
#data
<div><svg><path><foreignObject><math></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <math math>
| "a"
#data
<div><svg><path><foreignObject><p></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| "a"
#data
<!DOCTYPE html><svg><desc><div><svg><ul>a
#errors
40: HTML start tag “ul” in a foreign namespace context.
41: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <div>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><svg><desc><svg><ul>a
#errors
35: HTML start tag “ul” in a foreign namespace context.
36: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><p><svg><desc><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg desc>
| <p>
#data
<!DOCTYPE html><p><svg><title><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg title>
| <p>
#data
<div><svg><path><foreignObject><p></foreignObject><p>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| <p>
#data
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <div>
| <object>
| <div>
| <span>
| <math mi>
#data
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <svg svg>
| <svg foreignObject>
| <div>
| <div>
| <math mi>
#data
<svg><script></script><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg script>
| <svg path>
#data
<table><svg></svg><tr>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <table>
| <tbody>
| <tr>
#data
<math><mi><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math mglyph>
#data
<math><mi><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math malignmark>
#data
<math><mo><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math mglyph>
#data
<math><mo><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math malignmark>
#data
<math><mn><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math mglyph>
#data
<math><mn><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math malignmark>
#data
<math><ms><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math mglyph>
#data
<math><ms><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math malignmark>
#data
<math><mtext><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math mglyph>
#data
<math><mtext><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math malignmark>
#data
<math><annotation-xml><svg></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <div>
| <math math>
| <math mi>
| <span>
| <svg path>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <math math>
| <math mi>
| <svg svg>
| <math mo>
| <span>
| <svg path>
| <math mi>

@ -0,0 +1,482 @@
#data
<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| attributename=""
| attributetype=""
| basefrequency=""
| baseprofile=""
| calcmode=""
| clippathunits=""
| contentscripttype=""
| contentstyletype=""
| diffuseconstant=""
| edgemode=""
| externalresourcesrequired=""
| filterres=""
| filterunits=""
| glyphref=""
| gradienttransform=""
| gradientunits=""
| kernelmatrix=""
| kernelunitlength=""
| keypoints=""
| keysplines=""
| keytimes=""
| lengthadjust=""
| limitingconeangle=""
| markerheight=""
| markerunits=""
| markerwidth=""
| maskcontentunits=""
| maskunits=""
| numoctaves=""
| pathlength=""
| patterncontentunits=""
| patterntransform=""
| patternunits=""
| pointsatx=""
| pointsaty=""
| pointsatz=""
| preservealpha=""
| preserveaspectratio=""
| primitiveunits=""
| refx=""
| refy=""
| repeatcount=""
| repeatdur=""
| requiredextensions=""
| requiredfeatures=""
| specularconstant=""
| specularexponent=""
| spreadmethod=""
| startoffset=""
| stddeviation=""
| stitchtiles=""
| surfacescale=""
| systemlanguage=""
| tablevalues=""
| targetx=""
| targety=""
| textlength=""
| viewbox=""
| viewtarget=""
| xchannelselector=""
| ychannelselector=""
| zoomandpan=""
#data
<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math altglyph>
| <math altglyphdef>
| <math altglyphitem>
| <math animatecolor>
| <math animatemotion>
| <math animatetransform>
| <math clippath>
| <math feblend>
| <math fecolormatrix>
| <math fecomponenttransfer>
| <math fecomposite>
| <math feconvolvematrix>
| <math fediffuselighting>
| <math fedisplacementmap>
| <math fedistantlight>
| <math feflood>
| <math fefunca>
| <math fefuncb>
| <math fefuncg>
| <math fefuncr>
| <math fegaussianblur>
| <math feimage>
| <math femerge>
| <math femergenode>
| <math femorphology>
| <math feoffset>
| <math fepointlight>
| <math fespecularlighting>
| <math fespotlight>
| <math fetile>
| <math feturbulence>
| <math foreignobject>
| <math glyphref>
| <math lineargradient>
| <math radialgradient>
| <math textpath>
#data
<!DOCTYPE html><body><svg><solidColor /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg solidcolor>

@ -0,0 +1,62 @@
#data
<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"
#data
<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"

@ -0,0 +1,74 @@
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
| <span>
#data
<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
#errors
15: Unexpected start tag html
#document
| <!DOCTYPE html>
| <html>
| abc:def="gh"
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
#errors
15: Unexpected start tag html
#document
| <!DOCTYPE html>
| <html>
| xml:lang="bar"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456><html 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| 789="012"
| <head>
| <body>
#data
<!DOCTYPE html><html><body 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| 789="012"

@ -0,0 +1,208 @@
#data
<!DOCTYPE html><p><b><i><u></p> <p>X
#errors
Line: 1 Col: 31 Unexpected end tag (p). Ignored.
Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| " "
| <p>
| "X"
#data
<p><b><i><u></p>
<p>X
#errors
Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected end tag (p). Ignored.
Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| "
"
| <p>
| "X"
#data
<!doctype html></html> <head>
#errors
Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " "
#data
<!doctype html></body><meta>
#errors
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
#data
<html></html><!-- foo -->
#errors
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element.
#document
| <html>
| <head>
| <body>
| <!-- foo -->
#data
<!doctype html></body><title>X</title>
#errors
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
#data
<!doctype html><table> X<meta></table>
#errors
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " X"
| <meta>
| <table>
#data
<!doctype html><table> x</table>
#errors
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x"
| <table>
#data
<!doctype html><table> x </table>
#errors
Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x "
| <table>
#data
<!doctype html><table><tr> x</table>
#errors
Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table>X<style> <tr>x </style> </table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <style>
| " <tr>x "
| " "
#data
<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
#errors
Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode.
Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <a>
| "foo"
| <table>
| " "
| <tbody>
| <tr>
| <td>
| "bar"
| " "
#data
<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
#errors
6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
13: Stray start tag “frame”.
21: Stray end tag “frame”.
29: Stray end tag “frame”.
39: “frameset” start tag after “body” already open.
105: End of file seen inside an [R]CDATA element.
105: End of file seen and there were open elements.
XXX: These errors are wrong, please fix me!
#document
| <html>
| <head>
| <frameset>
| <frame>
| <frameset>
| <frame>
| <noframes>
| "</frameset><noframes>"
#data
<!DOCTYPE html><object></html>
#errors
1: Expected closing tag. Unexpected end of file
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <object>

File diff suppressed because it is too large Load Diff

@ -0,0 +1,153 @@
#data
<!doctype html><table><tbody><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><tr><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <table>
| <tbody>
| <tr>
| <td>
#data
<!doctype html><table><tr><td><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| <td>
#data
<!doctype html><table><tr><th><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
| <select>
| <td>
#data
<!doctype html><table><caption><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <select>
| <tbody>
| <tr>
#data
<!doctype html><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><th>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><tbody>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><thead>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><tfoot>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><caption>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><table><tr></table>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| "a"

@ -0,0 +1,269 @@
#data
<!doctype html><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
#data
<!doctype html><table><tbody><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
#data
<!doctype html><table><tbody><tr><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><tbody><tr><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><td><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><caption><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><tr><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "abc"
| <table>
| <tbody>
| <tr>
| <style>
| "</script>"
#data
<!doctype html><table><tr><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "abc"
| <table>
| <tbody>
| <tr>
| <script>
| "</style>"
#data
<!doctype html><table><caption><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <style>
| "</script>"
| "abc"
#data
<!doctype html><table><td><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <style>
| "</script>"
| "abc"
#data
<!doctype html><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
#data
<!doctype html><table><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
| <table>
#data
<!doctype html><table><tr><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><frameset></frameset><noframes>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
#data
<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
| <!-- abc -->
#data
<!doctype html><frameset></frameset></html><noframes>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
#data
<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
| <!-- abc -->
#data
<!doctype html><table><tr></tbody><tfoot>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <tfoot>
#data
<!doctype html><table><td><svg></svg>abc<td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| "abc"
| <td>

File diff suppressed because it is too large Load Diff

@ -0,0 +1,763 @@
#data
<!DOCTYPE html>Test
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Test"
#data
<textarea>test</div>test
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <textarea>
| "test</div>test"
#data
<table><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td>test</tbody></table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "test"
#data
<frame>test
#errors
Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
Line: 1 Col: 7 Unexpected start tag frame. Ignored.
#document
| <html>
| <head>
| <body>
| "test"
#data
<!DOCTYPE html><frameset>test
#errors
Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset><!DOCTYPE html>
#errors
Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><font><p><b>test</font>
#errors
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <font>
| <p>
| <font>
| <b>
| "test"
#data
<!DOCTYPE html><dt><div><dd>
#errors
Line: 1 Col: 28 Missing end tag (div, dt).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dt>
| <div>
| <dd>
#data
<script></x
#errors
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
#document
| <html>
| <head>
| <script>
| "</x"
| <body>
#data
<table><plaintext><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
Line: 1 Col: 22 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "<td>"
| <table>
#data
<plaintext></plaintext>
#errors
Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!DOCTYPE html><table><tr>TEST
#errors
Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 30 Unexpected end of file. Expected table content.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "TEST"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
#errors
Line: 1 Col: 37 Unexpected start tag (body).
Line: 1 Col: 53 Unexpected start tag (body).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| t1="1"
| t2="2"
| t3="3"
| t4="4"
#data
</b test
#errors
Line: 1 Col: 8 Unexpected end of file in attribute name.
Line: 1 Col: 8 End tag contains unexpected attributes.
Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html></b test<b &=&amp>X
#errors
Line: 1 Col: 32 Named entity didn't end with ';'.
Line: 1 Col: 33 End tag contains unexpected attributes.
Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
#data
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 54 Unexpected end of file in the tag name.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <script>
| type="text/x-foobar;baz"
| "X</SCRipt"
| <body>
#data
&
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&#
#errors
Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#"
#data
&#X
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#X"
#data
&#x
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#x"
#data
&#45
#errors
Line: 1 Col: 4 Numeric entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "-"
#data
&x-test
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&x-test"
#data
<!doctypehtml><p><li>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <li>
#data
<!doctypehtml><p><dt>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <dt>
#data
<!doctypehtml><p><dd>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <dd>
#data
<!doctypehtml><p><form>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <form>
#data
<!DOCTYPE html><p></P>X
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "X"
#data
&AMP
#errors
Line: 1 Col: 4 Named entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&AMp;
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&AMp;"
#data
<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
#errors
Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
#data
<!DOCTYPE html>X</body>X
#errors
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE html><!-- X
#errors
Line: 1 Col: 21 Unexpected end of file in comment.
#document
| <!DOCTYPE html>
| <!-- X -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><table><caption>test TEST</caption><td>test
#errors
Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| "test TEST"
| <tbody>
| <tr>
| <td>
| "test"
#data
<!DOCTYPE html><select><option><optgroup>
#errors
Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
| <optgroup>
#data
<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
#errors
Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag.
Line: 1 Col: 76 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <option>
| <option>
#data
<!DOCTYPE html><select><optgroup><option><optgroup>
#errors
Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <optgroup>
#data
<!DOCTYPE html><datalist><option>foo</datalist>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <datalist>
| <option>
| "foo"
| "bar"
#data
<!DOCTYPE html><font><input><input></font>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <font>
| <input>
| <input>
#data
<!DOCTYPE html><!-- XXX - XXX -->
#errors
#document
| <!DOCTYPE html>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><!-- XXX - XXX
#errors
Line: 1 Col: 29 Unexpected end of file in comment (-)
#document
| <!DOCTYPE html>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><!-- XXX - XXX - XXX -->
#errors
#document
| <!DOCTYPE html>
| <!-- XXX - XXX - XXX -->
| <html>
| <head>
| <body>
#data
<isindex test=x name=x>
#errors
Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| test="x"
| <hr>
#data
test
test
#errors
Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "test
test"
#data
<!DOCTYPE html><body><title>test</body></title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "test</body>"
#data
<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
x { content:"</style" } </style>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
| <meta>
| name="z"
| <link>
| rel="foo"
| <style>
| "
x { content:"</style" } "
#data
<!DOCTYPE html><select><optgroup></optgroup></select>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
#data
#errors
Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html> <html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><script>
</script> <title>x</title> </head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <script>
| "
"
| " "
| <title>
| "x"
| " "
| <body>
#data
<!DOCTYPE html><html><body><html id=x>
#errors
Line: 1 Col: 38 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE html>X</body><html id="x">
#errors
Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
Line: 1 Col: 36 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
| "X"
#data
<!DOCTYPE html><head><html id=x>
#errors
Line: 1 Col: 32 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE html>X</html>X
#errors
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE html>X</html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X "
#data
<!DOCTYPE html>X</html><p>X
#errors
Line: 1 Col: 26 Unexpected start tag (p).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <p>
| "X"
#data
<!DOCTYPE html>X<p/x/y/z>
#errors
Line: 1 Col: 19 Expected a > after the /.
Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <p>
| x=""
| y=""
| z=""
#data
<!DOCTYPE html><!--x--
#errors
Line: 1 Col: 22 Unexpected end of file in comment (--).
#document
| <!DOCTYPE html>
| <!-- x -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><table><tr><td></p></table>
#errors
Line: 1 Col: 34 Unexpected end tag (p). Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <p>
#data
<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
#errors
Line: 1 Col: 20 Expected space or '>'. Got ''
Line: 1 Col: 25 Erroneous DOCTYPE.
Line: 1 Col: 35 Unexpected character in comment found.
#document
| <!DOCTYPE <!doctype>
| <html>
| <head>
| <body>
| ">"
| <!-- <!--x -->
| "-->"
#data
<!doctype html><div><form></form><div></div></div>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <form>
| <div>

@ -0,0 +1,455 @@
#data
<!doctype html><p><button><button>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <button>
#data
<!doctype html><p><button><address>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <address>
#data
<!doctype html><p><button><blockquote>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <blockquote>
#data
<!doctype html><p><button><menu>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <menu>
#data
<!doctype html><p><button><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <p>
#data
<!doctype html><p><button><ul>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <ul>
#data
<!doctype html><p><button><h1>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <h1>
#data
<!doctype html><p><button><h6>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <h6>
#data
<!doctype html><p><button><listing>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <listing>
#data
<!doctype html><p><button><pre>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <pre>
#data
<!doctype html><p><button><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <form>
#data
<!doctype html><p><button><li>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <li>
#data
<!doctype html><p><button><dd>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <dd>
#data
<!doctype html><p><button><dt>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <dt>
#data
<!doctype html><p><button><plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <plaintext>
#data
<!doctype html><p><button><table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <table>
#data
<!doctype html><p><button><hr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <hr>
#data
<!doctype html><p><button><xmp>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <xmp>
#data
<!doctype html><p><button></p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <p>
#data
<!doctype html><address><button></address>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <button>
| "a"
#data
<!doctype html><address><button></address>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <button>
| "a"
#data
<p><table></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <p>
| <table>
#data
<!doctype html><svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!doctype html><p><figcaption>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <figcaption>
#data
<!doctype html><p><summary>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <summary>
#data
<!doctype html><form><table><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <form>
| <table>
#data
<!doctype html><table><form><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <form>
#data
<!doctype html><table><form></table><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <form>
#data
<!doctype html><svg><foreignObject><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <p>
#data
<!doctype html><svg><title>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| "abc"
#data
<option><span><option>
#errors
#document
| <html>
| <head>
| <body>
| <option>
| <span>
| <option>
#data
<option><option>
#errors
#document
| <html>
| <head>
| <body>
| <option>
| <option>
#data
<math><annotation-xml><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <div>
#data
<math><annotation-xml encoding="application/svg+xml"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="application/svg+xml"
| <div>
#data
<math><annotation-xml encoding="application/xhtml+xml"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="application/xhtml+xml"
| <div>
#data
<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="aPPlication/xhtmL+xMl"
| <div>
#data
<math><annotation-xml encoding="text/html"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="text/html"
| <div>
#data
<math><annotation-xml encoding="Text/htmL"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="Text/htmL"
| <div>
#data
<math><annotation-xml encoding=" text/html "><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding=" text/html "
| <div>

@ -0,0 +1,221 @@
#data
<svg><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<math><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| "foo"
#data
<div><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <!-- [CDATA[foo]] -->
#data
<svg><![CDATA[foo
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<svg><![CDATA[foo
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<svg><![CDATA[
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<svg><![CDATA[]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<svg><![CDATA[]] >]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]] >"
#data
<svg><![CDATA[]] >]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]] >"
#data
<svg><![CDATA[]]
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]]"
#data
<svg><![CDATA[]
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]"
#data
<svg><![CDATA[]>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]>a"
#data
<svg><foreignObject><div><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <div>
| <!-- [CDATA[foo]] -->
#data
<svg><![CDATA[<svg>]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
#data
<svg><![CDATA[</svg>a]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "</svg>a"
#data
<svg><![CDATA[<svg>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>a"
#data
<svg><![CDATA[</svg>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "</svg>a"
#data
<svg><![CDATA[<svg>]]><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
| <svg path>
#data
<svg><![CDATA[<svg>]]></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
#data
<svg><![CDATA[<svg>]]><!--path-->
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
| <!-- path -->
#data
<svg><![CDATA[<svg>]]>path
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>path"
#data
<svg><![CDATA[<!--svg-->]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<!--svg-->"

@ -0,0 +1,157 @@
#data
<a><b><big><em><strong><div>X</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <big>
| <em>
| <strong>
| <big>
| <em>
| <strong>
| <div>
| <a>
| "X"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| "A"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| <div>
| id="9"
| "A"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| <div>
| id="9"
| <div>
| id="10"
| "A"
#data
<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
#errors
Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE.
Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 50 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <cite>
| <b>
| <cite>
| <i>
| <cite>
| <i>
| <cite>
| <i>
| <i>
| <i>
| <div>
| <b>
| "X"
| "TEST"

@ -0,0 +1,155 @@
#data
<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
#errors
3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
116: Unclosed elements.
117: End of file seen and there were open elements.
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| color="red"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| color="red"
| <p>
| <font>
| color="red"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| color="red"
| "X"
#data
<p><font size=4><font size=4><font size=4><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"
#data
<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <font>
| size="4"
| "X"
#data
<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| id="a"
| size="4"
| <font>
| id="b"
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <p>
| <font>
| id="a"
| size="4"
| <font>
| id="b"
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"
#data
<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="a"
| <b>
| id="a"
| <b>
| id="a"
| <b>
| <object>
| <b>
| id="a"
| <b>
| id="a"
| "X"
| <p>
| <b>
| id="a"
| <b>
| id="a"
| <b>
| id="a"
| <b>
| "Y"

@ -0,0 +1,79 @@
#data
<!DOCTYPE html>&NotEqualTilde;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "≂̸"
#data
<!DOCTYPE html>&NotEqualTilde;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "≂̸A"
#data
<!DOCTYPE html>&ThickSpace;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| ""
#data
<!DOCTYPE html>&ThickSpace;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html>&NotSubset;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "⊂"
#data
<!DOCTYPE html>&NotSubset;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "⊂A"
#data
<!DOCTYPE html>&Gopf;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "𝔾"
#data
<!DOCTYPE html>&Gopf;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "𝔾A"

@ -0,0 +1,219 @@
#data
<!DOCTYPE html><body><foo>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <foo>
| "A"
#data
<!DOCTYPE html><body><area>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <area>
| "A"
#data
<!DOCTYPE html><body><base>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <base>
| "A"
#data
<!DOCTYPE html><body><basefont>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <basefont>
| "A"
#data
<!DOCTYPE html><body><bgsound>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <bgsound>
| "A"
#data
<!DOCTYPE html><body><br>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <br>
| "A"
#data
<!DOCTYPE html><body><col>A
#errors
26: Stray start tag “col”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html><body><command>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <command>
| "A"
#data
<!DOCTYPE html><body><embed>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <embed>
| "A"
#data
<!DOCTYPE html><body><frame>A
#errors
26: Stray start tag “frame”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html><body><hr>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <hr>
| "A"
#data
<!DOCTYPE html><body><img>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <img>
| "A"
#data
<!DOCTYPE html><body><input>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| "A"
#data
<!DOCTYPE html><body><keygen>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <keygen>
| "A"
#data
<!DOCTYPE html><body><link>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <link>
| "A"
#data
<!DOCTYPE html><body><meta>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
| "A"
#data
<!DOCTYPE html><body><param>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <param>
| "A"
#data
<!DOCTYPE html><body><source>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <source>
| "A"
#data
<!DOCTYPE html><body><track>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <track>
| "A"
#data
<!DOCTYPE html><body><wbr>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <wbr>
| "A"

@ -0,0 +1,313 @@
#data
<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <a>
| href="#1"
| <nobr>
| "1"
| <nobr>
| <nobr>
| <br>
| <a>
| href="#2"
| <a>
| href="#2"
| <nobr>
| "2"
| <nobr>
| <nobr>
| <br>
| <a>
| href="#3"
| <a>
| href="#3"
| <nobr>
| "3"
| <nobr>
#data
<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
| <table>
#data
<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <table>
| <tbody>
| <tr>
| <td>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <div>
| <b>
| <nobr>
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <div>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <ins>
| <nobr>
| <i>
| <i>
| <nobr>
#data
<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <ins>
| <nobr>
| <nobr>
| <i>
| "2"
#data
<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| "1"
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
#data
<p><code x</code></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <code>
| code=""
| x<=""
| <code>
| code=""
| x<=""
| "
"
#data
<!DOCTYPE html><svg><foreignObject><p><i></p>a
#errors
45: End tag “p” seen, but there were open elements.
41: Unclosed element “i”.
46: End of file seen and there were open elements.
35: Unclosed element “foreignObject”.
20: Unclosed element “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><table><tr><td><svg><foreignObject><p><i></p>a
#errors
56: End tag “p” seen, but there were open elements.
52: Unclosed element “i”.
57: End of file seen and there were open elements.
46: Unclosed element “foreignObject”.
31: Unclosed element “svg”.
22: Unclosed element “table”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg foreignObject>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><math><mtext><p><i></p>a
#errors
38: End tag “p” seen, but there were open elements.
34: Unclosed element “i”.
39: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><table><tr><td><math><mtext><p><i></p>a
#errors
53: End tag “p” seen, but there were open elements.
49: Unclosed element “i”.
54: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mtext>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><body><div><!/div>a
#errors
29: Bogus comment.
34: End of file seen and there were open elements.
26: Unclosed element “div”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <!-- /div -->
| "a"

@ -0,0 +1,305 @@
#data
<head></head><style></style>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved.
#document
| <html>
| <head>
| <style>
| <body>
#data
<head></head><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved.
#document
| <html>
| <head>
| <script>
| <body>
#data
<head></head><!-- --><style></style><!-- --><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved.
#document
| <html>
| <head>
| <style>
| <script>
| <!-- -->
| <!-- -->
| <body>
#data
<head></head><!-- -->x<style></style><!-- --><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#document
| <html>
| <head>
| <!-- -->
| <body>
| "x"
| <style>
| <!-- -->
| <script>
#data
<!DOCTYPE html><html><head></head><body><pre>
</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
#data
<!DOCTYPE html><html><head></head><body><pre>
foo</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "foo"
#data
<!DOCTYPE html><html><head></head><body><pre>
foo</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "
foo"
#data
<!DOCTYPE html><html><head></head><body><pre>
foo
</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "foo
"
#data
<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
</span></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <span>
| "
"
#data
<!DOCTYPE html><html><head></head><body><pre>x
y</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x
y"
#data
<!DOCTYPE html><html><head></head><body><pre>x<div>
y</pre></body></html>
#errors
Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <div>
| "
y"
#data
<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "
A"
#data
<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
#errors
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <meta>
| <body>
#data
<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
#errors
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<textarea>foo<span>bar</span><i>baz
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <textarea>
| "foo<span>bar</span><i>baz"
#data
<title>foo<span>bar</em><i>baz
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
Line: 1 Col: 30 Unexpected end of file. Expected end tag (title).
#document
| <html>
| <head>
| <title>
| "foo<span>bar</em><i>baz"
| <body>
#data
<!DOCTYPE html><textarea>
</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
#data
<!DOCTYPE html><textarea>
foo</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
| "foo"
#data
<!DOCTYPE html><textarea>
foo</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
| "
foo"
#data
<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
#errors
Line: 1 Col: 60 Missing end tag (div, li).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <ul>
| <li>
| <div>
| <p>
| <li>
#data
<!doctype html><nobr><nobr><nobr>
#errors
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nobr>
| <nobr>
| <nobr>
#data
<!doctype html><nobr><nobr></nobr><nobr>
#errors
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nobr>
| <nobr>
| <nobr>
#data
<!doctype html><html><body><p><table></table></body></html>
#errors
Not known
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <table>
#data
<p><table></table>
#errors
Not known
#document
| <html>
| <head>
| <body>
| <p>
| <table>

@ -0,0 +1,59 @@
#data
direct div content
#errors
#document-fragment
div
#document
| "direct div content"
#data
direct textarea content
#errors
#document-fragment
textarea
#document
| "direct textarea content"
#data
textarea content with <em>pseudo</em> <foo>markup
#errors
#document-fragment
textarea
#document
| "textarea content with <em>pseudo</em> <foo>markup"
#data
this is &#x0043;DATA inside a <style> element
#errors
#document-fragment
style
#document
| "this is &#x0043;DATA inside a <style> element"
#data
</plaintext>
#errors
#document-fragment
plaintext
#document
| "</plaintext>"
#data
setting html's innerHTML
#errors
Line: 1 Col: 24 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
| "setting html's innerHTML"
#data
<title>setting head's innerHTML</title>
#errors
#document-fragment
head
#document
| <title>
| "setting head's innerHTML"

@ -0,0 +1,191 @@
#data
<style> <!-- </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
#document
| <html>
| <head>
| <style>
| " <!-- "
| <body>
| "x"
#data
<style> <!-- </style> --> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<style> <!--> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!--> "
| <body>
| "x"
#data
<style> <!---> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!---> "
| <body>
| "x"
#data
<iframe> <!---> </iframe>x
#errors
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <iframe>
| " <!---> "
| "x"
#data
<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
#errors
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <iframe>
| " <!--- "
| "->x --> x"
#data
<script> <!-- </script> --> </script>x
#errors
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
#document
| <html>
| <head>
| <script>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<title> <!-- </title> --> </title>x
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <textarea>
| " <!--- "
| "->x --> x"
#data
<style> <!</-- </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!</-- "
| <body>
| "x"
#data
<p><xmp></xmp>
#errors
XXX: Unknown
#document
| <html>
| <head>
| <body>
| <p>
| <xmp>
#data
<xmp> <!-- > --> </xmp>
#errors
Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <xmp>
| " <!-- > --> "
#data
<title>&amp;</title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<title><!--&amp;--></title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| "<!--&-->"
| <body>
#data
<title><!--</title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
#document
| <html>
| <head>
| <title>
| "<!--"
| <body>
#data
<noscript><!--</noscript>--></noscript>
#errors
Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
#document
| <html>
| <head>
| <noscript>
| "<!--"
| <body>
| "-->"

@ -0,0 +1,663 @@
#data
<!doctype html></head> <head>
#errors
Line: 1 Col: 29 Unexpected start tag head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| " "
| <body>
#data
<!doctype html><form><div></form><div>
#errors
33: End tag "form" seen but there were unclosed elements.
38: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <form>
| <div>
| <div>
#data
<!doctype html><title>&amp;</title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<!doctype html><title><!--&amp;--></title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "<!--&-->"
| <body>
#data
<!doctype>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 10 Unexpected > character. Expected DOCTYPE name.
Line: 1 Col: 10 Erroneous DOCTYPE.
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
#data
<!---x
#errors
Line: 1 Col: 6 Unexpected end of file in comment.
Line: 1 Col: 6 Unexpected End of file. Expected DOCTYPE.
#document
| <!-- -x -->
| <html>
| <head>
| <body>
#data
<body>
<div>
#errors
Line: 1 Col: 6 Unexpected start tag (body).
Line: 2 Col: 5 Expected closing tag. Unexpected end of file.
#document-fragment
div
#document
| "
"
| <div>
#data
<frameset></frameset>
foo
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 3 Unexpected non-space characters in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
<noframes>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 10 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
| "
"
| <noframes>
#data
<frameset></frameset>
<div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 5 Unexpected start tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</html>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 6 Unexpected end tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<form><form>
#errors
Line: 1 Col: 6 Unexpected start tag (form). Expected DOCTYPE.
Line: 1 Col: 12 Unexpected start tag (form).
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <form>
#data
<button><button>
#errors
Line: 1 Col: 8 Unexpected start tag (button). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (button) implies end tag (button).
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <button>
| <button>
#data
<table><tr><td></th>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (th). Ignored.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><caption><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (td). Ignored.
Line: 1 Col: 20 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <tbody>
| <tr>
| <td>
#data
<table><caption><div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
</caption><div>
#errors
Line: 1 Col: 10 Unexpected end tag (caption). Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document-fragment
caption
#document
| <div>
#data
<table><caption><div></caption>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 31 Unexpected end tag (caption). Missing end tag (div).
Line: 1 Col: 31 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
<table><caption></table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 24 Unexpected end table tag in caption. Generates implied end caption.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
#data
</table><div>
#errors
Line: 1 Col: 8 Unexpected end table tag in caption. Generates implied end caption.
Line: 1 Col: 8 Unexpected end tag (caption). Ignored.
Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
#document-fragment
caption
#document
| <div>
#data
<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 23 Unexpected end tag (body). Ignored.
Line: 1 Col: 29 Unexpected end tag (col). Ignored.
Line: 1 Col: 40 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 47 Unexpected end tag (html). Ignored.
Line: 1 Col: 55 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 60 Unexpected end tag (td). Ignored.
Line: 1 Col: 68 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 73 Unexpected end tag (th). Ignored.
Line: 1 Col: 81 Unexpected end tag (thead). Ignored.
Line: 1 Col: 86 Unexpected end tag (tr). Ignored.
Line: 1 Col: 86 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
#data
<table><caption><div></div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 27 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
<table><tr><td></body></caption></col></colgroup></html>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end tag (body). Ignored.
Line: 1 Col: 32 Unexpected end tag (caption). Ignored.
Line: 1 Col: 38 Unexpected end tag (col). Ignored.
Line: 1 Col: 49 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 56 Unexpected end tag (html). Ignored.
Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
</table></tbody></tfoot></thead></tr><div>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 16 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 24 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 32 Unexpected end tag (thead). Ignored.
Line: 1 Col: 37 Unexpected end tag (tr). Ignored.
Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
#document-fragment
td
#document
| <div>
#data
<table><colgroup>foo
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 20 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
foo<col>
#errors
Line: 1 Col: 3 Unexpected end tag (colgroup). Ignored.
#document-fragment
colgroup
#document
| <col>
#data
<table><colgroup></col>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 23 This element (col) has no end tag.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
#data
<frameset><div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 1 Col: 15 Unexpected start tag token (div) in the frameset phase. Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
#data
</frameset><frame>
#errors
Line: 1 Col: 11 Unexpected end tag token (frameset) in the frameset phase (innerHTML).
#document-fragment
frameset
#document
| <frame>
#data
<frameset></div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected end tag token (div) in the frameset phase. Ignored.
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
#data
</body><div>
#errors
Line: 1 Col: 7 Unexpected end tag (body). Ignored.
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
#document-fragment
body
#document
| <div>
#data
<table><tr><div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 16 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <div>
| <table>
| <tbody>
| <tr>
#data
</tr><td>
#errors
Line: 1 Col: 5 Unexpected end tag (tr). Ignored.
#document-fragment
tr
#document
| <td>
#data
</tbody></tfoot></thead><td>
#errors
Line: 1 Col: 8 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 16 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 24 Unexpected end tag (thead). Ignored.
#document-fragment
tr
#document
| <td>
#data
<table><tr><div><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 20 Unexpected implied end tag (div) in the table row phase.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| <table>
| <tbody>
| <tr>
| <td>
#data
<caption><col><colgroup><tbody><tfoot><thead><tr>
#errors
Line: 1 Col: 9 Unexpected start tag (caption).
Line: 1 Col: 14 Unexpected start tag (col).
Line: 1 Col: 24 Unexpected start tag (colgroup).
Line: 1 Col: 31 Unexpected start tag (tbody).
Line: 1 Col: 38 Unexpected start tag (tfoot).
Line: 1 Col: 45 Unexpected start tag (thead).
Line: 1 Col: 49 Unexpected end of file. Expected table content.
#document-fragment
tbody
#document
| <tr>
#data
<table><tbody></thead>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end tag (thead) in the table body phase. Ignored.
Line: 1 Col: 22 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
</table><tr>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 12 Unexpected end of file. Expected table content.
#document-fragment
tbody
#document
| <tr>
#data
<table><tbody></body></caption></col></colgroup></html></td></th></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 21 Unexpected end tag (body) in the table body phase. Ignored.
Line: 1 Col: 31 Unexpected end tag (caption) in the table body phase. Ignored.
Line: 1 Col: 37 Unexpected end tag (col) in the table body phase. Ignored.
Line: 1 Col: 48 Unexpected end tag (colgroup) in the table body phase. Ignored.
Line: 1 Col: 55 Unexpected end tag (html) in the table body phase. Ignored.
Line: 1 Col: 60 Unexpected end tag (td) in the table body phase. Ignored.
Line: 1 Col: 65 Unexpected end tag (th) in the table body phase. Ignored.
Line: 1 Col: 70 Unexpected end tag (tr) in the table body phase. Ignored.
Line: 1 Col: 70 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
<table><tbody></div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (div) in table context caused voodoo mode.
Line: 1 Col: 20 End tag (div) seen too early. Expected other end tag.
Line: 1 Col: 20 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
<table><table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 14 Unexpected start tag (table) implies end tag (table).
Line: 1 Col: 14 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <table>
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 14 Unexpected end tag (body). Ignored.
Line: 1 Col: 24 Unexpected end tag (caption). Ignored.
Line: 1 Col: 30 Unexpected end tag (col). Ignored.
Line: 1 Col: 41 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 48 Unexpected end tag (html). Ignored.
Line: 1 Col: 56 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 61 Unexpected end tag (td). Ignored.
Line: 1 Col: 69 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 74 Unexpected end tag (th). Ignored.
Line: 1 Col: 82 Unexpected end tag (thead). Ignored.
Line: 1 Col: 87 Unexpected end tag (tr). Ignored.
Line: 1 Col: 87 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
#data
</table><tr>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 12 Unexpected end of file. Expected table content.
#document-fragment
table
#document
| <tbody>
| <tr>
#data
<body></body></html>
#errors
Line: 1 Col: 20 Unexpected html end tag in inner html mode.
Line: 1 Col: 20 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
#data
<html><frameset></frameset></html>
#errors
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
#document
| <html>
| <head>
| <frameset>
| " "
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
#errors
Line: 1 Col: 50 Erroneous DOCTYPE.
Line: 1 Col: 63 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
| <html>
| <head>
| <body>
#data
<param><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (param). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
<source><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (source). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
<track><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
</html><frameset></frameset>
#errors
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
17: Stray “frameset” start tag.
17: “frameset” start tag seen.
#document
| <html>
| <head>
| <frameset>
#data
</body><frameset></frameset>
#errors
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
17: Stray “frameset” start tag.
17: “frameset” start tag seen.
#document
| <html>
| <head>
| <frameset>

@ -0,0 +1,390 @@
#data
<!doctype html><body><title>X</title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
#data
<!doctype html><table><title>X</title></table>
#errors
Line: 1 Col: 29 Unexpected start tag (title) in table context caused voodoo mode.
Line: 1 Col: 38 Unexpected end tag (title) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
| <table>
#data
<!doctype html><head></head><title>X</title>
#errors
Line: 1 Col: 35 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "X"
| <body>
#data
<!doctype html></head><title>X</title>
#errors
Line: 1 Col: 29 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "X"
| <body>
#data
<!doctype html><table><meta></table>
#errors
Line: 1 Col: 28 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
| <table>
#data
<!doctype html><table>X<tr><td><table> <meta></table></table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 45 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <tbody>
| <tr>
| <td>
| <meta>
| <table>
| " "
#data
<!doctype html><html> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!doctype html> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!doctype html><table><style> <tr>x </style> </table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <style>
| " <tr>x "
| " "
#data
<!doctype html><table><TBODY><script> <tr>x </script> </table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <script>
| " <tr>x "
| " "
#data
<!doctype html><p><applet><p>X</p></applet>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <applet>
| <p>
| "X"
#data
<!doctype html><listing>
X</listing>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <listing>
| "X"
#data
<!doctype html><select><input>X
#errors
Line: 1 Col: 30 Unexpected input start tag in the select phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <input>
| "X"
#data
<!doctype html><select><select>X
#errors
Line: 1 Col: 31 Unexpected select start tag in the select phase treated as select end tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
#data
<!doctype html><table><input type=hidDEN></table>
#errors
Line: 1 Col: 41 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table>X<input type=hidDEN></table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table> <input type=hidDEN></table>
#errors
Line: 1 Col: 43 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| " "
| <input>
| type="hidDEN"
#data
<!doctype html><table> <input type='hidDEN'></table>
#errors
Line: 1 Col: 45 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| " "
| <input>
| type="hidDEN"
#data
<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
#errors
Line: 1 Col: 44 Unexpected start tag (input) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| type=" hidden"
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table><select>X<tr>
#errors
Line: 1 Col: 30 Unexpected start tag (select) in table context caused voodoo mode.
Line: 1 Col: 35 Unexpected table element start tag (trs) in the select in table phase.
Line: 1 Col: 35 Unexpected end of file. Expected table content.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><select>X</select>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
#data
<!DOCTYPE hTmL><html></html>
#errors
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><html></html>
#errors
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<body>X</body></body>
#errors
Line: 1 Col: 21 Unexpected end tag token (body) in the after body phase.
Line: 1 Col: 21 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
| "X"
#data
<div><p>a</x> b
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 13 Unexpected end tag (x). Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| <p>
| "a b"
#data
<table><tr><td><code></code> </table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <code>
| " "
#data
<table><b><tr><td>aaa</td></tr>bbb</table>ccc
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| "bbb"
| <table>
| <tbody>
| <tr>
| <td>
| "aaa"
| <b>
| "ccc"
#data
A<table><tr> B</tr> B</table>
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| "A B B"
| <table>
| <tbody>
| <tr>
#data
A<table><tr> B</tr> </em>C</table>
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| "A BC"
| <table>
| <tbody>
| <tr>
| " "
#data
<select><keygen>
#errors
Not known
#document
| <html>
| <head>
| <body>
| <select>
| <keygen>

@ -0,0 +1,148 @@
#data
<div>
<div></div>
</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 3 Col: 7 Unexpected end tag (span). Ignored.
Line: 3 Col: 8 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "
"
| <div>
| "
x"
#data
<div>x<div></div>
</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 2 Col: 7 Unexpected end tag (span). Ignored.
Line: 2 Col: 8 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "
x"
#data
<div>x<div></div>x</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "xx"
#data
<div>x<div></div>y</span>z
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "yz"
#data
<table><div>x<div></div>x</span>x
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode.
Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode.
Line: 1 Col: 32 Unexpected end tag (span). Ignored.
Line: 1 Col: 33 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "xx"
| <table>
#data
x<table>x
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 9 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "xx"
| <table>
#data
x<table><table>x
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table).
Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 16 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "x"
| <table>
| "x"
| <table>
#data
<b>a<div></div><div></b>y
#errors
Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <b>
| "a"
| <div>
| <div>
| <b>
| "y"
#data
<a><div><p></a>
#errors
Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <p>
| <a>

@ -0,0 +1,457 @@
#data
<!DOCTYPE html><math></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
#data
<!DOCTYPE html><body><math></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
#data
<!DOCTYPE html><math><mi>
#errors
25: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
#data
<!DOCTYPE html><math><annotation-xml><svg><u>
#errors
45: HTML start tag “u” in a foreign namespace context.
45: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <u>
#data
<!DOCTYPE html><body><select><math></math></select>
#errors
Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 42 Unexpected end tag (math) in the select phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><math></math></option></select>
#errors
Line: 1 Col: 43 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 50 Unexpected end tag (math) in the select phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><math></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 41 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <table>
#data
<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 53 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 58 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 65 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
#errors
Line: 1 Col: 41 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 53 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 65 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 72 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
#errors
Line: 1 Col: 45 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 57 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 69 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 76 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 70 HTML start tag "p" in a foreign namespace context.
Line: 1 Col: 81 Unexpected end table tag in caption. Generates implied end caption.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
#errors
Line: 1 Col: 78 Unexpected end table tag in caption. Generates implied end caption.
Line: 1 Col: 78 Unexpected end tag (caption). Missing end tag (math).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 44 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 56 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 68 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 71 HTML start tag "p" in a foreign namespace context.
Line: 1 Col: 71 Unexpected start tag (p) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 50 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 54 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 62 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 66 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 74 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 77 Unexpected start tag token (p) in the select phase. Ignored.
Line: 1 Col: 88 Unexpected table element end tag (tables) in the select in table phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 36 Unexpected start tag (select) in table context caused voodoo mode.
Line: 1 Col: 42 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 46 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 54 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 58 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 66 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 69 Unexpected start tag token (p) in the select phase. Ignored.
Line: 1 Col: 80 Unexpected table element end tag (tables) in the select in table phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
#errors
Line: 1 Col: 41 Unexpected start tag (math).
Line: 1 Col: 68 HTML start tag "p" in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
#errors
Line: 1 Col: 34 Unexpected start tag token (math) in the after body phase.
Line: 1 Col: 61 HTML start tag "p" in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
#errors
Line: 1 Col: 31 Unexpected start tag token (math) in the frameset phase. Ignored.
Line: 1 Col: 35 Unexpected start tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 40 Unexpected end tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 44 Unexpected start tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 49 Unexpected end tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 52 Unexpected start tag token (p) in the frameset phase. Ignored.
Line: 1 Col: 58 Unexpected start tag token (span) in the frameset phase. Ignored.
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
#errors
Line: 1 Col: 42 Unexpected start tag (math) in the after frameset phase. Ignored.
Line: 1 Col: 46 Unexpected start tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 51 Unexpected end tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 55 Unexpected start tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 60 Unexpected end tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 63 Unexpected start tag (p) in the after frameset phase. Ignored.
Line: 1 Col: 69 Unexpected start tag (span) in the after frameset phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <math math>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
| "bar"

@ -0,0 +1,741 @@
#data
<body><span>
#errors
#document-fragment
body
#document
| <span>
#data
<span><body>
#errors
#document-fragment
body
#document
| <span>
#data
<span><body>
#errors
#document-fragment
div
#document
| <span>
#data
<body><span>
#errors
#document-fragment
html
#document
| <head>
| <body>
| <span>
#data
<frameset><span>
#errors
#document-fragment
body
#document
| <span>
#data
<span><frameset>
#errors
#document-fragment
body
#document
| <span>
#data
<span><frameset>
#errors
#document-fragment
div
#document
| <span>
#data
<frameset><span>
#errors
#document-fragment
html
#document
| <head>
| <frameset>
#data
<table><tr>
#errors
#document-fragment
table
#document
| <tbody>
| <tr>
#data
</table><tr>
#errors
#document-fragment
table
#document
| <tbody>
| <tr>
#data
<a>
#errors
#document-fragment
table
#document
| <a>
#data
<a>
#errors
#document-fragment
table
#document
| <a>
#data
<a><caption>a
#errors
#document-fragment
table
#document
| <a>
| <caption>
| "a"
#data
<a><colgroup><col>
#errors
#document-fragment
table
#document
| <a>
| <colgroup>
| <col>
#data
<a><tbody><tr>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
#data
<a><tfoot><tr>
#errors
#document-fragment
table
#document
| <a>
| <tfoot>
| <tr>
#data
<a><thead><tr>
#errors
#document-fragment
table
#document
| <a>
| <thead>
| <tr>
#data
<a><tr>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
#data
<a><th>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
| <th>
#data
<a><td>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
| <td>
#data
<table></table><tbody>
#errors
#document-fragment
caption
#document
| <table>
#data
</table><span>
#errors
#document-fragment
caption
#document
| <span>
#data
<span></table>
#errors
#document-fragment
caption
#document
| <span>
#data
</caption><span>
#errors
#document-fragment
caption
#document
| <span>
#data
<span></caption><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><caption><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><col><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><colgroup><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><html><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tbody><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><td><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tfoot><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><thead><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><th><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tr><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span></table><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
</colgroup><col>
#errors
#document-fragment
colgroup
#document
| <col>
#data
<a><col>
#errors
#document-fragment
colgroup
#document
| <col>
#data
<caption><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<col><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<colgroup><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<tbody><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<tfoot><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<thead><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
</table><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<a><tr>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<td><table><tbody><a><tr>
#errors
#document-fragment
tbody
#document
| <tr>
| <td>
| <a>
| <table>
| <tbody>
| <tr>
#data
</tr><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<td><table><a><tr></tr><tr>
#errors
#document-fragment
tr
#document
| <td>
| <a>
| <table>
| <tbody>
| <tr>
| <tr>
#data
<caption><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<col><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<colgroup><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tbody><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tfoot><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<thead><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tr><td>
#errors
#document-fragment
tr
#document
| <td>
#data
</table><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<td><table></table><td>
#errors
#document-fragment
tr
#document
| <td>
| <table>
| <td>
#data
<td><table></table><td>
#errors
#document-fragment
tr
#document
| <td>
| <table>
| <td>
#data
<caption><a>
#errors
#document-fragment
td
#document
| <a>
#data
<col><a>
#errors
#document-fragment
td
#document
| <a>
#data
<colgroup><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tbody><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tfoot><a>
#errors
#document-fragment
td
#document
| <a>
#data
<th><a>
#errors
#document-fragment
td
#document
| <a>
#data
<thead><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tr><a>
#errors
#document-fragment
td
#document
| <a>
#data
</table><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tbody><a>
#errors
#document-fragment
td
#document
| <a>
#data
</td><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tfoot><a>
#errors
#document-fragment
td
#document
| <a>
#data
</thead><a>
#errors
#document-fragment
td
#document
| <a>
#data
</th><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tr><a>
#errors
#document-fragment
td
#document
| <a>
#data
<table><td><td>
#errors
#document-fragment
td
#document
| <table>
| <tbody>
| <tr>
| <td>
| <td>
#data
</select><option>
#errors
#document-fragment
select
#document
| <option>
#data
<input><option>
#errors
#document-fragment
select
#document
| <option>
#data
<keygen><option>
#errors
#document-fragment
select
#document
| <option>
#data
<textarea><option>
#errors
#document-fragment
select
#document
| <option>
#data
</html><!--abc-->
#errors
#document-fragment
html
#document
| <head>
| <body>
| <!-- abc -->
#data
</frameset><frame>
#errors
#document-fragment
frameset
#document
| <frame>
#data
#errors
#document-fragment
html
#document
| <head>
| <body>

@ -0,0 +1,261 @@
#data
<b><p>Bold </b> Not bold</p>
Also not bold.
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <p>
| <b>
| "Bold "
| " Not bold"
| "
Also not bold."
#data
<html>
<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
<p>Italic and red. </i> Red.</font> I should not be red.</p>
<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
#errors
#document
| <html>
| <head>
| <body>
| <font>
| color="red"
| <i>
| "Italic and Red"
| <i>
| <p>
| <font>
| color="red"
| "Italic and Red "
| " Just italic."
| " Italic only."
| " Plain
"
| <p>
| "I should not be red. "
| <font>
| color="red"
| "Red. "
| <i>
| "Italic and red."
| <font>
| color="red"
| <i>
| "
"
| <p>
| <font>
| color="red"
| <i>
| "Italic and red. "
| " Red."
| " I should not be red."
| "
"
| <b>
| "Bold "
| <i>
| "Bold and italic"
| <i>
| " Only Italic "
| " Plain"
#data
<html><body>
<p><font size="7">First paragraph.</p>
<p>Second paragraph.</p></font>
<b><p><i>Bold and Italic</b> Italic</p>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <p>
| <font>
| size="7"
| "First paragraph."
| <font>
| size="7"
| "
"
| <p>
| "Second paragraph."
| "
"
| <b>
| <p>
| <b>
| <i>
| "Bold and Italic"
| <i>
| " Italic"
#data
<html>
<dl>
<dt><b>Boo
<dd>Goo?
</dl>
</html>
#errors
#document
| <html>
| <head>
| <body>
| <dl>
| "
"
| <dt>
| <b>
| "Boo
"
| <dd>
| <b>
| "Goo?
"
| <b>
| "
"
#data
<html><body>
<label><a><div>Hello<div>World</div></a></label>
</body></html>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <label>
| <a>
| <div>
| <a>
| "Hello"
| <div>
| "World"
| "
"
#data
<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
#errors
#document
| <html>
| <head>
| <body>
| <center>
| " "
| <font>
| "a"
| <font>
| <img>
| " "
| <table>
| " "
| <tbody>
| <tr>
| <td>
| " "
| " "
| " "
#data
<table><tr><p><a><p>You should see this text.
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <a>
| <p>
| <a>
| "You should see this text."
| <table>
| <tbody>
| <tr>
#data
<TABLE>
<TR>
<CENTER><CENTER><TD></TD></TR><TR>
<FONT>
<TABLE><tr></tr></TABLE>
</P>
<a></font><font></a>
This page contains an insanely badly-nested tag sequence.
#errors
#document
| <html>
| <head>
| <body>
| <center>
| <center>
| <font>
| "
"
| <table>
| "
"
| <tbody>
| <tr>
| "
"
| <td>
| <tr>
| "
"
| <table>
| <tbody>
| <tr>
| <font>
| "
"
| <p>
| "
"
| <a>
| <a>
| <font>
| <font>
| "
This page contains an insanely badly-nested tag sequence."
#data
<html>
<body>
<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
</body>
</html>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <b>
| <nobr>
| <div>
| <b>
| <nobr>
| "This text is in a div inside a nobr"
| "More text that should not be in the nobr, i.e., the
nobr should have closed the div inside it implicitly. "
| <pre>
| "A pre tag outside everything else."
| "
"

@ -0,0 +1,610 @@
#data
Test
#errors
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "Test"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<div>Test</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Test"
#data
<di
#errors
#document
| <html>
| <head>
| <body>
#data
<div>Hello</div>
<script>
console.log("PASS");
</script>
<div>Bye</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Hello"
| "
"
| <script>
| "
console.log("PASS");
"
| "
"
| <div>
| "Bye"
#data
<div foo="bar">Hello</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="bar"
| "Hello"
#data
<div>Hello</div>
<script>
console.log("FOO<span>BAR</span>BAZ");
</script>
<div>Bye</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Hello"
| "
"
| <script>
| "
console.log("FOO<span>BAR</span>BAZ");
"
| "
"
| <div>
| "Bye"
#data
<foo bar="baz"></foo><potato quack="duck"></potato>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="baz"
| <potato>
| quack="duck"
#data
<foo bar="baz"><potato quack="duck"></potato></foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="baz"
| <potato>
| quack="duck"
#data
<foo></foo bar="baz"><potato></potato quack="duck">
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| <potato>
#data
</ tttt>
#errors
#document
| <!-- tttt -->
| <html>
| <head>
| <body>
#data
<div FOO ><img><img></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo=""
| <img>
| <img>
#data
<p>Test</p<p>Test2</p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "TestTest2"
#data
<rdar://problem/6869687>
#errors
#document
| <html>
| <head>
| <body>
| <rdar:>
| 6869687=""
| problem=""
#data
<A>test< /A>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "test< /A>"
#data
&lt;
#errors
#document
| <html>
| <head>
| <body>
| "<"
#data
<body foo='bar'><body foo='baz' yo='mama'>
#errors
#document
| <html>
| <head>
| <body>
| foo="bar"
| yo="mama"
#data
<body></br foo="bar"></body>
#errors
#document
| <html>
| <head>
| <body>
| <br>
#data
<bdy><br foo="bar"></body>
#errors
#document
| <html>
| <head>
| <body>
| <bdy>
| <br>
| foo="bar"
#data
<body></body></br foo="bar">
#errors
#document
| <html>
| <head>
| <body>
| <br>
#data
<bdy></body><br foo="bar">
#errors
#document
| <html>
| <head>
| <body>
| <bdy>
| <br>
| foo="bar"
#data
<html><body></body></html><!-- Hi there -->
#errors
#document
| <html>
| <head>
| <body>
| <!-- Hi there -->
#data
<html><body></body></html>x<!-- Hi there -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
#data
<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
| <!-- Again -->
#data
<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
| <!-- Again -->
#data
<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
#errors
#document
| <html>
| <head>
| <body>
| <ruby>
| <div>
| <rp>
| "xx"
#data
<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
#errors
#document
| <html>
| <head>
| <body>
| <ruby>
| <div>
| <rt>
| "xx"
#data
<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
#errors
#document
| <html>
| <head>
| <frameset>
| <!-- 1 -->
| <noframes>
| "A"
| <!-- 2 -->
| <!-- 3 -->
| <noframes>
| "B"
| <!-- 4 -->
| <noframes>
| "C"
| <!-- 5 -->
| <!-- 6 -->
#data
<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <option>
| "B"
| <select>
| <option>
| "C"
| <option>
| "D"
| <select>
| <option>
| "E"
| <option>
| "F"
| <select>
| <option>
| "G"
#data
<dd><dd><dt><dt><dd><li><li>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| <dd>
| <dt>
| <dt>
| <dd>
| <li>
| <li>
#data
<div><b></div><div><nobr>a<nobr>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <b>
| <div>
| <b>
| <nobr>
| "a"
| <nobr>
#data
<head></head>
<body></body>
#errors
#document
| <html>
| <head>
| "
"
| <body>
#data
<head></head> <style></style>ddd
#errors
#document
| <html>
| <head>
| <style>
| " "
| <body>
| "ddd"
#data
<kbd><table></kbd><col><select><tr>
#errors
#document
| <html>
| <head>
| <body>
| <kbd>
| <select>
| <table>
| <colgroup>
| <col>
| <tbody>
| <tr>
#data
<kbd><table></kbd><col><select><tr></table><div>
#errors
#document
| <html>
| <head>
| <body>
| <kbd>
| <select>
| <table>
| <colgroup>
| <col>
| <tbody>
| <tr>
| <div>
#data
<a><li><style></style><title></title></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <li>
| <a>
| <style>
| <title>
#data
<font></p><p><meta><title></title></font>
#errors
#document
| <html>
| <head>
| <body>
| <font>
| <p>
| <p>
| <font>
| <meta>
| <title>
#data
<a><center><title></title><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <center>
| <a>
| <title>
| <a>
#data
<svg><title><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <div>
#data
<svg><title><rect><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <rect>
| <div>
#data
<svg><title><svg><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <svg svg>
| <div>
#data
<img <="" FAIL>
#errors
#document
| <html>
| <head>
| <body>
| <img>
| <=""
| fail=""
#data
<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| <div>
| id="foo"
| "A"
| <li>
| "B"
| <div>
| "C"
#data
<svg><em><desc></em>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <em>
| <desc>
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>
| <circle>
#data
<svg><tfoot></mi><td>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg tfoot>
| <svg td>
#data
<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mrow>
| <math mrow>
| <math mn>
| "1"
| <math mi>
| "a"
#data
<!doctype html><input type="hidden"><frameset>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!doctype html><input type="button"><frameset>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| type="button"

@ -0,0 +1,159 @@
#data
<foo bar=qux/>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="qux/"
#data
<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| id="status"
| <noscript>
| "<strong>A</strong>"
| <span>
| "B"
#data
<div><sarcasm><div></div></sarcasm></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <sarcasm>
| <div>
#data
<html><body><img src="" border="0" alt="><div>A</div></body></html>
#errors
#document
| <html>
| <head>
| <body>
#data
<table><td></tbody>A
#errors
#document
| <html>
| <head>
| <body>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td></thead>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
#data
<table><td></tfoot>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
#data
<table><thead><td></tbody>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <thead>
| <tr>
| <td>
| "A"
#data
<legend>test</legend>
#errors
#document
| <html>
| <head>
| <body>
| <legend>
| "test"
#data
<table><input>
#errors
#document
| <html>
| <head>
| <body>
| <input>
| <table>
#data
<b><em><dcell><postfield><postfield><postfield><postfield><missing_glyph><missing_glyph><missing_glyph><missing_glyph><hkern><aside></b></em>
#errors
#document-fragment
div
#document
| <b>
| <em>
| <dcell>
| <postfield>
| <postfield>
| <postfield>
| <postfield>
| <missing_glyph>
| <missing_glyph>
| <missing_glyph>
| <missing_glyph>
| <hkern>
| <aside>
| <em>
| <b>
#data
<isindex action="x">
#errors
#document-fragment
table
#document
| <form>
| action="x"
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| <hr>
#data
<option><XH<optgroup></optgroup>
#errors
#document-fragment
select
#document
| <option>

File diff suppressed because it is too large Load Diff

@ -0,0 +1,748 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"io"
"io/ioutil"
"reflect"
"runtime"
"strings"
"testing"
)
type tokenTest struct {
// A short description of the test case.
desc string
// The HTML to parse.
html string
// The string representations of the expected tokens, joined by '$'.
golden string
}
var tokenTests = []tokenTest{
{
"empty",
"",
"",
},
// A single text node. The tokenizer should not break text nodes on whitespace,
// nor should it normalize whitespace within a text node.
{
"text",
"foo bar",
"foo bar",
},
// An entity.
{
"entity",
"one &lt; two",
"one &lt; two",
},
// A start, self-closing and end tag. The tokenizer does not care if the start
// and end tokens don't match; that is the job of the parser.
{
"tags",
"<a>b<c/>d</e>",
"<a>$b$<c/>$d$</e>",
},
// Angle brackets that aren't a tag.
{
"not a tag #0",
"<",
"&lt;",
},
{
"not a tag #1",
"</",
"&lt;/",
},
{
"not a tag #2",
"</>",
"<!---->",
},
{
"not a tag #3",
"a</>b",
"a$<!---->$b",
},
{
"not a tag #4",
"</ >",
"<!-- -->",
},
{
"not a tag #5",
"</.",
"<!--.-->",
},
{
"not a tag #6",
"</.>",
"<!--.-->",
},
{
"not a tag #7",
"a < b",
"a &lt; b",
},
{
"not a tag #8",
"<.>",
"&lt;.&gt;",
},
{
"not a tag #9",
"a<<<b>>>c",
"a&lt;&lt;$<b>$&gt;&gt;c",
},
{
"not a tag #10",
"if x<0 and y < 0 then x*y>0",
"if x&lt;0 and y &lt; 0 then x*y&gt;0",
},
{
"not a tag #11",
"<<p>",
"&lt;$<p>",
},
// EOF in a tag name.
{
"tag name eof #0",
"<a",
"",
},
{
"tag name eof #1",
"<a ",
"",
},
{
"tag name eof #2",
"a<b",
"a",
},
{
"tag name eof #3",
"<a><b",
"<a>",
},
{
"tag name eof #4",
`<a x`,
``,
},
// Some malformed tags that are missing a '>'.
{
"malformed tag #0",
`<p</p>`,
`<p< p="">`,
},
{
"malformed tag #1",
`<p </p>`,
`<p <="" p="">`,
},
{
"malformed tag #2",
`<p id`,
``,
},
{
"malformed tag #3",
`<p id=`,
``,
},
{
"malformed tag #4",
`<p id=>`,
`<p id="">`,
},
{
"malformed tag #5",
`<p id=0`,
``,
},
{
"malformed tag #6",
`<p id=0</p>`,
`<p id="0&lt;/p">`,
},
{
"malformed tag #7",
`<p id="0</p>`,
``,
},
{
"malformed tag #8",
`<p id="0"</p>`,
`<p id="0" <="" p="">`,
},
{
"malformed tag #9",
`<p></p id`,
`<p>`,
},
// Raw text and RCDATA.
{
"basic raw text",
"<script><a></b></script>",
"<script>$&lt;a&gt;&lt;/b&gt;$</script>",
},
{
"unfinished script end tag",
"<SCRIPT>a</SCR",
"<script>$a&lt;/SCR",
},
{
"broken script end tag",
"<SCRIPT>a</SCR ipt>",
"<script>$a&lt;/SCR ipt&gt;",
},
{
"EOF in script end tag",
"<SCRIPT>a</SCRipt",
"<script>$a&lt;/SCRipt",
},
{
"scriptx end tag",
"<SCRIPT>a</SCRiptx",
"<script>$a&lt;/SCRiptx",
},
{
"' ' completes script end tag",
"<SCRIPT>a</SCRipt ",
"<script>$a",
},
{
"'>' completes script end tag",
"<SCRIPT>a</SCRipt>",
"<script>$a$</script>",
},
{
"self-closing script end tag",
"<SCRIPT>a</SCRipt/>",
"<script>$a$</script>",
},
{
"nested script tag",
"<SCRIPT>a</SCRipt<script>",
"<script>$a&lt;/SCRipt&lt;script&gt;",
},
{
"script end tag after unfinished",
"<SCRIPT>a</SCRipt</script>",
"<script>$a&lt;/SCRipt$</script>",
},
{
"script/style mismatched tags",
"<script>a</style>",
"<script>$a&lt;/style&gt;",
},
{
"style element with entity",
"<style>&apos;",
"<style>$&amp;apos;",
},
{
"textarea with tag",
"<textarea><div></textarea>",
"<textarea>$&lt;div&gt;$</textarea>",
},
{
"title with tag and entity",
"<title><b>K&amp;R C</b></title>",
"<title>$&lt;b&gt;K&amp;R C&lt;/b&gt;$</title>",
},
// DOCTYPE tests.
{
"Proper DOCTYPE",
"<!DOCTYPE html>",
"<!DOCTYPE html>",
},
{
"DOCTYPE with no space",
"<!doctypehtml>",
"<!DOCTYPE html>",
},
{
"DOCTYPE with two spaces",
"<!doctype html>",
"<!DOCTYPE html>",
},
{
"looks like DOCTYPE but isn't",
"<!DOCUMENT html>",
"<!--DOCUMENT html-->",
},
{
"DOCTYPE at EOF",
"<!DOCtype",
"<!DOCTYPE >",
},
// XML processing instructions.
{
"XML processing instruction",
"<?xml?>",
"<!--?xml?-->",
},
// Comments.
{
"comment0",
"abc<b><!-- skipme --></b>def",
"abc$<b>$<!-- skipme -->$</b>$def",
},
{
"comment1",
"a<!-->z",
"a$<!---->$z",
},
{
"comment2",
"a<!--->z",
"a$<!---->$z",
},
{
"comment3",
"a<!--x>-->z",
"a$<!--x>-->$z",
},
{
"comment4",
"a<!--x->-->z",
"a$<!--x->-->$z",
},
{
"comment5",
"a<!>z",
"a$<!---->$z",
},
{
"comment6",
"a<!->z",
"a$<!----->$z",
},
{
"comment7",
"a<!---<>z",
"a$<!---<>z-->",
},
{
"comment8",
"a<!--z",
"a$<!--z-->",
},
{
"comment9",
"a<!--z-",
"a$<!--z-->",
},
{
"comment10",
"a<!--z--",
"a$<!--z-->",
},
{
"comment11",
"a<!--z---",
"a$<!--z--->",
},
{
"comment12",
"a<!--z----",
"a$<!--z---->",
},
{
"comment13",
"a<!--x--!>z",
"a$<!--x-->$z",
},
// An attribute with a backslash.
{
"backslash",
`<p id="a\"b">`,
`<p id="a\" b"="">`,
},
// Entities, tag name and attribute key lower-casing, and whitespace
// normalization within a tag.
{
"tricky",
"<p \t\n iD=\"a&quot;B\" foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
`<p id="a&#34;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
},
// A nonexistent entity. Tokenizing and converting back to a string should
// escape the "&" to become "&amp;".
{
"noSuchEntity",
`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
},
{
"entity without semicolon",
`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
},
{
"entity with digits",
"&frac12;",
"½",
},
// Attribute tests:
// http://dev.w3.org/html5/pf-summary/Overview.html#attributes
{
"Empty attribute",
`<input disabled FOO>`,
`<input disabled="" foo="">`,
},
{
"Empty attribute, whitespace",
`<input disabled FOO >`,
`<input disabled="" foo="">`,
},
{
"Unquoted attribute value",
`<input value=yes FOO=BAR>`,
`<input value="yes" foo="BAR">`,
},
{
"Unquoted attribute value, spaces",
`<input value = yes FOO = BAR>`,
`<input value="yes" foo="BAR">`,
},
{
"Unquoted attribute value, trailing space",
`<input value=yes FOO=BAR >`,
`<input value="yes" foo="BAR">`,
},
{
"Single-quoted attribute value",
`<input value='yes' FOO='BAR'>`,
`<input value="yes" foo="BAR">`,
},
{
"Single-quoted attribute value, trailing space",
`<input value='yes' FOO='BAR' >`,
`<input value="yes" foo="BAR">`,
},
{
"Double-quoted attribute value",
`<input value="I'm an attribute" FOO="BAR">`,
`<input value="I&#39;m an attribute" foo="BAR">`,
},
{
"Attribute name characters",
`<meta http-equiv="content-type">`,
`<meta http-equiv="content-type">`,
},
{
"Mixed attributes",
`a<P V="0 1" w='2' X=3 y>z`,
`a$<p v="0 1" w="2" x="3" y="">$z`,
},
{
"Attributes with a solitary single quote",
`<p id=can't><p id=won't>`,
`<p id="can&#39;t">$<p id="won&#39;t">`,
},
}
func TestTokenizer(t *testing.T) {
loop:
for _, tt := range tokenTests {
z := NewTokenizer(strings.NewReader(tt.html))
if tt.golden != "" {
for i, s := range strings.Split(tt.golden, "$") {
if z.Next() == ErrorToken {
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
continue loop
}
actual := z.Token().String()
if s != actual {
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
continue loop
}
}
}
z.Next()
if z.Err() != io.EOF {
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
}
}
}
func TestMaxBuffer(t *testing.T) {
// Exceeding the maximum buffer size generates ErrBufferExceeded.
z := NewTokenizer(strings.NewReader("<" + strings.Repeat("t", 10)))
z.SetMaxBuf(5)
tt := z.Next()
if got, want := tt, ErrorToken; got != want {
t.Fatalf("token type: got: %v want: %v", got, want)
}
if got, want := z.Err(), ErrBufferExceeded; got != want {
t.Errorf("error type: got: %v want: %v", got, want)
}
if got, want := string(z.Raw()), "<tttt"; got != want {
t.Fatalf("buffered before overflow: got: %q want: %q", got, want)
}
}
func TestMaxBufferReconstruction(t *testing.T) {
// Exceeding the maximum buffer size at any point while tokenizing permits
// reconstructing the original input.
tests:
for _, test := range tokenTests {
for maxBuf := 1; ; maxBuf++ {
r := strings.NewReader(test.html)
z := NewTokenizer(r)
z.SetMaxBuf(maxBuf)
var tokenized bytes.Buffer
for {
tt := z.Next()
tokenized.Write(z.Raw())
if tt == ErrorToken {
if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
t.Errorf("%s: unexpected error: %v", test.desc, err)
}
break
}
}
// Anything tokenized along with untokenized input or data left in the reader.
assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, bytes.NewReader(z.Buffered()), r))
if err != nil {
t.Errorf("%s: ReadAll: %v", test.desc, err)
continue tests
}
if got, want := string(assembled), test.html; got != want {
t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
continue tests
}
// EOF indicates that we completed tokenization and hence found the max
// maxBuf that generates ErrBufferExceeded, so continue to the next test.
if z.Err() == io.EOF {
break
}
} // buffer sizes
} // tests
}
func TestPassthrough(t *testing.T) {
// Accumulating the raw output for each parse event should reconstruct the
// original input.
for _, test := range tokenTests {
z := NewTokenizer(strings.NewReader(test.html))
var parsed bytes.Buffer
for {
tt := z.Next()
parsed.Write(z.Raw())
if tt == ErrorToken {
break
}
}
if got, want := parsed.String(), test.html; got != want {
t.Errorf("%s: parsed output:\n got: %q\nwant: %q", test.desc, got, want)
}
}
}
func TestBufAPI(t *testing.T) {
s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
z := NewTokenizer(bytes.NewBufferString(s))
var result bytes.Buffer
depth := 0
loop:
for {
tt := z.Next()
switch tt {
case ErrorToken:
if z.Err() != io.EOF {
t.Error(z.Err())
}
break loop
case TextToken:
if depth > 0 {
result.Write(z.Text())
}
case StartTagToken, EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == StartTagToken {
depth++
} else {
depth--
}
}
}
}
u := "14567"
v := string(result.Bytes())
if u != v {
t.Errorf("TestBufAPI: want %q got %q", u, v)
}
}
func TestConvertNewlines(t *testing.T) {
testCases := map[string]string{
"Mac\rDOS\r\nUnix\n": "Mac\nDOS\nUnix\n",
"Unix\nMac\rDOS\r\n": "Unix\nMac\nDOS\n",
"DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
"": "",
"\n": "\n",
"\n\r": "\n\n",
"\r": "\n",
"\r\n": "\n",
"\r\n\n": "\n\n",
"\r\n\r": "\n\n",
"\r\n\r\n": "\n\n",
"\r\r": "\n\n",
"\r\r\n": "\n\n",
"\r\r\n\n": "\n\n\n",
"\r\r\r\n": "\n\n\n",
"\r \n": "\n \n",
"xyz": "xyz",
}
for in, want := range testCases {
if got := string(convertNewlines([]byte(in))); got != want {
t.Errorf("input %q: got %q, want %q", in, got, want)
}
}
}
func TestReaderEdgeCases(t *testing.T) {
const s = "<p>An io.Reader can return (0, nil) or (n, io.EOF).</p>"
testCases := []io.Reader{
&zeroOneByteReader{s: s},
&eofStringsReader{s: s},
&stuckReader{},
}
for i, tc := range testCases {
got := []TokenType{}
z := NewTokenizer(tc)
for {
tt := z.Next()
if tt == ErrorToken {
break
}
got = append(got, tt)
}
if err := z.Err(); err != nil && err != io.EOF {
if err != io.ErrNoProgress {
t.Errorf("i=%d: %v", i, err)
}
continue
}
want := []TokenType{
StartTagToken,
TextToken,
EndTagToken,
}
if !reflect.DeepEqual(got, want) {
t.Errorf("i=%d: got %v, want %v", i, got, want)
continue
}
}
}
// zeroOneByteReader is like a strings.Reader that alternates between
// returning 0 bytes and 1 byte at a time.
type zeroOneByteReader struct {
s string
n int
}
func (r *zeroOneByteReader) Read(p []byte) (int, error) {
if len(p) == 0 {
return 0, nil
}
if len(r.s) == 0 {
return 0, io.EOF
}
r.n++
if r.n%2 != 0 {
return 0, nil
}
p[0], r.s = r.s[0], r.s[1:]
return 1, nil
}
// eofStringsReader is like a strings.Reader but can return an (n, err) where
// n > 0 && err != nil.
type eofStringsReader struct {
s string
}
func (r *eofStringsReader) Read(p []byte) (int, error) {
n := copy(p, r.s)
r.s = r.s[n:]
if r.s != "" {
return n, nil
}
return n, io.EOF
}
// stuckReader is an io.Reader that always returns no data and no error.
type stuckReader struct{}
func (*stuckReader) Read(p []byte) (int, error) {
return 0, nil
}
const (
rawLevel = iota
lowLevel
highLevel
)
func benchmarkTokenizer(b *testing.B, level int) {
buf, err := ioutil.ReadFile("testdata/go1.html")
if err != nil {
b.Fatalf("could not read testdata/go1.html: %v", err)
}
b.SetBytes(int64(len(buf)))
runtime.GC()
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
z := NewTokenizer(bytes.NewBuffer(buf))
for {
tt := z.Next()
if tt == ErrorToken {
if err := z.Err(); err != nil && err != io.EOF {
b.Fatalf("tokenizer error: %v", err)
}
break
}
switch level {
case rawLevel:
// Calling z.Raw just returns the raw bytes of the token. It does
// not unescape &lt; to <, or lower-case tag names and attribute keys.
z.Raw()
case lowLevel:
// Caling z.Text, z.TagName and z.TagAttr returns []byte values
// whose contents may change on the next call to z.Next.
switch tt {
case TextToken, CommentToken, DoctypeToken:
z.Text()
case StartTagToken, SelfClosingTagToken:
_, more := z.TagName()
for more {
_, _, more = z.TagAttr()
}
case EndTagToken:
z.TagName()
}
case highLevel:
// Calling z.Token converts []byte values to strings whose validity
// extend beyond the next call to z.Next.
z.Token()
}
}
}
}
func BenchmarkRawLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, rawLevel) }
func BenchmarkLowLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, lowLevel) }
func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }

@ -0,0 +1,209 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package charmap provides simple character encodings such as IBM Code Page 437
// and Windows 1252.
package charmap
import (
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// These encodings vary only in the way clients should interpret them. Their
// coded character set is identical and a single implementation can be shared.
var (
// ISO8859_6E is the ISO 8859-6E encoding.
ISO8859_6E encoding.Encoding = &iso8859_6E
// ISO8859_6I is the ISO 8859-6I encoding.
ISO8859_6I encoding.Encoding = &iso8859_6I
// ISO8859_8E is the ISO 8859-8E encoding.
ISO8859_8E encoding.Encoding = &iso8859_8E
// ISO8859_8I is the ISO 8859-8I encoding.
ISO8859_8I encoding.Encoding = &iso8859_8I
iso8859_6E = internal.Encoding{
ISO8859_6,
"ISO-8859-6E",
identifier.ISO88596E,
}
iso8859_6I = internal.Encoding{
ISO8859_6,
"ISO-8859-6I",
identifier.ISO88596I,
}
iso8859_8E = internal.Encoding{
ISO8859_8,
"ISO-8859-8E",
identifier.ISO88598E,
}
iso8859_8I = internal.Encoding{
ISO8859_8,
"ISO-8859-8I",
identifier.ISO88598I,
}
)
// All is a list of all defined encodings in this package.
var All = listAll
// TODO: implement these encodings, in order of importance.
// ASCII, ISO8859_1: Rather common. Close to Windows 1252.
// ISO8859_9: Close to Windows 1254.
// utf8Enc holds a rune's UTF-8 encoding in data[:len].
type utf8Enc struct {
len uint8
data [3]byte
}
// charmap describes an 8-bit character set encoding.
type charmap struct {
// name is the encoding's name.
name string
// mib is the encoding type of this encoder.
mib identifier.MIB
// asciiSuperset states whether the encoding is a superset of ASCII.
asciiSuperset bool
// low is the lower bound of the encoded byte for a non-ASCII rune. If
// charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
low uint8
// replacement is the encoded replacement character.
replacement byte
// decode is the map from encoded byte to UTF-8.
decode [256]utf8Enc
// encoding is the map from runes to encoded bytes. Each entry is a
// uint32: the high 8 bits are the encoded byte and the low 24 bits are
// the rune. The table entries are sorted by ascending rune.
encode [256]uint32
}
func (m *charmap) NewDecoder() transform.Transformer {
return charmapDecoder{charmap: m}
}
func (m *charmap) NewEncoder() transform.Transformer {
return charmapEncoder{charmap: m}
}
func (m *charmap) String() string {
return m.name
}
func (m *charmap) ID() (mib identifier.MIB, other string) {
return m.mib, ""
}
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
type charmapDecoder struct {
transform.NopResetter
charmap *charmap
}
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for i, c := range src {
if m.charmap.asciiSuperset && c < utf8.RuneSelf {
if nDst >= len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst] = c
nDst++
nSrc = i + 1
continue
}
decode := &m.charmap.decode[c]
n := int(decode.len)
if nDst+n > len(dst) {
err = transform.ErrShortDst
break
}
// It's 15% faster to avoid calling copy for these tiny slices.
for j := 0; j < n; j++ {
dst[nDst] = decode.data[j]
nDst++
}
nSrc = i + 1
}
return nDst, nSrc, err
}
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
type charmapEncoder struct {
transform.NopResetter
charmap *charmap
}
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
for nSrc < len(src) {
if nDst >= len(dst) {
err = transform.ErrShortDst
break
}
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
nSrc++
if m.charmap.asciiSuperset {
dst[nDst] = uint8(r)
nDst++
continue
}
} else {
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
}
nSrc += size
if r == utf8.RuneError {
dst[nDst] = m.charmap.replacement
nDst++
continue
}
}
// Binary search in [low, high) for that rune in the m.charmap.encode table.
for low, high := int(m.charmap.low), 0x100; ; {
if low >= high {
dst[nDst] = m.charmap.replacement
nDst++
break
}
mid := (low + high) / 2
got := m.charmap.encode[mid]
gotRune := rune(got & (1<<24 - 1))
if gotRune < r {
low = mid + 1
} else if gotRune > r {
high = mid
} else {
dst[nDst] = byte(got >> 24)
nDst++
break
}
}
}
return nDst, nSrc, err
}

@ -0,0 +1,415 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding"
)
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
` !"#$%&'()*+,-./0123456789:;<=>?` +
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
var encodings = []struct {
name string
mib string
comment string
varName string
replacement byte
mapping string
}{
{
"IBM Code Page 437",
"PC8CodePage437",
"",
"CodePage437",
encoding.ASCIISub,
ascii +
"ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒ" +
"áíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐" +
"└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀" +
"αßΓπΣσµτΦΘΩδ∞∅∈∩≡±≥≤⌠⌡÷≈°•·√ⁿ²∎\u00a0",
},
{
"IBM Code Page 866",
"IBM866",
"",
"CodePage866",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-ibm866.txt",
},
{
"ISO 8859-2",
"ISOLatin2",
"",
"ISO8859_2",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
},
{
"ISO 8859-3",
"ISOLatin3",
"",
"ISO8859_3",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
},
{
"ISO 8859-4",
"ISOLatin4",
"",
"ISO8859_4",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
},
{
"ISO 8859-5",
"ISOLatinCyrillic",
"",
"ISO8859_5",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
},
{
"ISO 8859-6",
"ISOLatinArabic",
"",
"ISO8859_6,ISO8859_6E,ISO8859_6I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
},
{
"ISO 8859-7",
"ISOLatinGreek",
"",
"ISO8859_7",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
},
{
"ISO 8859-8",
"ISOLatinHebrew",
"",
"ISO8859_8,ISO8859_8E,ISO8859_8I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
},
{
"ISO 8859-10",
"ISOLatin6",
"",
"ISO8859_10",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
},
{
"ISO 8859-13",
"ISO885913",
"",
"ISO8859_13",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
},
{
"ISO 8859-14",
"ISO885914",
"",
"ISO8859_14",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
},
{
"ISO 8859-15",
"ISO885915",
"",
"ISO8859_15",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
},
{
"ISO 8859-16",
"ISO885916",
"",
"ISO8859_16",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
},
{
"KOI8-R",
"KOI8R",
"",
"KOI8R",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
},
{
"KOI8-U",
"KOI8U",
"",
"KOI8U",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
},
{
"Macintosh",
"Macintosh",
"",
"Macintosh",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-macintosh.txt",
},
{
"Macintosh Cyrillic",
"MacintoshCyrillic",
"",
"MacintoshCyrillic",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
},
{
"Windows 874",
"Windows874",
"",
"Windows874",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-874.txt",
},
{
"Windows 1250",
"Windows1250",
"",
"Windows1250",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
},
{
"Windows 1251",
"Windows1251",
"",
"Windows1251",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
},
{
"Windows 1252",
"Windows1252",
"",
"Windows1252",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
},
{
"Windows 1253",
"Windows1253",
"",
"Windows1253",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
},
{
"Windows 1254",
"Windows1254",
"",
"Windows1254",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
},
{
"Windows 1255",
"Windows1255",
"",
"Windows1255",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
},
{
"Windows 1256",
"Windows1256",
"",
"Windows1256",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
},
{
"Windows 1257",
"Windows1257",
"",
"Windows1257",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
},
{
"Windows 1258",
"Windows1258",
"",
"Windows1258",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
},
{
"X-User-Defined",
"XUserDefined",
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
"XUserDefined",
encoding.ASCIISub,
ascii +
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
},
}
func getWHATWG(url string) string {
res, err := http.Get(url)
if err != nil {
log.Fatalf("%q: Get: %v", url, err)
}
defer res.Body.Close()
mapping := make([]rune, 128)
for i := range mapping {
mapping[i] = '\ufffd'
}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := 0, 0
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 128 <= x {
log.Fatalf("code %d is out of range", x)
}
if 0x80 <= y && y < 0xa0 {
// We diverge from the WHATWG spec by mapping control characters
// in the range [0x80, 0xa0) to U+FFFD.
continue
}
mapping[x] = rune(y)
}
return ascii + string(mapping)
}
func main() {
mibs := map[string]bool{}
all := []string{}
buf := make([]byte, 8)
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("package charmap\n\n")
fmt.Printf("import (\n")
fmt.Printf("\t\"golang.org/x/text/encoding\"\n")
fmt.Printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
fmt.Printf(")\n\n")
for _, e := range encodings {
varNames := strings.Split(e.varName, ",")
all = append(all, varNames...)
varName := varNames[0]
if strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/") {
e.mapping = getWHATWG(e.mapping)
}
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
if asciiSuperset {
low = 0x80
}
lvn := 1
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
lvn = 3
}
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
fmt.Printf("// %s is the %s encoding.\n", varName, e.name)
if e.comment != "" {
fmt.Printf("//\n// %s\n", e.comment)
}
fmt.Printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
varName, lowerVarName, lowerVarName, e.name)
if mibs[e.mib] {
log.Fatalf("MIB type %q declared multiple times.", e.mib)
}
fmt.Printf("mib: identifier.%s,\n", e.mib)
fmt.Printf("asciiSuperset: %t,\n", asciiSuperset)
fmt.Printf("low: 0x%02x,\n", low)
fmt.Printf("replacement: 0x%02x,\n", e.replacement)
fmt.Printf("decode: [256]utf8Enc{\n")
i, backMapping := 0, map[rune]byte{}
for _, c := range e.mapping {
if _, ok := backMapping[c]; !ok {
backMapping[c] = byte(i)
}
for j := range buf {
buf[j] = 0
}
n := utf8.EncodeRune(buf, c)
if n > 3 {
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
}
fmt.Printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
if i%2 == 1 {
fmt.Printf("\n")
}
i++
}
fmt.Printf("},\n")
fmt.Printf("encode: [256]uint32{\n")
encode := make([]uint32, 0, 256)
for c, i := range backMapping {
encode = append(encode, uint32(i)<<24|uint32(c))
}
sort.Sort(byRune(encode))
for len(encode) < cap(encode) {
encode = append(encode, encode[len(encode)-1])
}
for i, enc := range encode {
fmt.Printf("0x%08x,", enc)
if i%8 == 7 {
fmt.Printf("\n")
}
}
fmt.Printf("},\n}\n")
}
// TODO: add proper line breaking.
fmt.Printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
}
type byRune []uint32
func (b byRune) Len() int { return len(b) }
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

File diff suppressed because it is too large Load Diff

@ -0,0 +1,179 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package encoding defines an interface for character encodings, such as Shift
// JIS and Windows 1252, that can convert to and from UTF-8.
//
// To convert the bytes of an io.Reader r from the encoding e to UTF-8:
// rInUTF8 := transform.NewReader(r, e.NewDecoder())
// and to convert from UTF-8 to the encoding e:
// wInUTF8 := transform.NewWriter(w, e.NewEncoder())
// In both cases, import "golang.org/x/text/transform".
//
// Encoding implementations are provided in other packages, such as
// golang.org/x/text/encoding/charmap and
// golang.org/x/text/encoding/japanese.
package encoding
import (
"errors"
"unicode/utf8"
"golang.org/x/text/transform"
)
// Encoding is a character set encoding that can be transformed to and from
// UTF-8.
type Encoding interface {
// NewDecoder returns a transformer that converts to UTF-8.
//
// Transforming source bytes that are not of that encoding will not
// result in an error per se. Each byte that cannot be transcoded will
// be represented in the output by the UTF-8 encoding of '\uFFFD', the
// replacement rune.
NewDecoder() transform.Transformer
// NewEncoder returns a transformer that converts from UTF-8.
//
// Transforming source bytes that are not valid UTF-8 will not result in
// an error per se. Each rune that cannot be transcoded will be
// represented in the output by an encoding-specific replacement such as
// "\x1a" (the ASCII substitute character) or "\xff\xfd". To return
// early with error instead, use transform.Chain to preprocess the data
// with a UTF8Validator.
NewEncoder() transform.Transformer
}
// ASCIISub is the ASCII substitute character, as recommended by
// http://unicode.org/reports/tr36/#Text_Comparison
const ASCIISub = '\x1a'
// Nop is the nop encoding. Its transformed bytes are the same as the source
// bytes; it does not replace invalid UTF-8 sequences.
var Nop Encoding = nop{}
type nop struct{}
func (nop) NewDecoder() transform.Transformer {
return transform.Nop
}
func (nop) NewEncoder() transform.Transformer {
return transform.Nop
}
// Replacement is the replacement encoding. Decoding from the replacement
// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
// the replacement encoding yields the same as the source bytes except that
// invalid UTF-8 is converted to '\uFFFD'.
//
// It is defined at http://encoding.spec.whatwg.org/#replacement
var Replacement Encoding = replacement{}
type replacement struct{}
func (replacement) NewDecoder() transform.Transformer {
return replacementDecoder{}
}
func (replacement) NewEncoder() transform.Transformer {
return replacementEncoder{}
}
type replacementDecoder struct{ transform.NopResetter }
func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(dst) < 3 {
return 0, 0, transform.ErrShortDst
}
if atEOF {
const fffd = "\ufffd"
dst[0] = fffd[0]
dst[1] = fffd[1]
dst[2] = fffd[2]
nDst = 3
}
return nDst, len(src), nil
}
type replacementEncoder struct{ transform.NopResetter }
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
for ; nSrc < len(src); nSrc += size {
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
size = 1
} else {
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
r = '\ufffd'
}
}
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
return nDst, nSrc, err
}
// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
// input byte that is not valid UTF-8.
var UTF8Validator transform.Transformer = utf8Validator{}
type utf8Validator struct{ transform.NopResetter }
func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := len(src)
if n > len(dst) {
n = len(dst)
}
for i := 0; i < n; {
if c := src[i]; c < utf8.RuneSelf {
dst[i] = c
i++
continue
}
_, size := utf8.DecodeRune(src[i:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
err = ErrInvalidUTF8
if !atEOF && !utf8.FullRune(src[i:]) {
err = transform.ErrShortSrc
}
return i, i, err
}
if i+size > len(dst) {
return i, i, transform.ErrShortDst
}
for ; size > 0; size-- {
dst[i] = src[i]
i++
}
}
if len(src) > len(dst) {
err = transform.ErrShortDst
}
return n, n, err
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,40 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package encoding_test
import (
"fmt"
"io"
"os"
"strings"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/transform"
)
func ExampleDecodeWindows1252() {
sr := strings.NewReader("Gar\xe7on !")
tr := transform.NewReader(sr, charmap.Windows1252.NewDecoder())
io.Copy(os.Stdout, tr)
// Output: Garçon !
}
func ExampleUTF8Validator() {
for i := 0; i < 2; i++ {
transformer := charmap.Windows1252.NewEncoder()
if i == 1 {
transformer = transform.Chain(encoding.UTF8Validator, transformer)
}
dst := make([]byte, 256)
src := []byte("abc\xffxyz") // src is invalid UTF-8.
nDst, nSrc, err := transformer.Transform(dst, src, true)
fmt.Printf("i=%d: produced %q, consumed %q, error %v\n",
i, dst[:nDst], src[:nSrc], err)
}
// Output:
// i=0: produced "abc\x1axyz", consumed "abc\xffxyz", error <nil>
// i=1: produced "abc", consumed "abc", error encoding: invalid UTF-8
}

@ -0,0 +1,45 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package htmlindex maps character set encoding names to Encodings as
// recommended by the W3C for use in HTML 5. See http://www.w3.org/TR/encoding.
package htmlindex
// TODO: perhaps have a "bare" version of the index (used by this package) that
// is not pre-loaded with all encodings. Global variables in encodings prevent
// the linker from being able to purge unneeded tables. This means that
// referencing all encodings, as this package does for the default index, links
// in all encodings unconditionally.
//
// This issue can be solved by either solving the linking issue (see
// https://github.com/golang/go/issues/6330) or refactoring the encoding tables
// (e.g. moving the tables to internal packages that do not use global
// variables).
// TODO: allow canonicalizing names
import (
"golang.org/x/text/encoding"
"golang.org/x/text/language"
)
// LanguageDefault returns the canonical name of the default encoding for a
// given language.
func LanguageDefault(tag language.Tag) string {
// Note: we'll use the map defined in the W3C spec.
panic("TODO: implement")
}
// Get returns an Encoding for one of the names listed in
// http://www.w3.org/TR/encoding using the Default Index. Matching is case-
// insensitive.
func Get(name string) (encoding.Encoding, error) {
panic("TODO: implement")
}
// Name reports the canonical name of the given Encoding. It will return
// an error if e is not associated with a supported encoding scheme.
func Name(e encoding.Encoding) (string, error) {
panic("TODO: implement")
}

@ -0,0 +1,26 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ianaindex_test
import (
"fmt"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/ianaindex"
)
func ExampleIndex() {
fmt.Println(ianaindex.MIME.Name(charmap.ISO8859_7))
fmt.Println(ianaindex.IANA.Name(charmap.ISO8859_7))
e, _ := ianaindex.IANA.Get("cp437")
fmt.Println(ianaindex.IANA.Name(e))
// TODO: Output:
// ISO-8859-7
// ISO8859_7:1987
// IBM437
}

@ -0,0 +1,64 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ianaindex maps names to Encodings as specified by the IANA registry.
// This includes both the MIME and IANA names.
//
// See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
// more details.
package ianaindex
import (
"golang.org/x/text/encoding"
)
// TODO: allow users to specify their own aliases?
// TODO: allow users to specify their own indexes?
// TODO: allow canonicalizing names
// NOTE: only use these top-level variables if we can get the linker to drop
// the indexes when they are not used. Make them a function or perhaps only
// support MIME otherwise.
var (
// MIME is an index to map MIME names. It does not support aliases.
MIME *Index
// IANA is an index that supports all names and aliases using IANA names as
// the canonical identifier.
IANA *Index
)
// Index maps names registered by IANA to Encodings.
type Index struct {
}
// Get returns an Encoding for IANA-registered names. Matching is
// case-insensitive.
func (x *Index) Get(name string) (encoding.Encoding, error) {
panic("TODO: implement")
}
// Name reports the canonical name of the given Encoding. It will return an
// error if the e is not associated with a known encoding scheme.
func (x *Index) Name(e encoding.Encoding) (string, error) {
panic("TODO: implement")
}
// TODO: the coverage of this index is rather spotty. Allowing users to set
// encodings would allow:
// - users to increase coverage
// - allow a partially loaded set of encodings in case the user doesn't need to
// them all.
// - write an OS-specific wrapper for supported encodings and set them.
// The exact definition of Set depends a bit on if and how we want to let users
// write their own Encoding implementations. Also, it is not possible yet to
// only partially load the encodings without doing some refactoring. Until this
// is solved, we might as well not support Set.
// // Set sets the e to be used for the encoding scheme identified by name. Only
// // canonical names may be used. An empty name assigns e to its internally
// // associated encoding scheme.
// func (x *Index) Set(name string, e encoding.Encoding) error {
// panic("TODO: implement")
// }

@ -0,0 +1,137 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
type registry struct {
XMLName xml.Name `xml:"registry"`
Updated string `xml:"updated"`
Registry []struct {
ID string `xml:"id,attr"`
Record []struct {
Name string `xml:"name"`
Xref []struct {
Type string `xml:"type,attr"`
Data string `xml:"data,attr"`
} `xml:"xref"`
Desc struct {
Data string `xml:",innerxml"`
// Any []struct {
// Data string `xml:",chardata"`
// } `xml:",any"`
// Data string `xml:",chardata"`
} `xml:"description,"`
MIB string `xml:"value"`
Alias []string `xml:"alias"`
MIME string `xml:"preferred_alias"`
} `xml:"record"`
} `xml:"registry"`
}
func main() {
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
reg := &registry{}
if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
log.Fatalf("Error decoding charset registry: %v", err)
}
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
}
w := &bytes.Buffer{}
fmt.Fprintf(w, "const (\n")
for _, rec := range reg.Registry[0].Record {
constName := ""
for _, a := range rec.Alias {
if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
// Some of the constant definitions have comments in them. Strip those.
constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
}
}
if constName == "" {
switch rec.MIB {
case "2085":
constName = "HZGB2312" // Not listed as alias for some reason.
default:
log.Fatalf("No cs alias defined for %s.", rec.MIB)
}
}
if rec.MIME != "" {
rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
}
fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
if len(rec.Desc.Data) > 0 {
fmt.Fprint(w, "// ")
d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
inElem := true
attr := ""
for {
t, err := d.Token()
if err != nil {
if err != io.EOF {
log.Fatal(err)
}
break
}
switch x := t.(type) {
case xml.CharData:
attr = "" // Don't need attribute info.
a := bytes.Split([]byte(x), []byte("\n"))
for i, b := range a {
if b = bytes.TrimSpace(b); len(b) != 0 {
if !inElem && i > 0 {
fmt.Fprint(w, "\n// ")
}
inElem = false
fmt.Fprintf(w, "%s ", string(b))
}
}
case xml.StartElement:
if x.Name.Local == "xref" {
inElem = true
use := false
for _, a := range x.Attr {
if a.Name.Local == "type" {
use = use || a.Value != "person"
}
if a.Name.Local == "data" && use {
attr = a.Value + " "
}
}
}
case xml.EndElement:
inElem = false
fmt.Fprint(w, attr)
}
}
fmt.Fprint(w, "\n")
}
for _, x := range rec.Xref {
switch x.Type {
case "rfc":
fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
case "uri":
fmt.Fprintf(w, "// Reference: %s\n", x.Data)
}
}
fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
fmt.Fprintln(w)
}
fmt.Fprintln(w, ")")
gen.WriteGoFile("mib.go", "identifier", w.Bytes())
}

@ -0,0 +1,80 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go
// Package identifier defines the contract between implementations of Encoding
// and Index by defining identifiers that uniquely identify standardized coded
// character sets (CCS) and character encoding schemes (CES), which we will
// together refer to as encodings, for which Encoding implementations provide
// converters to and from UTF-8. This package is typically only of concern to
// implementers of Indexes and Encodings.
//
// One part of the identifier is the MIB code, which is defined by IANA and
// uniquely identifies a CCS or CES. Each code is associated with data that
// references authorities, official documentation as well as aliases and MIME
// names.
//
// Not all CESs are covered by the IANA registry. The "other" string that is
// returned by ID can be used to identify other character sets or versions of
// existing ones.
//
// It is recommended that each package that provides a set of Encodings provide
// the All and Common variables to reference all supported encodings and
// commonly used subset. This allows Index implementations to include all
// available encodings without explicitly referencing or knowing about them.
package identifier
// Note: this package is internal, but could be made public if there is a need
// for writing third-party Indexes and Encodings.
// References:
// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
// - http://www.ietf.org/rfc/rfc2978.txt
// - http://www.unicode.org/reports/tr22/
// - http://www.w3.org/TR/encoding/
// - http://www.w3.org/TR/encoding/indexes/encodings.json
// - https://encoding.spec.whatwg.org/
// - https://tools.ietf.org/html/rfc6657#section-5
// Interface can be implemented by Encodings to define the CCS or CES for which
// it implements conversions.
type Interface interface {
// ID returns an encoding identifier. Exactly one of the mib and other
// values should be non-zero.
//
// In the usual case it is only necessary to indicate the MIB code. The
// other string can be used to specify encodings for which there is no MIB,
// such as "x-mac-dingbat".
//
// The other string may only contain the characters a-z, A-Z, 0-9, - and _.
ID() (mib MIB, other string)
// NOTE: the restrictions on the encoding are to allow extending the syntax
// with additional information such as versions, vendors and other variants.
}
// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds
// some identifiers for some encodings that are not covered by the IANA
// standard.
//
// See http://www.iana.org/assignments/ianacharset-mib.
type MIB uint16
// These additional MIB types are not defined in IANA. They are added because
// they are common and defined within the text repo.
const (
// Unofficial marks the start of encodings not registered by IANA.
Unofficial MIB = 10000 + iota
// TODO: add Replacement?
// XUserDefined is the code for x-user-defined.
XUserDefined
// MacintoshCyrillic is the code for x-mac-cyrillic.
MacintoshCyrillic
)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,60 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package internal contains code that is shared among encoding implementations.
package internal
import (
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// Encoding is an implementation of the Encoding interface that adds the String
// and ID methods to an existing encoding.
type Encoding struct {
encoding.Encoding
Name string
MIB identifier.MIB
}
// _ verifies that Encoding implements identifier.Interface.
var _ identifier.Interface = (*Encoding)(nil)
func (e *Encoding) String() string {
return e.Name
}
func (e *Encoding) ID() (mib identifier.MIB, other string) {
return e.MIB, ""
}
// SimpleEncoding is an Encoding that combines two Transformers.
type SimpleEncoding struct {
Decoder transform.Transformer
Encoder transform.Transformer
}
func (e *SimpleEncoding) NewDecoder() transform.Transformer {
return e.Decoder
}
func (e *SimpleEncoding) NewEncoder() transform.Transformer {
return e.Encoder
}
// FuncEncoding is an Encoding that combines two functions returning a new
// Transformer.
type FuncEncoding struct {
Decoder func() transform.Transformer
Encoder func() transform.Transformer
}
func (e FuncEncoding) NewDecoder() transform.Transformer {
return e.Decoder()
}
func (e FuncEncoding) NewEncoder() transform.Transformer {
return e.Encoder()
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save