Skip to content

Commit 2d9a50b

Browse files
dspezia authored and bradfitz committed
html: simplify and optimize escape/unescape
The html package uses some specific code to escape special characters.
Actually, the strings.Replacer can be used instead, and is much more
efficient. The converse operation is more complex but can still be
slightly optimized.

Credits to Ken Bloom ([email protected]), who first submitted a
similar patch at https://codereview.appspot.com/141930043

Added benchmarks and slightly optimized UnescapeString.

benchmark                   old ns/op     new ns/op     delta
BenchmarkEscape-4           118713        19825         -83.30%
BenchmarkEscapeNone-4       87653         3784          -95.68%
BenchmarkUnescape-4         24888         23417         -5.91%
BenchmarkUnescapeNone-4     14423         157           -98.91%

benchmark                   old allocs     new allocs     delta
BenchmarkEscape-4           9              2              -77.78%
BenchmarkEscapeNone-4       0              0              +0.00%
BenchmarkUnescape-4         2              2              +0.00%
BenchmarkUnescapeNone-4     0              0              +0.00%

benchmark                   old bytes     new bytes     delta
BenchmarkEscape-4           24800         12288         -50.45%
BenchmarkEscapeNone-4       0             0             +0.00%
BenchmarkUnescape-4         10240         10240         +0.00%
BenchmarkUnescapeNone-4     0             0             +0.00%

Fixes #8697

Change-Id: I208261ed7cbe9b3dee6317851f8c0cf15528bce4
Reviewed-on: https://go-review.googlesource.com/9808
Run-TryBot: Brad Fitzpatrick <[email protected]>
Reviewed-by: Brad Fitzpatrick <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 2b83366 commit 2d9a50b

File tree

2 files changed: +50 -47 lines changed

src/html/escape.go

+11-46
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
package html
77

88
import (
9-
"bytes"
109
"strings"
1110
"unicode/utf8"
1211
)
@@ -187,52 +186,20 @@ func unescape(b []byte) []byte {
187186
return b
188187
}
189188

190-
const escapedChars = `&'<>"`
191-
192-
func escape(w writer, s string) error {
193-
i := strings.IndexAny(s, escapedChars)
194-
for i != -1 {
195-
if _, err := w.WriteString(s[:i]); err != nil {
196-
return err
197-
}
198-
var esc string
199-
switch s[i] {
200-
case '&':
201-
esc = "&amp;"
202-
case '\'':
203-
// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
204-
esc = "&#39;"
205-
case '<':
206-
esc = "&lt;"
207-
case '>':
208-
esc = "&gt;"
209-
case '"':
210-
// "&#34;" is shorter than "&quot;".
211-
esc = "&#34;"
212-
default:
213-
panic("unrecognized escape character")
214-
}
215-
s = s[i+1:]
216-
if _, err := w.WriteString(esc); err != nil {
217-
return err
218-
}
219-
i = strings.IndexAny(s, escapedChars)
220-
}
221-
_, err := w.WriteString(s)
222-
return err
223-
}
189+
var htmlEscaper = strings.NewReplacer(
190+
`&`, "&amp;",
191+
`'`, "&#39;", // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
192+
`<`, "&lt;",
193+
`>`, "&gt;",
194+
`"`, "&#34;", // "&#34;" is shorter than "&quot;".
195+
)
224196

225197
// EscapeString escapes special characters like "<" to become "&lt;". It
226198
// escapes only five such characters: <, >, &, ' and ".
227199
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
228200
// always true.
229201
func EscapeString(s string) string {
230-
if strings.IndexAny(s, escapedChars) == -1 {
231-
return s
232-
}
233-
var buf bytes.Buffer
234-
escape(&buf, s)
235-
return buf.String()
202+
return htmlEscaper.Replace(s)
236203
}
237204

238205
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
@@ -241,10 +208,8 @@ func EscapeString(s string) string {
241208
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
242209
// always true.
243210
func UnescapeString(s string) string {
244-
for _, c := range s {
245-
if c == '&' {
246-
return string(unescape([]byte(s)))
247-
}
211+
if !strings.Contains(s, "&") {
212+
return s
248213
}
249-
return s
214+
return string(unescape([]byte(s)))
250215
}

src/html/escape_test.go

+39-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
package html
66

7-
import "testing"
7+
import (
8+
"strings"
9+
"testing"
10+
)
811

912
type unescapeTest struct {
1013
// A short description of the test case.
@@ -113,3 +116,38 @@ func TestUnescapeEscape(t *testing.T) {
113116
}
114117
}
115118
}
119+
120+
var (
121+
benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100)
122+
benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)
123+
)
124+
125+
func BenchmarkEscape(b *testing.B) {
126+
n := 0
127+
for i := 0; i < b.N; i++ {
128+
n += len(EscapeString(benchEscapeData))
129+
}
130+
}
131+
132+
func BenchmarkEscapeNone(b *testing.B) {
133+
n := 0
134+
for i := 0; i < b.N; i++ {
135+
n += len(EscapeString(benchEscapeNone))
136+
}
137+
}
138+
139+
func BenchmarkUnescape(b *testing.B) {
140+
s := EscapeString(benchEscapeData)
141+
n := 0
142+
for i := 0; i < b.N; i++ {
143+
n += len(UnescapeString(s))
144+
}
145+
}
146+
147+
func BenchmarkUnescapeNone(b *testing.B) {
148+
s := EscapeString(benchEscapeNone)
149+
n := 0
150+
for i := 0; i < b.N; i++ {
151+
n += len(UnescapeString(s))
152+
}
153+
}

0 commit comments

Comments
 (0)