Skip to content

Commit a824f5c

Browse files
committed
add option to encode space to '%20' as per url standard
By default, the space character is always encoded as '+'. This is wrong, as the URL Standard [0] specifies that the default is '%20'. PR servo#928 fixes this behavior, but it is a breaking change. To make the correct behavior available earlier, add a new function that encodes spaces as '%20'. This way, it can be used without causing a breaking change. [0]: https://url.spec.whatwg.org/#string-percent-encode-after-encoding Fixes: servo#927 Fixes: servo#888 Signed-off-by: Gabriel Goller <[email protected]>
1 parent 4b9f1e6 commit a824f5c

File tree

1 file changed

+57
-3
lines changed

1 file changed

+57
-3
lines changed

form_urlencoded/src/lib.rs

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,17 +115,34 @@ impl Iterator for ParseIntoOwned<'_> {
115115
}
116116

117117
/// The [`application/x-www-form-urlencoded` byte serializer](
118-
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
118+
/// https://url.spec.whatwg.org/#string-percent-encode-after-encoding).
119+
/// Serializes each space (`b' '`) as a plus sign (`+`); see
/// [`byte_serialize_percent_encoded`] for the variant that emits `%20` instead.
119120
///
120121
/// Returns an iterator of `&str` slices.
121122
pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
122-
ByteSerialize { bytes: input }
123+
ByteSerialize {
124+
bytes: input,
125+
// Selects the `+` representation for spaces when the iterator runs.
space_as_plus: true,
126+
}
127+
}
128+
129+
/// The [`application/x-www-form-urlencoded` byte serializer](
130+
/// https://url.spec.whatwg.org/#string-percent-encode-after-encoding).
131+
/// Unlike [`byte_serialize`], each space (`b' '`) is percent-encoded as `%20`
/// rather than serialized as a plus sign (`+`).
132+
///
133+
/// Returns an iterator of `&str` slices.
134+
pub fn byte_serialize_percent_encoded(input: &[u8]) -> ByteSerialize<'_> {
135+
ByteSerialize {
136+
bytes: input,
137+
// Disables the `+` shortcut so spaces are percent-encoded.
space_as_plus: false,
138+
}
123139
}
124140

125141
/// Return value of `byte_serialize()` and `byte_serialize_percent_encoded()`.
126142
#[derive(Debug)]
127143
pub struct ByteSerialize<'a> {
128144
// Input bytes that have not been serialized yet.
bytes: &'a [u8],
145+
// When `true`, a space is emitted as `+`; otherwise it is percent-encoded.
space_as_plus: bool,
129146
}
130147

131148
fn byte_serialized_unchanged(byte: u8) -> bool {
@@ -139,7 +156,7 @@ impl<'a> Iterator for ByteSerialize<'a> {
139156
if let Some((&first, tail)) = self.bytes.split_first() {
140157
if !byte_serialized_unchanged(first) {
141158
self.bytes = tail;
142-
return Some(if first == b' ' {
159+
return Some(if first == b' ' && self.space_as_plus {
143160
"+"
144161
} else {
145162
percent_encode_byte(first)
@@ -428,3 +445,40 @@ pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
428445
}
429446

430447
/// An optional, borrowed callback that maps a `&str` to bytes (`Cow<'_, [u8]>`).
///
/// NOTE(review): presumably lets callers replace the default text encoding;
/// confirm against the call sites, which are not visible here.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;
448+
449+
#[cfg(test)]
450+
mod tests {
451+
use alloc::string::String;
452+
453+
use crate::{byte_serialize, byte_serialize_percent_encoded};
454+
455+
// `byte_serialize` writes a space as `+` and percent-encodes `/`, `&`, `=`, `?`
// and every byte of the multi-byte emoji.
#[test]
456+
fn byte_serializer() {
457+
let in_1 = "c ool/org";
458+
let out_1 = "c+ool%2Forg";
459+
460+
// Trailing space checks that the last byte is handled like any other.
let in_2 = "a🔒nother&bu=ck?et ";
461+
let out_2 = "a%F0%9F%94%92nother%26bu%3Dck%3Fet+";
462+
463+
assert_eq!(byte_serialize(in_1.as_bytes()).collect::<String>(), out_1);
464+
assert_eq!(byte_serialize(in_2.as_bytes()).collect::<String>(), out_2);
465+
}
466+
467+
// Same inputs as above; the only expected difference is `%20` in place of `+`.
#[test]
468+
fn byte_serializer_percent_encoded() {
469+
let in_1 = "c ool/org";
470+
let out_1 = "c%20ool%2Forg";
471+
472+
let in_2 = "a🔒nother&bu=ck?et ";
473+
let out_2 = "a%F0%9F%94%92nother%26bu%3Dck%3Fet%20";
474+
475+
assert_eq!(
476+
byte_serialize_percent_encoded(in_1.as_bytes()).collect::<String>(),
477+
out_1
478+
);
479+
assert_eq!(
480+
byte_serialize_percent_encoded(in_2.as_bytes()).collect::<String>(),
481+
out_2
482+
);
483+
}
484+
}

0 commit comments

Comments
 (0)