From ee6d6d2fc7d0c9cd64a740a2e925044dacec1ac4 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexis=20Laferri=C3=A8re?=
Date: Wed, 9 Sep 2015 13:50:15 -0400
Subject: [PATCH] lib/text: fix to|from_percent_encoding with unicode characters
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Alexis Laferrière
---
Reviewer notes (not part of the commit message):
 - from_percent_encoding: size the output buffer with `bytelen`. The previous
   `bytelen - 2 * <number of '%'>` estimate was too small whenever a `%` was
   rejected and written back as `?`, so the loop wrote past the allocation.
 - from_percent_encoding: copy unescaped non-ASCII characters byte by byte
   instead of truncating their code point to a single byte.
 - The `index` line below predates these changes; regenerate it if the blob
   hashes matter.

 lib/core/text/abstract_text.nit |   41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit
index 0e02750..b1e6a04 100644
--- a/lib/core/text/abstract_text.nit
+++ b/lib/core/text/abstract_text.nit
@@ -731,6 +731,7 @@ abstract class Text
 	#     assert "aBc09-._~".to_percent_encoding == "aBc09-._~"
 	#     assert "%()< >".to_percent_encoding == "%25%28%29%3c%20%3e"
 	#     assert ".com/post?e=asdf&f=123".to_percent_encoding == ".com%2fpost%3fe%3dasdf%26f%3d123"
+	#     assert "éあいう".to_percent_encoding == "%c3%a9%e3%81%82%e3%81%84%e3%81%86"
 	fun to_percent_encoding: String
 	do
 		var buf = new Buffer
@@ -744,7 +745,10 @@ abstract class Text
 			   c == '_' or c == '~'
 			then
 				buf.add c
-			else buf.append "%{c.ascii.to_hex}"
+			else
+				var bytes = c.to_s.bytes
+				for b in bytes do buf.append "%{b.to_i.to_hex}"
+			end
 		end
 
 		return buf.to_s
@@ -760,36 +764,59 @@ abstract class Text
 	#     assert "%25%28%29%3C%20%3E".from_percent_encoding == "%()< >"
 	#     assert "incomplete %".from_percent_encoding == "incomplete ?"
 	#     assert "invalid % usage".from_percent_encoding == "invalid ? usage"
+	#     assert "%c3%a9%e3%81%82%e3%81%84%e3%81%86".from_percent_encoding == "éあいう"
+	#     assert "é%20あ".from_percent_encoding == "é あ"
 	fun from_percent_encoding: String
 	do
-		var buf = new Buffer
+		var has_percent = false
+		for c in chars do
+			if c == '%' then
+				has_percent = true
+				break
+			end
+		end
+
+		# If no transformation is needed, return self as a string
+		if not has_percent then return to_s
 
+		# Decoding never grows the text: a valid escape turns 3 bytes into 1,
+		# a rejected `%` becomes a single `?` and everything else is copied
+		# byte for byte, so `bytelen` is always a sufficient capacity.
+		var buf = new NativeString(bytelen)
 		var i = 0
+		var l = 0
 		while i < length do
 			var c = chars[i]
 			if c == '%' then
 				if i + 2 >= length then
 					# What follows % has been cut off
-					buf.add '?'
+					buf[l] = '?'.ascii.to_b
 				else
 					i += 1
 					var hex_s = substring(i, 2)
 					if hex_s.is_hex then
 						var hex_i = hex_s.to_hex
-						buf.add hex_i.ascii
+						buf[l] = hex_i.to_b
 						i += 1
 					else
 						# What follows a % is not Hex
-						buf.add '?'
+						buf[l] = '?'.ascii.to_b
 						i -= 1
 					end
 				end
-			else buf.add c
+				l += 1
+			else
+				# Copy every UTF-8 byte of an unescaped character
+				for b in c.to_s.bytes do
+					buf[l] = b
+					l += 1
+				end
+			end
 
 			i += 1
 		end
 
-		return buf.to_s
+		return buf.to_s_with_length(l)
 	end
 
 	# Escape the characters `<`, `>`, `&`, `"`, `'` and `/` as HTML/XML entity references.
-- 
1.7.9.5