lib/core: remove uses of Byte for Text
[nit.git] / lib / json / static.nit
index 59f7629..2d42e16 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Static interface to get Nit objects from a Json string.
+# Static interface to read Nit objects from JSON strings
 #
-# `String::json_to_nit_object` returns an equivalent Nit object from
-# the Json source. This object can then be type checked by the usual
-# languages features (`isa` and `as`).
+# `Text::parse_json` returns a simple Nit object from the JSON source.
+# This object can then be type checked as usual with `isa` and `as`.
 module static
 
-import standard
-private import json_parser
-private import json_lexer
-
-# Something that can be translated to JSON.
-interface Jsonable
-       # Encode `self` in JSON.
-       fun to_json: String is abstract
-end
+import parser_base
+intrude import error
 
 redef class Text
-       super Jsonable
 
-       # Encode `self` in JSON.
+       # Removes JSON-escaping if necessary in a JSON string
+       #
+       #     assert "\\\"string\\uD83D\\uDE02\\\"".unescape_json == "\"string😂\""
+       fun unescape_json: Text do
+               # Only run the conversion pass when `json_need_escape` reports
+               # that there is something to unescape; otherwise hand back `self`.
+               if json_need_escape then return json_to_nit_string
+               return self
+       end
+
+       # Does `self` need treatment from JSON to Nit ?
        #
-       #     assert "\t\"http://example.com\"\r\n\0\\".to_json ==
-       #               "\"\\t\\\"http:\\/\\/example.com\\\"\\r\\n\\u0000\\\\\""
-       redef fun to_json do
-               var buffer = new FlatBuffer
-               buffer.add '\"'
-               for i in [0..self.length[ do
+       # i.e. is there at least one `\` character in it ?
+       #
+       #     assert not "string".json_need_escape
+       #     assert "\\\"string\\\"".json_need_escape
+       private fun json_need_escape: Bool do return has('\\')
+
+       # Escapes `self` from a JSON string to a Nit string
+       #
+       #     assert "\\\"string\\\"".json_to_nit_string == "\"string\""
+       #     assert "\\nEscape\\t\\n".json_to_nit_string == "\nEscape\t\n"
+       #     assert "\\u0041zu\\uD800\\uDFD3".json_to_nit_string == "Azu𐏓"
+       #
+       # A lone `\` at the very end of `self` is kept as-is.
+       private fun json_to_nit_string: String do
+               var res = new FlatBuffer.with_capacity(byte_length)
+               var i = 0
+               var ln = self.length
+               while i < ln do
                         var char = self[i]
                         if char == '\\' then
-                               buffer.append "\\\\"
-                       else if char == '\"' then
-                               buffer.append "\\\""
-                       else if char == '\/' then
-                               buffer.append "\\/"
-                       else if char < 16.ascii then
-                               if char == '\n' then
-                                       buffer.append "\\n"
-                               else if char == '\r' then
-                                       buffer.append "\\r"
-                               else if char == '\t' then
-                                       buffer.append "\\t"
-                               else if char == 0x0C.ascii then
-                                       buffer.append "\\f"
-                               else if char == 0x08.ascii then
-                                       buffer.append "\\b"
-                               else
-                                       buffer.append "\\u000{char.ascii.to_hex}"
+                               i += 1
+                               # Nothing follows the backslash: keep it and stop instead of
+                               # indexing past the end of `self`.
+                               if i >= ln then
+                                       res.add char
+                                       break
+                               end
+                               char = self[i]
+                               if char == 'b' then
+                                       char = 0x08.code_point
+                               else if char == 'f' then
+                                       char = 0x0C.code_point
+                               else if char == 'n' then
+                                       char = '\n'
+                               else if char == 'r' then
+                                       char = '\r'
+                               else if char == 't' then
+                                       char = '\t'
+                               else if char == 'u' then
+                                       # NOTE(review): `from_utf16_digit` is defined elsewhere; this
+                                       # presumably expects 4 hex digits after `u` - confirm how it
+                                       # behaves on truncated input.
+                                       var u16_esc = from_utf16_digit(i + 1)
+                                       char = u16_esc.code_point
+                                       # A surrogate needs a second `\uXXXX` unit, whose last digit
+                                       # sits at index `i + 10`.
+                                       if char.is_surrogate and i + 10 < ln then
+                                               if self[i + 5] == '\\' and self[i + 6] == 'u' then
+                                                       u16_esc <<= 16
+                                                       u16_esc += from_utf16_digit(i + 7)
+                                                       char = u16_esc.to_u32.from_utf16_surr.code_point
+                                                       i += 6
+                                               else
+                                                       # Unpaired surrogate: substitute U+FFFD.
+                                                       char = 0xFFFD.code_point
+                                               end
+                                       end
+                                       i += 4
                                 end
-                       else if char < ' ' then
-                               buffer.append "\\u00{char.ascii.to_hex}"
-                       else
-                               buffer.add char
+                               # `"`, `/` or `\` => Keep `char` as-is.
                         end
+                       res.add char
+                       i += 1
                 end
-               buffer.add '\"'
-               return buffer.write_to_string
-       end
-
-       fun json_to_nit_object: nullable Object
-       do
-               var lexer = new Lexer_json(to_s)
-               var parser = new Parser_json
-               var tokens = lexer.lex
-               parser.tokens.add_all(tokens)
-               var root_node = parser.parse
-               if root_node isa NStart then
-                       return root_node.n_0.to_nit_object
-               else if root_node isa NLexerError then
-                       var pos = root_node.position
-                       print "Json lexer error: {root_node.message} at {pos or else "<unknown>"} for {root_node}"
-                       return null
-               else if root_node isa NParserError then
-                       var pos = root_node.position
-                       print "Json parsing error: {root_node.message} at {pos or else "<unknown>"} for {root_node}"
-                       return null
-               else abort
+               return res.to_s
         end
-end
 
-redef class Buffer
-
-       # Append the JSON representation of `jsonable` to `self`.
+       # Parse `self` as JSON.
        #
-       # Append `"null"` for `null`.
-       private fun append_json_of(jsonable: nullable Jsonable) do
-               if jsonable isa Jsonable then
-                       append jsonable.to_json
-               else
-                       append "null"
-               end
-       end
-end
-
-redef class Int
-       super Jsonable
-
-       # Encode `self` in JSON.
+       # If `self` is not a valid JSON document or contains an unsupported escape
+       # sequence, return a `JsonParseError`.
        #
-       #     assert 0.to_json == "0"
-       #     assert (-42).to_json == "-42"
-       redef fun to_json do return self.to_s
-end
-
-redef class Float
-       super Jsonable
-
-       # Encode `self` in JSON.
+       # Example with `JsonObject`:
        #
-       # Note: Because this method use `to_s`, it may lose precision.
+       #     var obj = "\{\"foo\": \{\"bar\": true, \"goo\": [1, 2, 3]\}\}".parse_json
+       #     assert obj isa JsonObject
+       #     assert obj["foo"] isa JsonObject
+       #     assert obj["foo"].as(JsonObject)["bar"] == true
        #
-       # ~~~
-       # # Will not work as expected.
-       # # assert (-0.0).to_json == "-0.0"
+       # Example with `JsonArray`:
        #
-       # assert (.5).to_json == "0.5"
-       # assert (0.0).to_json == "0.0"
-       # ~~~
-       redef fun to_json do return self.to_s
-end
-
-redef class Bool
-       super Jsonable
-
-       # Encode `self` in JSON.
+       #     var arr = "[1, 2, 3]".parse_json
+       #     assert arr isa JsonArray
+       #     assert arr.length == 3
+       #     assert arr.first == 1
+       #     assert arr.last == 3
+       #
+       # Example with `String`:
        #
-       #     assert true.to_json == "true"
-       #     assert false.to_json == "false"
-       redef fun to_json do return self.to_s
+       #     var str = "\"foo, bar, baz\"".parse_json
+       #     assert str isa String
+       #     assert str == "foo, bar, baz"
+       #
+       # Example of a syntax error:
+       #
+       #     var error = "\{foo: \"bar\"\}".parse_json
+       #     assert error isa JsonParseError
+       #     assert error.to_s == "Bad key format Error: bad JSON entity"
+       fun parse_json: nullable Serializable do return (new JSONStringParser(self.to_s)).parse_entity
 end
 
-# A map that can be translated into a JSON object.
-interface JsonMapRead[K: String, V: nullable Jsonable]
-       super MapRead[K, V]
-       super Jsonable
-
-       # Encode `self` in JSON.
-       #
-       #     var obj = new JsonObject
-       #     obj["foo"] = "bar"
-       #     assert obj.to_json == "\{\"foo\":\"bar\"\}"
-       #     obj = new JsonObject
-       #     obj["baz"] = null
-       #     assert obj.to_json == "\{\"baz\":null\}"
-       redef fun to_json do
-               var buffer = new FlatBuffer
-               buffer.append "\{"
-               var it = iterator
-               if it.is_ok then
-                       append_json_entry(it, buffer)
-                       while it.is_ok do
-                               buffer.append ","
-                               append_json_entry(it, buffer)
-                       end
+redef class FlatText
+       # Byte-level variant: scans `items` from `first_byte` to `last_byte`
+       # (both included) looking for 0x5C, the ASCII code of `\`.
+       redef fun json_need_escape do
+               var its = items
+               for i in [first_byte .. last_byte] do
+                       if its[i] == 0x5C then return true
                 end
-               it.finish
-               buffer.append "\}"
-               return buffer.write_to_string
+               return false
         end
+end
 
-       private fun append_json_entry(iterator: MapIterator[String, nullable Jsonable],
-                       buffer: Buffer) do
-               buffer.append iterator.key.to_json
-               buffer.append ":"
-               buffer.append_json_of(iterator.item)
-               iterator.next
+redef class Char
+       # Can `self` begin a JSON number, i.e. is it `-` or a numeric character?
+       private fun is_json_num_start: Bool do
+               return self == '-' or is_numeric
         end
-end
 
-# A JSON Object.
-class JsonObject
-       super JsonMapRead[String, nullable Jsonable]
-       super HashMap[String, nullable Jsonable]
+       # Does `self` end a JSON token?
+       #
+       # True for `:`, `,`, `{`, `}`, `[`, `]`, `"` and any whitespace.
+       private fun is_json_separator: Bool do
+               var is_punctuation = self == ':' or self == ',' or self == '{' or self == '}' or self == '[' or self == ']' or self == '"'
+               return is_punctuation or is_whitespace
+       end
 end
 
-# A sequence that can be translated into a JSON array.
-class JsonSequenceRead[E: nullable Jsonable]
-       super Jsonable
-       super SequenceRead[E]
+# A simple ad-hoc JSON parser
+#
+# To parse a simple JSON document, read it as a String, build a parser from it
+# and call `parse_entity`.
+# NOTE: if your document contains several non-nested entities, call `parse_entity`
+# once for each JSON entity to parse.
+class JSONStringParser
+       super StringProcessor
 
-       # Encode `self` in JSON.
+       # Parses a JSON Entity
        #
-       #     var arr = new JsonArray.with_items("foo", null)
-       #     assert arr.to_json == "[\"foo\",null]"
-       #     arr.pop
-       #     assert arr.to_json =="[\"foo\"]"
-       #     arr.pop
-       #     assert arr.to_json =="[]"
-       redef fun to_json do
-               var buffer = new FlatBuffer
-               buffer.append "["
-               var it = iterator
-               if it.is_ok then
-                       append_json_entry(it, buffer)
-                       while it.is_ok do
-                               buffer.append ","
-                               append_json_entry(it, buffer)
+       # ~~~nit
+       # var p = new JSONStringParser("""{"numbers": [1,23,3], "string": "string"}""")
+       # assert p.parse_entity isa JsonObject
+       # ~~~
+       fun parse_entity: nullable Serializable do
+               # Skips leading whitespace, then dispatches on the first character:
+               # `[` array, `"` string, `{` object, `false`/`true`/`null` literals,
+               # anything else must start a number.
+               # Returns the parsed value, or a `JsonParseError` on bad input.
+               var srclen = len
+               ignore_whitespaces
+               if pos >= srclen then return make_parse_error("Empty JSON")
+               var c = src[pos]
+               if c == '[' then
+                       pos += 1
+                       return parse_json_array
+               else if c == '"' then
+                       var s = parse_json_string
+                       return s
+               else if c == '{' then
+                       pos += 1
+                       return parse_json_object
+               else if c == 'f' then
+                       # The error must be returned: falling through would index past the end of `src`.
+                       if pos + 4 >= srclen then return make_parse_error("Error: bad JSON entity")
+                       if src[pos + 1] == 'a' and src[pos + 2] == 'l' and src[pos + 3] == 's' and src[pos + 4] == 'e' then
+                               pos += 5
+                               return false
                         end
+                       return make_parse_error("Error: bad JSON entity")
+               else if c == 't' then
+                       if pos + 3 >= srclen then return make_parse_error("Error: bad JSON entity")
+                       if src[pos + 1] == 'r' and src[pos + 2] == 'u' and src[pos + 3] == 'e' then
+                               pos += 4
+                               return true
+                       end
+                       return make_parse_error("Error: bad JSON entity")
+               else if c == 'n' then
+                       if pos + 3 >= srclen then return make_parse_error("Error: bad JSON entity")
+                       if src[pos + 1] == 'u' and src[pos + 2] == 'l' and src[pos + 3] == 'l' then
+                               pos += 4
+                               return null
+                       end
+                       return make_parse_error("Error: bad JSON entity")
                 end
-               it.finish
-               buffer.append "]"
-               return buffer.write_to_string
+               if not c.is_json_num_start then return make_parse_error("Bad JSON character")
+               return parse_json_number
         end
 
-       private fun append_json_entry(iterator: Iterator[nullable Jsonable],
-                       buffer: Buffer) do
-               buffer.append_json_of(iterator.item)
-               iterator.next
+       # Parses a JSON Array
+       #
+       # Expects `pos` to be on the first character after the opening `[`
+       # (`parse_entity` consumes the `[` before calling this).
+       # Returns a `JsonArray`, or the `JsonParseError` of the first failure.
+       # On success, `pos` is left just after the closing `]`.
+       #
+       # NOTE: a `,` directly followed by `]` is accepted (trailing comma).
+       fun parse_json_array: Serializable do
+               var max = len
+               if pos >= max then return make_parse_error("Incomplete JSON array")
+               var arr = new JsonArray
+               var c = src[pos]
+               while not c == ']' do
+                       ignore_whitespaces
+                       if pos >= max then return make_parse_error("Incomplete JSON array")
+                       # Empty array, or end reached right after a `,`
+                       if src[pos] == ']' then break
+                       var ent = parse_entity
+                       #print "Parsed an entity {ent} for a JSON array"
+                       # Propagate the error of a nested entity unchanged
+                       if ent isa JsonParseError then return ent
+                       arr.add ent
+                       ignore_whitespaces
+                       if pos >= max then return make_parse_error("Incomplete JSON array")
+                       c = src[pos]
+                       if c == ']' then break
+                       if c != ',' then return make_parse_error("Bad array separator {c}")
+                       pos += 1
+               end
+               # Skip the closing `]`
+               pos += 1
+               return arr
         end
-end
-
-# A JSON array.
-class JsonArray
-       super JsonSequenceRead[nullable Jsonable]
-       super Array[nullable Jsonable]
-end
-
-################################################################################
-# Redef parser
 
-redef class Nvalue
-       fun to_nit_object: nullable Object is abstract
-end
-
-redef class Nvalue_number
-       redef fun to_nit_object
-       do
-               var text = n_number.text
-               if text.chars.has('.') or text.chars.has('e') or text.chars.has('E') then return text.to_f
-               return text.to_i
+       # Parses a JSON Object
+       #
+       # Expects `pos` to be on the first character after the opening `{`
+       # (`parse_entity` consumes the `{` before calling this).
+       # Returns a `JsonObject`, or a `JsonParseError` on the first failure.
+       # On success, `pos` is left just after the closing `}`.
+       #
+       # NOTE: a `,` directly followed by `}` is accepted (trailing comma).
+       fun parse_json_object: Serializable do
+               var max = len
+               if pos >= max then return make_parse_error("Incomplete JSON object")
+               var obj = new JsonObject
+               var c = src[pos]
+               while not c == '}' do
+                       ignore_whitespaces
+                       if pos >= max then return make_parse_error("Malformed JSON object")
+                       # Empty object, or end reached right after a `,`
+                       if src[pos] == '}' then break
+                       var key = parse_entity
+                       #print "Parsed key {key} for JSON object"
+                       # Keys must be strings; any other entity (or a parse error) is rejected here
+                       if not key isa String then return make_parse_error("Bad key format {key or else "null"}")
+                       ignore_whitespaces
+                       if pos >= max then return make_parse_error("Incomplete JSON object")
+                       if not src[pos] == ':' then return make_parse_error("Bad key/value separator {src[pos]}")
+                       pos += 1
+                       ignore_whitespaces
+                       var value = parse_entity
+                       #print "Parsed value {value} for JSON object"
+                       # Propagate the error of a nested entity unchanged
+                       if value isa JsonParseError then return value
+                       obj[key] = value
+                       ignore_whitespaces
+                       if pos >= max then return make_parse_error("Incomplete JSON object")
+                       c = src[pos]
+                       if c == '}' then break
+                       if c != ',' then return make_parse_error("Bad object separator {src[pos]}")
+                       pos += 1
+               end
+               # Skip the closing `}`
+               pos += 1
+               return obj
         end
-end
-
-redef class Nvalue_string
-       redef fun to_nit_object do return n_string.to_nit_string
-end
 
-redef class Nvalue_true
-       redef fun to_nit_object do return true
-end
-
-redef class Nvalue_false
-       redef fun to_nit_object do return false
-end
+       # Builds a `JsonParseError` carrying `message`, located at `hot_location`
+       protected fun make_parse_error(message: String): JsonParseError do
+               var parse_error = new JsonParseError(message)
+               parse_error.location = hot_location
+               return parse_error
+       end
 
-redef class Nvalue_null
-       redef fun to_nit_object do return null
-end
+       # Parses an Int or Float
+       #
+       # Reads, starting at `pos`: an optional `-`, integer digits, an optional
+       # `.fraction` and an optional exponent (`e` or `E`, optional sign, digits).
+       #
+       # Returns an `Int` when there is neither a fraction nor a negative
+       # exponent, a `Float` otherwise, or a `JsonParseError` on malformed input.
+       # `pos` is only moved (to just after the number) on success.
+       fun parse_json_number: Serializable do
+               var max = len
+               var p = pos
+               var c = src[p]
+               var is_neg = false
+               if c == '-' then
+                       is_neg = true
+                       p += 1
+                       if p >= max then return make_parse_error("Bad JSON number")
+                       c = src[p]
+               end
+               # Integer part
+               var val = 0
+               while c.is_numeric do
+                       val *= 10
+                       val += c.to_i
+                       p += 1
+                       if p >= max then break
+                       c = src[p]
+               end
+               if c == '.' then
+                       p += 1
+                       if p >= max then return make_parse_error("Bad JSON number")
+                       c = src[p]
+                       var fl = val.to_f
+                       var frac = 0.1
+                       while c.is_numeric do
+                               fl += c.to_i.to_f * frac
+                               frac /= 10.0
+                               p += 1
+                               if p >= max then break
+                               c = src[p]
+                       end
+                       if c == 'e' or c == 'E' then
+                               p += 1
+                               var exp = 0
+                               if p >= max then return make_parse_error("Malformed JSON number")
+                               c = src[p]
+                               # Optional exponent sign
+                               var neg_exp = c == '-'
+                               if neg_exp or c == '+' then
+                                       p += 1
+                                       if p >= max then return make_parse_error("Malformed JSON number")
+                                       c = src[p]
+                               end
+                               while c.is_numeric do
+                                       exp *= 10
+                                       exp += c.to_i
+                                       p += 1
+                                       if p >= max then break
+                                       c = src[p]
+                               end
+                               if neg_exp then
+                                       fl /= (10 ** exp).to_f
+                               else
+                                       fl *= (10 ** exp).to_f
+                               end
+                       end
+                       if p < max and not c.is_json_separator then return make_parse_error("Malformed JSON number")
+                       pos = p
+                       if is_neg then return -fl
+                       return fl
+               end
+               if c == 'e' or c == 'E' then
+                       p += 1
+                       if p >= max then return make_parse_error("Bad JSON number")
+                       # Start from 0: the digit loop below reads every exponent digit,
+                       # seeding `exp` with the first one would count it twice.
+                       var exp = 0
+                       c = src[p]
+                       # Optional exponent sign
+                       var neg_exp = c == '-'
+                       if neg_exp or c == '+' then
+                               p += 1
+                               if p >= max then return make_parse_error("Bad JSON number")
+                               c = src[p]
+                       end
+                       while c.is_numeric do
+                               exp *= 10
+                               exp += c.to_i
+                               p += 1
+                               if p >= max then break
+                               c = src[p]
+                       end
+                       if neg_exp then
+                               # A negative exponent cannot be represented as an Int: return a Float
+                               if p < max and not src[p].is_json_separator then return make_parse_error("Malformed JSON number")
+                               pos = p
+                               var scaled = val.to_f / (10 ** exp).to_f
+                               if is_neg then return -scaled
+                               return scaled
+                       end
+                       val *= (10 ** exp)
+               end
+               if p < max and not src[p].is_json_separator then return make_parse_error("Malformed JSON number")
+               pos = p
+               if is_neg then return -val
+               return val
+       end
 
-redef class Nstring
-       fun to_nit_string: String do
-               var res = new FlatBuffer
-               var i = 1
-               while i < text.length - 1 do
-                       var char = text[i]
-                       if char == '\\' then
-                               i += 1
-                               char = text[i]
-                               if char == 'b' then
-                                       char = 0x08.ascii
-                               else if char == 'f' then
-                                       char = 0x0C.ascii
-                               else if char == 'n' then
-                                       char = '\n'
-                               else if char == 'r' then
-                                       char = '\r'
-                               else if char == 't' then
-                                       char = '\t'
-                               else if char == 'u' then
-                                       var code = text.substring(i + 1, 4).to_hex
-                                       # TODO UTF-16 escaping is not supported yet.
-                                       if code >= 128 then
-                                               char = '?'
+       private var parse_str_buf = new FlatBuffer
+
+       # Parses and returns a Nit string from a JSON String
+       #
+       # Expects `pos` to be on the opening `"`. On success, returns the
+       # unescaped `String` and leaves `pos` just after the closing `"`.
+       # Returns a `JsonParseError` if the string is unterminated or contains
+       # a malformed `\uXXXX` escape sequence.
+       fun parse_json_string: Serializable do
+               var src = src
+               var ln = src.length
+               var p = pos
+               p += 1
+               if p >= ln then return make_parse_error("Malformed JSON String")
+               var c = src[p]
+               var ret = parse_str_buf
+               # The buffer is shared between calls: drop anything left behind by a
+               # previous call that returned an error mid-string.
+               ret.clear
+               # Start of the pending run of unescaped characters
+               var chunk_st = p
+               while c != '"' do
+                       if c != '\\' then
+                               p += 1
+                               if p >= ln then return make_parse_error("Malformed JSON string")
+                               c = src[p]
+                               continue
+                       end
+                       # Flush the unescaped run preceding the `\`
+                       ret.append_substring_impl(src, chunk_st, p - chunk_st)
+                       p += 1
+                       if p >= ln then return make_parse_error("Malformed Escape sequence in JSON string")
+                       c = src[p]
+                       if c == 'r' then
+                               ret.add '\r'
+                               p += 1
+                       else if c == 'n' then
+                               ret.add '\n'
+                               p += 1
+                       else if c == 't' then
+                               ret.add '\t'
+                               p += 1
+                       else if c == 'u' then
+                               var cp = 0
+                               p += 1
+                               for i in [0 .. 4[ do
+                                       cp <<= 4
+                                       if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+                                       c = src[p]
+                                       if c >= '0' and c <= '9' then
+                                               cp += c.code_point - '0'.code_point
+                                       else if c >= 'a' and c <= 'f' then
+                                               cp += c.code_point - 'a'.code_point + 10
+                                       else if c >= 'A' and c <= 'F' then
+                                               cp += c.code_point - 'A'.code_point + 10
                                         else
-                                               char = code.ascii
+                                               return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
                                         end
-                                       i += 4
+                                       p += 1
                                 end
-                               # `"`, `/` or `\` => Keep `char` as-is.
+                               c = cp.code_point
+                               # High surrogate: a second `\uXXXX` unit must follow
+                               if cp >= 0xD800 and cp <= 0xDBFF then
+                                       if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+                                       c = src[p]
+                                       if c != '\\' then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+                                       p += 1
+                                       if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+                                       c = src[p]
+                                       if c != 'u' then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+                                       var locp = 0
+                                       p += 1
+                                       for i in [0 .. 4[ do
+                                               locp <<= 4
+                                               if p >= ln then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+                                               c = src[p]
+                                               if c >= '0' and c <= '9' then
+                                                       locp += c.code_point - '0'.code_point
+                                               else if c >= 'a' and c <= 'f' then
+                                                       locp += c.code_point - 'a'.code_point + 10
+                                               else if c >= 'A' and c <= 'F' then
+                                                       locp += c.code_point - 'A'.code_point + 10
+                                               else
+                                                       return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
+                                               end
+                                               p += 1
+                                       end
+                                       # Combine both UTF-16 units into a single code point
+                                       c = (((locp & 0x3FF) | ((cp & 0x3FF) << 10)) + 0x10000).code_point
+                               end
+                               ret.add c
+                       else if c == 'b' then
+                               ret.add 8.code_point
+                               p += 1
+                       else if c == 'f' then
+                               ret.add '\f'
+                               p += 1
+                       else
+                               p += 1
+                               ret.add c
                         end
-                       res.add char
-                       i += 1
+                       chunk_st = p
+                       # The string may end right after an escape sequence without a closing `"`
+                       if p >= ln then return make_parse_error("Malformed JSON string")
+                       c = src[p]
                 end
-               return res.write_to_string
+               pos = p + 1
+               # No escape sequence met: return the source slice directly
+               if ret.is_empty then return src.substring(chunk_st, p - chunk_st)
+               ret.append_substring_impl(src, chunk_st, p - chunk_st)
+               var rets = ret.to_s
+               ret.clear
+               return rets
         end
-end
 
-redef class Nvalue_object
-       redef fun to_nit_object
-       do
-               var obj = new HashMap[String, nullable Object]
-               var members = n_members
-               if members != null then
-                       var pairs = members.pairs
-                       for pair in pairs do obj[pair.name] = pair.value
+       # Ignores any character until a JSON separator is encountered
+       #
+       # Advances `pos` until `src[pos]` is a JSON separator (see
+       # `Char::is_json_separator`) or the end of the source is reached.
+       fun ignore_until_separator do
+               var max = len
+               while pos < max do
+                       if src[pos].is_json_separator then return
+                       # Not a separator: skip it. Without this step the loop never progresses.
+                       pos += 1
                 end
-               return obj
-       end
-end
-
-redef class Nmembers
-       fun pairs: Array[Npair] is abstract
-end
-
-redef class Nmembers_tail
-       redef fun pairs
-       do
-               var arr = n_members.pairs
-               arr.add n_pair
-               return arr
         end
 end
 
-redef class Nmembers_head
-       redef fun pairs do return [n_pair]
-end
-
-redef class Npair
-       fun name: String do return n_string.to_nit_string
-       fun value: nullable Object do return n_value.to_nit_object
-end
-
-redef class Nvalue_array
-       redef fun to_nit_object
-       do
-               var arr = new Array[nullable Object]
-               var elements = n_elements
-               if elements != null then
-                       var items = elements.items
-                       for item in items do arr.add(item.to_nit_object)
-               end
-               return arr
-       end
+# A map that can be translated into a JSON object.
+#
+# Keys are bound to `String` and values to `nullable Serializable`.
+interface JsonMapRead[K: String, V: nullable Serializable]
+       super MapRead[K, V]
+       super Serializable
 end
 
-redef class Nelements
-       fun items: Array[Nvalue] is abstract
+# A JSON Object.
+#
+# A `HashMap` from `String` keys to `nullable Serializable` values.
+class JsonObject
+       super JsonMapRead[String, nullable Serializable]
+       super HashMap[String, nullable Serializable]
 end
 
-redef class Nelements_tail
-       redef fun items
-       do
-               var items = n_elements.items
-               items.add(n_value)
-               return items
-       end
+# A sequence that can be translated into a JSON array.
+#
+# Elements are bound to `nullable Serializable`.
+class JsonSequenceRead[E: nullable Serializable]
+       super Serializable
+       super SequenceRead[E]
 end
 
-redef class Nelements_head
-       redef fun items do return [n_value]
+# A JSON array.
+#
+# An `Array` of `nullable Serializable` elements.
+class JsonArray
+       super JsonSequenceRead[nullable Serializable]
+       super Array[nullable Serializable]
 end