Merge: doc: fixed some typos and other misc. corrections
[nit.git] / lib / json / static.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
4 # Copyright 2014 Alexandre Terrasa <alexandre@moz-concept.com>
5 # Copyright 2014 Jean-Christophe Beaupré <jcbrinfo@users.noreply.github.com>
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
18
19 # Static interface to read Nit objects from JSON strings
20 #
21 # `Text::parse_json` returns a simple Nit object from the JSON source.
22 # This object can then be type checked as usual with `isa` and `as`.
23 module static
24
25 import parser_base
26 intrude import error
27
redef class Text

	# Removes JSON-escaping if necessary in a JSON string
	#
	#     assert "\\\"string\\uD83D\\uDE02\\\"".unescape_json == "\"string😂\""
	#
	# Returns `self` unchanged when it holds no backslash.
	fun unescape_json: Text do
		if not json_need_escape then return self
		return self.json_to_nit_string
	end

	# Does `self` need treatment from JSON to Nit?
	#
	# i.e. is there at least one `\` character in it?
	#
	#     assert not "string".json_need_escape
	#     assert "\\\"string\\\"".json_need_escape
	private fun json_need_escape: Bool do return has('\\')

	# Converts the JSON escape sequences of `self` into the characters they denote
	#
	#     assert "\\\"string\\\"".json_to_nit_string == "\"string\""
	#     assert "\\nEscape\\t\\n".json_to_nit_string == "\nEscape\t\n"
	#     assert "\\u0041zu\\uD800\\uDFD3".json_to_nit_string == "Azu𐏓"
	#
	# Malformed input is tolerated rather than causing an out-of-bounds access:
	# a backslash that ends the text is kept as-is, and a `\u` escape with fewer
	# than four characters after it is replaced by U+FFFD.
	#
	#     assert "abc\\".json_to_nit_string == "abc\\"
	private fun json_to_nit_string: String do
		var res = new FlatBuffer.with_capacity(byte_length)
		var i = 0
		var ln = self.length
		while i < ln do
			var char = self[i]
			if char == '\\' then
				i += 1
				if i >= ln then
					# Dangling backslash: nothing to unescape, keep it literally.
					res.add '\\'
					break
				end
				char = self[i]
				if char == 'b' then
					char = 0x08.code_point
				else if char == 'f' then
					char = 0x0C.code_point
				else if char == 'n' then
					char = '\n'
				else if char == 'r' then
					char = '\r'
				else if char == 't' then
					char = '\t'
				else if char == 'u' then
					# `i` is on the `u`; the four digits sit at `i + 1 .. i + 4`.
					if i + 4 >= ln then
						# Truncated `\uXXXX`: emit the replacement character and stop.
						res.add 0xFFFD.code_point
						break
					end
					var u16_esc = from_utf16_digit(i + 1)
					char = u16_esc.code_point
					# A second `\uXXXX` escape, if any, ends at `i + 10`.
					if char.is_surrogate and i + 10 < ln then
						if self[i + 5] == '\\' and self[i + 6] == 'u' then
							u16_esc <<= 16
							u16_esc += from_utf16_digit(i + 7)
							char = u16_esc.to_u32.from_utf16_surr.code_point
							# Skip the `\uXXXX` of the second half of the pair.
							i += 6
						else
							char = 0xFFFD.code_point
						end
					end
					# Skip the four digits of the (first) escape.
					i += 4
				end
				# `"`, `/` or `\` => Keep `char` as-is.
			end
			res.add char
			i += 1
		end
		return res.to_s
	end

	# Parse `self` as JSON.
	#
	# If `self` is not a valid JSON document or contains an unsupported escape
	# sequence, return a `JsonParseError`.
	#
	# Example with `JsonObject`:
	#
	#     var obj = "\{\"foo\": \{\"bar\": true, \"goo\": [1, 2, 3]\}\}".parse_json
	#     assert obj isa JsonObject
	#     assert obj["foo"] isa JsonObject
	#     assert obj["foo"].as(JsonObject)["bar"] == true
	#
	# Example with `JsonArray`:
	#
	#     var arr = "[1, 2, 3]".parse_json
	#     assert arr isa JsonArray
	#     assert arr.length == 3
	#     assert arr.first == 1
	#     assert arr.last == 3
	#
	# Example with `String`:
	#
	#     var str = "\"foo, bar, baz\"".parse_json
	#     assert str isa String
	#     assert str == "foo, bar, baz"
	#
	# Example of a syntax error:
	#
	#     var error = "\{foo: \"bar\"\}".parse_json
	#     assert error isa JsonParseError
	#     assert error.to_s == "Bad key format Error: bad JSON entity"
	fun parse_json: nullable Serializable do return (new JSONStringParser(self.to_s)).parse_entity
end
126
redef class FlatText
	# Byte-level variant: looks for a backslash byte (`0x5C`) in `items`,
	# scanning from `first_byte` to `last_byte` inclusive.
	redef fun json_need_escape do
		var bytes = items
		var idx = first_byte
		var stop = last_byte
		while idx <= stop do
			if bytes[idx] == 0x5C then return true
			idx += 1
		end
		return false
	end
end
136
redef class Char
	# Can `self` begin a JSON number, i.e. is it `-` or a numeric character?
	private fun is_json_num_start: Bool do return self == '-' or is_numeric

	# Does `self` count as a JSON separator?
	#
	# True for `:`, `,`, `{`, `}`, `[`, `]`, `"` and any whitespace character.
	private fun is_json_separator: Bool do
		if is_whitespace then return true
		return ":,\{\}[]\"".has(self)
	end
end
158
# A simple ad-hoc JSON parser
#
# To parse a simple JSON document, read it as a String and give it to `parse_entity`
# NOTE: if your document contains several non-nested entities, use `parse_entity` for each
# JSON entity to parse
class JSONStringParser
	super StringProcessor

	# Parses a JSON Entity
	#
	# ~~~nit
	# var p = new JSONStringParser("""{"numbers": [1,23,3], "string": "string"}""")
	# assert p.parse_entity isa JsonObject
	# ~~~
	#
	# Leading whitespace is skipped, then the first character selects what is
	# parsed: an array, a string, an object, `false`, `true`, `null` or a number.
	# Returns `null` for the JSON `null` literal and a `JsonParseError` when the
	# input is empty or malformed.
	fun parse_entity: nullable Serializable do
		var srclen = len
		ignore_whitespaces
		if pos >= srclen then return make_parse_error("Empty JSON")
		var c = src[pos]
		if c == '[' then
			pos += 1
			return parse_json_array
		else if c == '"' then
			return parse_json_string
		else if c == '{' then
			pos += 1
			return parse_json_object
		else if c == 'f' then
			if accept_keyword("false") then return false
			return make_parse_error("Error: bad JSON entity")
		else if c == 't' then
			if accept_keyword("true") then return true
			return make_parse_error("Error: bad JSON entity")
		else if c == 'n' then
			if accept_keyword("null") then return null
			return make_parse_error("Error: bad JSON entity")
		end
		if not c.is_json_num_start then return make_parse_error("Bad JSON character")
		return parse_json_number
	end

	# Consumes `keyword` if `src` holds it at `pos`.
	#
	# On a match, moves `pos` just past the keyword and returns `true`.
	# Otherwise (mismatch, or not enough characters left) `pos` is untouched
	# and `false` is returned.
	private fun accept_keyword(keyword: String): Bool do
		var klen = keyword.length
		if pos + klen > len then return false
		for i in [0 .. klen[ do
			if src[pos + i] != keyword[i] then return false
		end
		pos += klen
		return true
	end

	# Parses a JSON Array
	#
	# Expects `pos` to be just after the opening `[`.
	# Returns the `JsonArray`, or a `JsonParseError` if the array is
	# incomplete, badly separated or contains an invalid entity.
	fun parse_json_array: Serializable do
		var max = len
		if pos >= max then return make_parse_error("Incomplete JSON array")
		var arr = new JsonArray
		var c = src[pos]
		while c != ']' do
			ignore_whitespaces
			if pos >= max then return make_parse_error("Incomplete JSON array")
			if src[pos] == ']' then break
			var ent = parse_entity
			if ent isa JsonParseError then return ent
			arr.add ent
			ignore_whitespaces
			if pos >= max then return make_parse_error("Incomplete JSON array")
			c = src[pos]
			if c == ']' then break
			if c != ',' then return make_parse_error("Bad array separator {c}")
			pos += 1
		end
		# Step over the closing `]`.
		pos += 1
		return arr
	end

	# Parses a JSON Object
	#
	# Expects `pos` to be just after the opening `{`.
	# Returns the `JsonObject`, or a `JsonParseError` if the object is
	# incomplete, a key is not a string, a separator is wrong or a value is invalid.
	fun parse_json_object: Serializable do
		var max = len
		if pos >= max then return make_parse_error("Incomplete JSON object")
		var obj = new JsonObject
		var c = src[pos]
		while c != '}' do
			ignore_whitespaces
			if pos >= max then return make_parse_error("Malformed JSON object")
			if src[pos] == '}' then break
			var key = parse_entity
			if not key isa String then return make_parse_error("Bad key format {key or else "null"}")
			ignore_whitespaces
			if pos >= max then return make_parse_error("Incomplete JSON object")
			if src[pos] != ':' then return make_parse_error("Bad key/value separator {src[pos]}")
			pos += 1
			ignore_whitespaces
			var value = parse_entity
			if value isa JsonParseError then return value
			obj[key] = value
			ignore_whitespaces
			if pos >= max then return make_parse_error("Incomplete JSON object")
			c = src[pos]
			if c == '}' then break
			if c != ',' then return make_parse_error("Bad object separator {src[pos]}")
			pos += 1
		end
		# Step over the closing `}`.
		pos += 1
		return obj
	end

	# Creates a `JsonParseError` with the right message and location
	#
	# The location is taken from `hot_location`.
	protected fun make_parse_error(message: String): JsonParseError do
		var err = new JsonParseError(message)
		err.location = hot_location
		return err
	end

	# Parses an Int or Float
	#
	# Expects `pos` to be on the first character of the number (`-` or a digit).
	# A number with a fractional part, or with a negative exponent, is returned
	# as a `Float`; any other number is returned as an `Int`.
	# The exponent may carry an explicit `+` or `-` sign.
	# `pos` is only moved when the number is accepted; otherwise a
	# `JsonParseError` is returned.
	fun parse_json_number: Serializable do
		var max = len
		var p = pos
		var c = src[p]
		var is_neg = false
		if c == '-' then
			is_neg = true
			p += 1
			if p >= max then return make_parse_error("Bad JSON number")
			c = src[p]
		end
		# Integer part. When the input ends inside a digit run, `c` keeps the
		# last digit read, so none of the `.`/`e` tests below can match.
		var val = 0
		while c.is_numeric do
			val *= 10
			val += c.to_i
			p += 1
			if p >= max then break
			c = src[p]
		end
		if c == '.' then
			p += 1
			if p >= max then return make_parse_error("Bad JSON number")
			c = src[p]
			var fl = val.to_f
			var frac = 0.1
			while c.is_numeric do
				fl += c.to_i.to_f * frac
				frac /= 10.0
				p += 1
				if p >= max then break
				c = src[p]
			end
			if c == 'e' or c == 'E' then
				p += 1
				if p >= max then return make_parse_error("Malformed JSON number")
				c = src[p]
				var neg_exp = c == '-'
				if neg_exp or c == '+' then
					p += 1
					if p >= max then return make_parse_error("Malformed JSON number")
					c = src[p]
				end
				var exp = 0
				while c.is_numeric do
					exp *= 10
					exp += c.to_i
					p += 1
					if p >= max then break
					c = src[p]
				end
				var scale = (10 ** exp).to_f
				if neg_exp then
					fl /= scale
				else
					fl *= scale
				end
			end
			if p < max and not src[p].is_json_separator then return make_parse_error("Malformed JSON number")
			pos = p
			if is_neg then return -fl
			return fl
		end
		if c == 'e' or c == 'E' then
			p += 1
			if p >= max then return make_parse_error("Bad JSON number")
			c = src[p]
			var neg_exp = c == '-'
			if neg_exp or c == '+' then
				p += 1
				if p >= max then return make_parse_error("Bad JSON number")
				c = src[p]
			end
			# Start from 0: every exponent digit is accumulated exactly once by the loop.
			var exp = 0
			while c.is_numeric do
				exp *= 10
				exp += c.to_i
				p += 1
				if p >= max then break
				c = src[p]
			end
			if neg_exp then
				# A negative exponent generally yields a fraction: answer with a Float.
				if p < max and not src[p].is_json_separator then return make_parse_error("Malformed JSON number")
				pos = p
				var fl = val.to_f / (10 ** exp).to_f
				if is_neg then return -fl
				return fl
			end
			val *= (10 ** exp)
		end
		if p < max and not src[p].is_json_separator then return make_parse_error("Malformed JSON number")
		pos = p
		if is_neg then return -val
		return val
	end

	# Buffer reused across calls to `parse_json_string` to hold unescaped text.
	private var parse_str_buf = new FlatBuffer

	# Reads the 4 hexadecimal digits found at `src[start .. start + 4[`.
	#
	# Returns their value, or `-1` if `src` is too short or one of the
	# characters is not a hexadecimal digit.
	private fun read_hex4(start: Int): Int do
		if start + 4 > src.length then return -1
		var value = 0
		for i in [start .. start + 4[ do
			var c = src[i]
			value <<= 4
			if c >= '0' and c <= '9' then
				value += c.code_point - '0'.code_point
			else if c >= 'a' and c <= 'f' then
				value += c.code_point - 'a'.code_point + 10
			else if c >= 'A' and c <= 'F' then
				value += c.code_point - 'A'.code_point + 10
			else
				return -1
			end
		end
		return value
	end

	# Parses and returns a Nit string from a JSON String
	#
	# Expects `pos` to be on the opening `"`; on success `pos` ends up just
	# after the closing `"`.
	# Returns a `JsonParseError` if the string is unterminated or contains a
	# malformed escape sequence. A `\uXXXX` escape in the range
	# `0xD800 .. 0xDBFF` must be followed by a second `\uXXXX` escape.
	fun parse_json_string: Serializable do
		var src = src
		var ln = src.length
		var p = pos + 1
		var ret = parse_str_buf
		# An earlier call may have returned an error half-way through,
		# leaving decoded text behind: always start from an empty buffer.
		ret.clear
		if p >= ln then return make_parse_error("Malformed JSON String")
		var c = src[p]
		# Start of the current run of characters that need no unescaping.
		var chunk_st = p
		while c != '"' do
			if c != '\\' then
				p += 1
				if p >= ln then return make_parse_error("Malformed JSON string")
				c = src[p]
				continue
			end
			# Flush the plain run preceding the backslash.
			ret.append_substring_impl(src, chunk_st, p - chunk_st)
			p += 1
			if p >= ln then return make_parse_error("Malformed Escape sequence in JSON string")
			c = src[p]
			# `p` now designates the character following the escape letter.
			p += 1
			if c == 'r' then
				ret.add '\r'
			else if c == 'n' then
				ret.add '\n'
			else if c == 't' then
				ret.add '\t'
			else if c == 'b' then
				ret.add 0x08.code_point
			else if c == 'f' then
				# Form feed, spelled numerically as in `Text::json_to_nit_string`.
				ret.add 0x0C.code_point
			else if c == 'u' then
				var cp = read_hex4(p)
				if cp < 0 then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
				p += 4
				if cp >= 0xD800 and cp <= 0xDBFF then
					# First half of a pair: a second `\uXXXX` escape must follow.
					if p + 1 >= ln or src[p] != '\\' or src[p + 1] != 'u' then
						return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
					end
					var locp = read_hex4(p + 2)
					if locp < 0 then return make_parse_error("Malformed \uXXXX Escape sequence in JSON string")
					p += 6
					cp = ((locp & 0x3FF) | ((cp & 0x3FF) << 10)) + 0x10000
				end
				ret.add cp.code_point
			else
				# `"`, `/`, `\` or any other character: keep it as-is.
				ret.add c
			end
			if p >= ln then return make_parse_error("Malformed JSON string")
			chunk_st = p
			c = src[p]
		end
		pos = p + 1
		# No escape met: the string is a plain slice of the source.
		if ret.is_empty then return src.substring(chunk_st, p - chunk_st)
		ret.append_substring_impl(src, chunk_st, p - chunk_st)
		var rets = ret.to_s
		ret.clear
		return rets
	end

	# Ignores any character until a JSON separator is encountered
	#
	# Moves `pos` forward until it designates a separator
	# (see `Char::is_json_separator`) or reaches the end of the source.
	fun ignore_until_separator do
		var max = len
		while pos < max do
			if src[pos].is_json_separator then return
			pos += 1
		end
	end
end
457
# Read-only map whose content can be rendered as a JSON object.
#
# Keys are `String`s; values are `Serializable` or `null`.
interface JsonMapRead[K: String, V: nullable Serializable]
	super MapRead[K, V]
	super Serializable
end
463
# A JSON object: a `HashMap` from `String` keys to serializable (or `null`) values.
class JsonObject
	super JsonMapRead[String, nullable Serializable]
	super HashMap[String, nullable Serializable]
end
469
# Read-only sequence whose content can be rendered as a JSON array.
#
# Elements are `Serializable` or `null`.
class JsonSequenceRead[E: nullable Serializable]
	super Serializable
	super SequenceRead[E]
end
475
# A JSON array: an `Array` of serializable (or `null`) elements.
class JsonArray
	super JsonSequenceRead[nullable Serializable]
	super Array[nullable Serializable]
end