examples: annotate examples
[nit.git] / src / examples / nitwebcrawl.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Crawler on the nitweb web API
16 module nitwebcrawl is example
17
18 import json::static
19
# Fetch the content of the HTTP resource located at `url`.
#
# The request is delegated to the external `curl` command, run in silent
# mode (`-s`). Everything read from that process is returned as is.
#
# When the process ends with a non-zero status, a message is printed but
# the (possibly empty or partial) content read so far is still returned.
fun curl(url: String): String do
	# TODO: use curl.nit
	var process = new ProcessReader("curl", "-s", url)
	var body = process.read_all
	process.close
	process.wait
	# TODO: process HTTP error codes
	if process.status == 0 then return body
	print "Error with {url}"
	return body
end
33
# Walk a JSON value and gather every string stored under `key`.
#
# * `json`: the JSON value to explore (object, array, scalar or `null`).
# * `key`: the object key whose values are collected.
# * `result`: accumulator used by the recursion; a fresh array is created
#   when it is `null`.
#
# The returned array is the accumulator itself. For a JSON object, nested
# values are explored first, then the value of `key` in the object itself
# is appended.
#
# The program aborts if a non-null value associated to `key` is not a
# `String`, or if `json` is of an unexpected type (its class name is
# printed first).
fun search_json(json: nullable Serializable, key: String, result: nullable Array[String]): Array[String]
do
	var found = result or else new Array[String]
	if json isa JsonObject then
		# Explore the children before looking at the object's own `key`.
		for child in json.values do
			search_json(child, key, found)
		end
		var hit = json.get_or_null(key)
		if hit != null then
			assert hit isa String
			found.add hit
		end
	else if json isa JsonArray then
		for item in json do
			search_json(item, key, found)
		end
	else if json != null and not (json isa String) and not (json isa Int) and not (json isa Bool) and not (json isa Float) then
		# Neither a container nor a known scalar: report the type and stop.
		print json.class_name
		abort
	end
	return found
end
57
# Base URL of the server to crawl.
var server = "http://localhost:3000"

# Paths that remain to be visited, used as a stack.
var todo = ["/api/entity/core"]

# Command line handling.
# NOTE: the server is taken from the LAST argument (`args.pop`); any
# remaining arguments replace the default starting paths.
if args.not_empty then
	server = args.pop
	if args.not_empty then todo = args.to_a
end

# Paths already queued at some point, so that none is visited twice.
var seen = new Set[String]
seen.add_all todo

# Number of resources processed so far.
var cpt = 0
while todo.not_empty do
	cpt += 1
	var url = server + todo.pop
	print "process {url}. {cpt}+{todo.length}/{seen.length}"

	var parsed = curl(url).parse_json
	if parsed isa Error then
		# Invalid JSON: report it and move on to the next path.
		print "{url}: {parsed.message}"
		continue
	end

	# Queue every `api_url` that was not discovered before.
	for link in search_json(parsed, "api_url") do
		if not seen.has(link) then
			todo.add link
			seen.add link
		end
	end
end