--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Crawler on the nitweb web API
+module nitwebcrawl
+
+import json
+
+# Fetch the content located at `url` and return it as a string.
+#
+# The download is delegated to the external `curl` command, run in silent
+# mode (`-s`). Whatever the command wrote on its standard output is returned,
+# even when the command exits with a non-zero status; in that case a
+# diagnostic line is printed first.
+fun curl(url: String): String do
+	# TODO: rely on the curl.nit binding instead of spawning a process
+	var proc = new ProcessReader("curl", "-s", url)
+	var body = proc.read_all
+	proc.close
+	proc.wait
+	# TODO: inspect the HTTP status code, not only the exit status of curl
+	if proc.status != 0 then print "Error with {url}"
+	return body
+end
+
+# Recursively collect all string values associated to `key` in a JSON value.
+#
+# * `json`: the JSON value to explore (object, array, scalar or `null`).
+# * `key`: the object key whose values are collected.
+# * `result`: optional accumulator; a fresh array is created when `null`.
+#
+# Returns the accumulator, filled with every `String` found under `key` at
+# any depth. Members of an object are explored before the object's own
+# `key` entry is considered, so nested matches come first.
+#
+# Values found under `key` that are not strings (numbers, booleans, objects,
+# arrays, `null`) are skipped instead of aborting the whole traversal.
+# An unexpected kind of `Jsonable` still prints its class name and aborts.
+fun search_json(json: nullable Jsonable, key: String, result: nullable Array[String]): Array[String]
+do
+	if result == null then result = new Array[String]
+	if json isa JsonObject then
+		# Explore every member first (including the one under `key`, which
+		# may itself be a container holding further matches).
+		for k, v in json do
+			search_json(v, key, result)
+		end
+		# Then collect the direct match, but only when it is a string.
+		var v = json.get_or_null(key)
+		if v isa String then result.add v
+	else if json isa JsonArray then
+		for e in json do search_json(e, key, result)
+	else if json == null or json isa String or json isa Int or json isa Bool or json isa Float then
+		# Scalars hold no nested value: nothing to collect.
+	else
+		# Unknown kind of JSON value: report it and stop.
+		print json.class_name
+		abort
+	end
+	return result
+end
+
+# Default crawl settings: address of the server and initial work list.
+var server = "http://localhost:3000"
+var todo = ["/api/entity/core"]
+
+# Command line: the last argument, when present, replaces the server
+# address; the arguments before it, when present, replace the work list.
+if not args.is_empty then
+	server = args.pop
+	if not args.is_empty then
+		todo.clear
+		todo.add_all args
+	end
+end
+
+# Every path ever queued, so that each one is processed only once.
+var seen = new Set[String]
+seen.add_all todo
+
+# Number of paths processed so far.
+var cpt = 0
+while not todo.is_empty do
+	cpt += 1
+	# The work list is consumed from its end.
+	var path = todo.pop
+	var url = server + path
+	print "process {url}. {cpt}+{todo.length}/{seen.length}"
+
+	var txt = curl(url).parse_json
+	if txt isa Error then
+		# Unparsable answer: report it and move on to the next path.
+		print "{url}: {txt.message}"
+	else
+		# Queue each `api_url` found in the answer that was never seen.
+		for link in search_json(txt, "api_url") do
+			if not seen.has(link) then
+				seen.add link
+				todo.add link
+			end
+		end
+	end
+end