Merge: gamnit: new services and a lot of bug fixes and performance improvements
[nit.git] / lib / nlp / examples / nlp_index.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Example showing how to use a NLPFileIndex.
16 module nlp_index
17
18 import nlp
19 import config
20
21 redef class Config
22
23 # --whitelist-exts: values are assigned to `whitelist_exts` of the `NLPFileIndex`.
24 var opt_white_exts = new OptionArray("Allowed file extensions (default is [])",
25 "-w", "--whitelist-exts")
26
27 # --blacklist-exts: values are assigned to `blacklist_exts` of the `NLPFileIndex`.
28 var opt_black_exts = new OptionArray("Disallowed file extensions (default is a built-in list)",
29 "-b", "--blacklist-exts")
30
31 # --server: URI handed to `NLPClient`; the help text states the fallback the script really uses.
32 var opt_server = new OptionString("StanfordNLP server URI (default is http://localhost:9000)",
33 "-s", "--server")
34
35 # --lang: language set on the `NLPClient`; the help text states the fallback the script really uses.
36 var opt_lang = new OptionString("Language to use (default is en)", "-l", "--lang")
37 # Register the example-specific options in `opts` so that option parsing recognizes them.
38 redef init do
39 opts.add_option(opt_server, opt_lang, opt_white_exts, opt_black_exts)
40 end
41 end
42
43 # Entry point: parse the command line, build a file index, then answer queries.
44 var config = new Config
45 config.tool_description = "usage: example_index <files>"
46 config.parse_options(args)
47
48 # Work on the arguments left over after option parsing, so that option flags
49 # and their values are neither counted as input files nor handed to the index.
50 var files = config.opts.rest
51 if files.is_empty then
52 config.usage
53 exit 1
54 end
55
56 # Fall back to built-in defaults when an option was not given.
57 var host = config.opt_server.value
58 if host == null then host = "http://localhost:9000"
59 var lang = config.opt_lang.value
60 if lang == null then lang = "en"
61
62 var cli = new NLPClient(host)
63 cli.language = lang
64
65 # NOTE(review): the default entries below look like part-of-speech tags rather
66 # than file extensions -- confirm that `blacklist_exts` is the intended target.
67 var bl = config.opt_black_exts.value
68 if bl.is_empty then bl = ["CD", "SYM", "-RRB-", "-LRB-", "''", "``", ".", "#", ":", ",", "$", ""]
69
70 var index = new NLPFileIndex(cli)
71 index.whitelist_exts = config.opt_white_exts.value
72 index.blacklist_exts = bl
73
74 print "Building index..."
75 index.index_files(files, true)
76
77 print "Indexed {index.documents.length} documents"
78
79 # Interactive query loop: one query per input line, until input is exhausted.
80 loop
81 print "\nEnter query:"
82 printn "> "
83 var input = sys.stdin.read_line
84 # Stop on end of input (Ctrl-D or a closed pipe); otherwise `read_line` keeps
85 # returning an empty string and the loop would spin forever.
86 if input.is_empty and sys.stdin.eof then break
87 var matches = index.match_string(input)
88
89 for match in matches do
90 print match
91 end
92 end