1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Example showing how to use an NLPFileIndex.
# Command-line option `-w` / `--whitelist-exts`, declared as an `OptionArray`.
var opt_white_exts = new OptionArray(
	"Allowed file extensions (default is [])",
	"-w",
	"--whitelist-exts")
# Command-line option `-b` / `--blacklist-exts`, declared as an `OptionArray`.
#
# The help text used to be a copy of the whitelist one ("Allowed ...");
# it now describes a blacklist.
#
# NOTE(review): when this option is left empty the script substitutes its own
# fallback list further down, so "(default is [])" may not reflect the
# effective default -- confirm.
var opt_black_exts = new OptionArray("Disallowed file extensions (default is [])",
	"-b", "--blacklist-exts")
# Command-line option holding the StanfordNLP server URI, declared as an `OptionString`.
# NOTE(review): the help text advertises `https://localhost:9000`, while the
# fallback applied when the option is unset is `http://localhost:9000` --
# confirm which scheme is intended and align the two.
32 var opt_server
= new OptionString("StanfordNLP server URI (default is https://localhost:9000)",
# Command-line option `-l` / `--lang`, declared as an `OptionString`.
#
# The help text states the fallback the script actually applies when the
# option is unset ("en"); it previously claimed "fr".
var opt_lang = new OptionString("Language to use (default is en)", "-l", "--lang")
# Register the server, language, whitelist and blacklist options, in that order.
opts.add_option(opt_server, opt_lang, opt_white_exts, opt_black_exts)
# Create the configuration, set its description text and hand it the
# program arguments for parsing.
var config = new Config
config.tool_description = "usage: example_index <files>"
config.parse_options(args)
# Guard taken when the argument list is empty (no argument was supplied).
47 if args
.length
< 1 then
# Server URI: the value of `opt_server`, or the local fallback when it is null.
var host = config.opt_server.value or else "http://localhost:9000"
# Language: the value of `opt_lang`, or "en" when it is null.
var lang = config.opt_lang.value or else "en"
# NLP client created from the resolved server URI.
var cli = new NLPClient(host)
# Blacklist read from `opt_black_exts`; replaced by a built-in list when the
# option yielded no entries.
# NOTE(review): the fallback entries look like part-of-speech tags rather than
# file extensions -- confirm this is what `blacklist_exts` expects.
var bl = config.opt_black_exts.value
if bl.is_empty then
	bl = ["CD", "SYM", "-RRB-", "-LRB-", "''", "``", ".", "#", ":", ",", "$", ""]
end
# Create the file index on top of the NLP client, then give it the whitelist
# from the options and the blacklist computed above.
var index = new NLPFileIndex(cli)
index.whitelist_exts = config.opt_white_exts.value
index.blacklist_exts = bl
# Pass the argument list to the index (second argument `true`), then report
# how many documents the index holds.
# NOTE(review): `args` is the same list that was given to `parse_options`;
# confirm option flags cannot end up being treated as files here.
print "Building index..."
index.index_files(args, true)
print "Indexed {index.documents.length} documents"
# Prompt for a query, read one line from standard input and match it
# against the index.
print "\nEnter query:"
var input = sys.stdin.read_line
var matches = index.match_string(input)
78 for match
in matches
do