Example showing how to use an NLPFileIndex.

Redefined classes

redef class Config

nlp :: nlp_index $ Config

Basic configuration class
redef class Sys

nlp :: nlp_index $ Sys

The main class of the program.

All class definitions

redef class Config

nlp :: nlp_index $ Config

Basic configuration class
redef class Sys

nlp :: nlp_index $ Sys

The main class of the program.
Package diagram (dependency edges):
- nlp::nlp_index (nlp_index) -> nlp, config
- nlp -> config, vsm, opts, dom, curl, pthreads
- further ancestors (elided as "...") -> vsm, opts, dom, curl, pthreads, config
- a_star-m -> nlp::nlp_index


module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module caching

serialization :: caching

Services for caching serialization engines
module circular_array

core :: circular_array

Efficient data structure to access both ends of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module defines several collection classes.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module counter

counter :: counter

Simple numerical statistical analysis and presentation
module curl

curl :: curl

Data transfer powered by the native curl library
module dom

dom :: dom

Easy XML DOM parser
module engine_tools

serialization :: engine_tools

Advanced services for serialization engines
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module ini

ini :: ini

Read and write INI configuration files
module inspect

serialization :: inspect

Refine Serializable::inspect to show more useful information
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handles doubly linked lists.
module math

core :: math

Mathematical operations
module meta

meta :: meta

Simple user-defined meta-level to manipulate types of instances as object.
module native

core :: native

Native structures for text and bytes
module native_curl

curl :: native_curl

Binding of the C libcurl library, which allows us to interact with the network.
module numeric

core :: numeric

Advanced services for Numeric types
module opts

opts :: opts

Management of options on the command line
module parser

dom :: parser

XML DOM-parsing facilities
module parser_base

parser_base :: parser_base

Simple base for hand-made parsers of all kinds
module poset

poset :: poset

Pre-order sets and partial order sets (i.e. hierarchies)
module protocol

core :: protocol

module pthreads

pthreads :: pthreads

Main POSIX threads support; introduces the classes Thread, Mutex and Barrier
module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module serialization

serialization :: serialization

General serialization services
module serialization_core

serialization :: serialization_core

Abstract services to serialize Nit objects to different formats
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stanford

nlp :: stanford

Natural Language Processor based on the StanfordNLP core.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O
module vsm

vsm :: vsm

Vector Space Model
module xml_entities

dom :: xml_entities

Basic blocks for DOM-XML representation


module config

config :: config

Configuration options for nit tools and apps
module nlp

nlp :: nlp

Natural Language Processor based on the StanfordNLP core.


module a_star-m


# Example showing how to use an NLPFileIndex.
module nlp_index

import nlp
import config

redef class Config

	# --whitelist-exts
	var opt_white_exts = new OptionArray("Allowed file extensions (default is [])",
		"-w", "--whitelist-exts")

	# --blacklist-exts
	var opt_black_exts = new OptionArray("Allowed file extensions (default is [])",
		"-b", "--blacklist-exts")

	# --server
	var opt_server = new OptionString("StanfordNLP server URI (default is https://localhost:9000)",
		"-s", "--server")

	# --lang
	var opt_lang = new OptionString("Language to use (default is fr)", "-l", "--lang")

	redef init do
		opts.add_option(opt_server, opt_lang, opt_white_exts, opt_black_exts)

var config = new Config
config.tool_description = "usage: example_index <files>"

if args.length < 1 then
	exit 1

var host = config.opt_server.value
if host == null then host = "http://localhost:9000"
var lang = config.opt_lang.value
if lang == null then lang = "en"

var cli = new NLPClient(host)
cli.language = lang

var bl = config.opt_black_exts.value
if bl.is_empty then bl = ["CD", "SYM", "-RRB-", "-LRB-", "''", "``", ".", "#", ":", ",", "$", ""]

var index = new NLPFileIndex(cli)
index.whitelist_exts = config.opt_white_exts.value
index.blacklist_exts = bl

print "Building index..."
index.index_files(args, true)

print "Indexed {index.documents.length} documents"

	print "\nEnter query:"
	printn "> "
	var input = sys.stdin.read_line
	var matches = index.match_string(input)

	for match in matches do
		print match