From: Alexandre Terrasa Date: Fri, 29 Sep 2017 19:18:23 +0000 (-0400) Subject: lib/nlp: provide more examples X-Git-Url: http://nitlanguage.org lib/nlp: provide more examples And remove old example nitnlp Signed-off-by: Alexandre Terrasa --- diff --git a/lib/nlp/README.md b/lib/nlp/README.md index 9c718a9..3ed2c5e 100644 --- a/lib/nlp/README.md +++ b/lib/nlp/README.md @@ -11,7 +11,9 @@ This wrapper needs the Stanford CoreNLP jars that run on Java 1.8+. See http://nlp.stanford.edu/software/corenlp.shtml. -## Usage +## NLPProcessor + +### Java client ~~~nitish var proc = new NLPProcessor("path/to/StanfordCoreNLP/jars") @@ -25,52 +27,41 @@ for sentence in doc.sentences do end ~~~ -## Nit API - -For ease of use, this wrapper introduce a Nit model to handle CoreNLP XML results. - -### NLPDocument - -[[doc: NLPDocument]] - -[[doc: nlp::NLPDocument::from_xml]] -[[doc: nlp::NLPDocument::from_xml_file]] -[[doc: nlp::NLPDocument::sentences]] - -### NLPSentence - -[[doc: NLPSentence]] +### NLPServer -[[doc: nlp::NLPSentence::tokens]] +The NLPServer provides a wrapper around the StanfordCoreNLPServer. -### NLPToken +See `https://stanfordnlp.github.io/CoreNLP/corenlp-server.html`. -[[doc: NLPToken]] - -[[doc: nlp::NLPToken::word]] -[[doc: nlp::NLPToken::lemma]] -[[doc: nlp::NLPToken::pos]] +~~~nitish +var cp = "/path/to/StanfordCoreNLP/jars" +var srv = new NLPServer(cp, 9000) +srv.start +~~~ -### NLP Processor +### NLPClient -[[doc: NLPProcessor]] +The NLPClient is used as a NLPProcessor with a NLPServer backend. -[[doc: nlp::NLPProcessor::java_cp]] +~~~nitish +var cli = new NLPClient("http://localhost:9000") +var doc = cli.process("String to analyze") +~~~ -[[doc: nlp::NLPProcessor::process]] -[[doc: nlp::NLPProcessor::process_file]] -[[doc: nlp::NLPProcessor::process_files]] +## NLPIndex -## NitNLP binary +NLPIndex extends the StringIndex to use a NLPProcessor to tokenize, lemmatize and +tag the terms of a document. 
-The `nitnlp` binary is given as an example of NitNLP client. -It compares two strings and display ther cosine similarity value. +~~~nitish +var index = new NLPIndex(proc) -Usage: +var d1 = index.index_string("Doc 1", "/uri/1", "this is a sample") +var d2 = index.index_string("Doc 2", "/uri/2", "this and this is another example") +assert index.documents.length == 2 -~~~raw -nitnlp --cp "/path/to/jars" "sort" "Sorting array data" -0.577 +var matches = index.match_string("this sample") +assert matches.first.document == d1 ~~~ ## TODO diff --git a/lib/nlp/nitnlp.nit b/lib/nlp/nitnlp.nit deleted file mode 100644 index bbf7d53..0000000 --- a/lib/nlp/nitnlp.nit +++ /dev/null @@ -1,49 +0,0 @@ -# This file is part of NIT ( http://www.nitlanguage.org ). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Natural Language Processor based on the StanfordNLP core. -# -# This tool provides a document comparison service from command line based on -# StanfordNLP and NLPVector consine similarity. -# -# See http://nlp.stanford.edu/software/corenlp.shtml. 
-module nitnlp - -import opts -import nlp - -# Option management -var opt_java_cp = new OptionString("Java classpath for StanfordNLP jars", "--cp") -var options = new OptionContext -options.add_option(opt_java_cp) -options.parse(args) -var arguments = options.rest - -# Processor initialization -var java_cp = opt_java_cp.value -if java_cp == null then java_cp = "*" -var proc = new NLPJavaProcessor(java_cp) - -if arguments.length != 2 then - print "Usage: nitnlp text1 text2\n" - options.usage - sys.exit 1 -end - -var doc1 = proc.process(arguments.first) -print doc1.vector.join(":", ",") -var doc2 = proc.process(arguments.last) -print doc2.vector.join(":", ",") - -print doc1.vector.cosine_similarity(doc2.vector)