lib/nlp: provide more examples

author Alexandre Terrasa <alexandre@moz-code.org>

Fri, 29 Sep 2017 19:18:23 +0000 (15:18 -0400)

committer Alexandre Terrasa <alexandre@moz-code.org>

Thu, 12 Oct 2017 00:49:00 +0000 (20:49 -0400)
author Alexandre Terrasa <alexandre@moz-code.org>
Fri, 29 Sep 2017 19:18:23 +0000 (15:18 -0400)
committer Alexandre Terrasa <alexandre@moz-code.org>
Thu, 12 Oct 2017 00:49:00 +0000 (20:49 -0400)
diff --git a/lib/nlp/README.md b/lib/nlp/README.md

index 9c718a9..3ed2c5e 100644 (file)
--- a/lib/nlp/README.md
+++ b/lib/nlp/README.md
@@ -11,7 +11,9 @@ This wrapper needs the Stanford CoreNLP jars that run on Java 1.8+.
  
  See http://nlp.stanford.edu/software/corenlp.shtml.
  
-## Usage
+## NLPProcessor
+
+### Java client
  
  ~~~nitish
  var proc = new NLPProcessor("path/to/StanfordCoreNLP/jars")
@@ -25,52 +27,41 @@ for sentence in doc.sentences do
  end
  ~~~
  
-## Nit API
-
-For ease of use, this wrapper introduce a Nit model to handle CoreNLP XML results.
-
-### NLPDocument
-
-[[doc: NLPDocument]]
-
-[[doc: nlp::NLPDocument::from_xml]]
-[[doc: nlp::NLPDocument::from_xml_file]]
-[[doc: nlp::NLPDocument::sentences]]
-
-### NLPSentence
-
-[[doc: NLPSentence]]
+### NLPServer
  
-[[doc: nlp::NLPSentence::tokens]]
+The NLPServer provides a wrapper around the StanfordCoreNLPServer.
  
-### NLPToken
+See `https://stanfordnlp.github.io/CoreNLP/corenlp-server.html`.
  
-[[doc: NLPToken]]
-
-[[doc: nlp::NLPToken::word]]
-[[doc: nlp::NLPToken::lemma]]
-[[doc: nlp::NLPToken::pos]]
+~~~nitish
+var cp = "/path/to/StanfordCoreNLP/jars"
+var srv = new NLPServer(cp, 9000)
+srv.start
+~~~
  
-### NLP Processor
+### NLPClient
  
-[[doc: NLPProcessor]]
+The NLPClient is used as an NLPProcessor with an NLPServer backend.
  
-[[doc: nlp::NLPProcessor::java_cp]]
+~~~nitish
+var cli = new NLPClient("http://localhost:9000")
+var doc = cli.process("String to analyze")
+~~~
  
-[[doc: nlp::NLPProcessor::process]]
-[[doc: nlp::NLPProcessor::process_file]]
-[[doc: nlp::NLPProcessor::process_files]]
+## NLPIndex
  
-## NitNLP binary
+NLPIndex extends the StringIndex to use an NLPProcessor to tokenize, lemmatize and
+tag the terms of a document.
  
-The `nitnlp` binary is given as an example of NitNLP client.
-It compares two strings and display ther cosine similarity value.
+~~~nitish
+var index = new NLPIndex(proc)
  
-Usage:
+var d1 = index.index_string("Doc 1", "/uri/1", "this is a sample")
+var d2 = index.index_string("Doc 2", "/uri/2", "this and this is another example")
+assert index.documents.length == 2
  
-~~~raw
-nitnlp --cp "/path/to/jars" "sort" "Sorting array data"
-0.577
+var matches = index.match_string("this sample")
+assert matches.first.document == d1
  ~~~
  
  ## TODO
diff --git a/lib/nlp/nitnlp.nit b/lib/nlp/nitnlp.nit

deleted file mode 100644 (file)

index bbf7d53..0000000
--- a/lib/nlp/nitnlp.nit
+++ /dev/null
@@ -1,49 +0,0 @@
-# This file is part of NIT ( http://www.nitlanguage.org ).
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Natural Language Processor based on the StanfordNLP core.
-#
-# This tool provides a document comparison service from command line based on
-# StanfordNLP and NLPVector consine similarity.
-#
-# See http://nlp.stanford.edu/software/corenlp.shtml.
-module nitnlp
-
-import opts
-import nlp
-
-# Option management
-var opt_java_cp = new OptionString("Java classpath for StanfordNLP jars", "--cp")
-var options = new OptionContext
-options.add_option(opt_java_cp)
-options.parse(args)
-var arguments = options.rest
-
-# Processor initialization
-var java_cp = opt_java_cp.value
-if java_cp == null then java_cp = "*"
-var proc = new NLPJavaProcessor(java_cp)
-
-if arguments.length != 2 then
-       print "Usage: nitnlp text1 text2\n"
-       options.usage
-       sys.exit 1
-end
-
-var doc1 = proc.process(arguments.first)
-print doc1.vector.join(":", ",")
-var doc2 = proc.process(arguments.last)
-print doc2.vector.join(":", ",")
-
-print doc1.vector.cosine_similarity(doc2.vector)
author	Alexandre Terrasa <alexandre@moz-code.org>
	Fri, 29 Sep 2017 19:18:23 +0000 (15:18 -0400)
committer	Alexandre Terrasa <alexandre@moz-code.org>
	Thu, 12 Oct 2017 00:49:00 +0000 (20:49 -0400)
lib/nlp/README.md		patch \| blob \| history
lib/nlp/nitnlp.nit	[deleted file]	patch \| blob \| history