From 988777722014197213cdd84e72d0188823ce35ef Mon Sep 17 00:00:00 2001 From: Alexandre Terrasa Date: Fri, 16 Oct 2015 10:12:35 -0400 Subject: [PATCH] lib/nlp: add package metadata and readme Signed-off-by: Alexandre Terrasa --- lib/nlp/README.md | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++ lib/nlp/package.ini | 11 +++++++ 2 files changed, 100 insertions(+) create mode 100644 lib/nlp/README.md create mode 100644 lib/nlp/package.ini diff --git a/lib/nlp/README.md b/lib/nlp/README.md new file mode 100644 index 0000000..6545537 --- /dev/null +++ b/lib/nlp/README.md @@ -0,0 +1,89 @@ +# Nit wrapper for Stanford CoreNLP + +Stanford CoreNLP provides a set of natural language analysis tools which can take +raw text input and give the base forms of words, their parts of speech, whether +they are names of companies, people, etc., normalize dates, times, and numeric +quantities, and mark up the structure of sentences in terms of phrases and word +dependencies, indicate which noun phrases refer to the same entities, indicate +sentiment, etc. + +This wrapper needs the Stanford CoreNLP jars that run on Java 1.8+. + +See http://nlp.stanford.edu/software/corenlp.shtml. + +## Usage + +~~~nitish +var proc = new NLPProcessor("path/to/StanfordCoreNLP/jars") + +var doc = proc.process("String to analyze") + +for sentence in doc.sentences do + for token in sentence.tokens do + print "{token.lemma}: {token.pos}" + end +end +~~~ + +## Nit API + +For ease of use, this wrapper introduces a Nit model to handle CoreNLP XML results. 
+ +### NLPDocument + +[[doc: NLPDocument]] + +[[doc: NLPDocument::from_xml]] +[[doc: NLPDocument::from_xml_file]] +[[doc: NLPDocument::sentences]] + +### NLPSentence + +[[doc: NLPSentence]] + +[[doc: NLPSentence::tokens]] + +### NLPToken + +[[doc: NLPToken]] + +[[doc: NLPToken::word]] +[[doc: NLPToken::lemma]] +[[doc: NLPToken::pos]] + +### NLP Processor + +[[doc: NLPProcessor]] + +[[doc: NLPProcessor::java_cp]] + +[[doc: NLPProcessor::process]] +[[doc: NLPProcessor::process_file]] +[[doc: NLPProcessor::process_files]] + +## Vector Space Model + +[[doc: NLPVector]] + +[[doc: NLPDocument::vector]] + +[[doc: NLPVector::cosine_similarity]] + +## NitNLP binary + +The `nitnlp` binary is given as an example of a NitNLP client. +It compares two strings and displays their cosine similarity value. + +Usage: + +~~~raw +nitnlp --cp "/path/to/jars" "sort" "Sorting array data" +0.577 +~~~ + +## TODO + +* Use JWrapper +* Use options to choose CoreNLP analyzers +* Analyze sentence dependencies +* Analyze sentiment diff --git a/lib/nlp/package.ini b/lib/nlp/package.ini new file mode 100644 index 0000000..789aa44 --- /dev/null +++ b/lib/nlp/package.ini @@ -0,0 +1,11 @@ +[package] +name=nlp +tags=nlp,lib +maintainer=Alexandre Terrasa +license=Apache-2.0 +[upstream] +browse=https://github.com/nitlang/nit/tree/master/lib/nlp/ +git=https://github.com/nitlang/nit.git +git.directory=lib/nlp/ +homepage=http://nitlanguage.org +issues=https://github.com/nitlang/nit/issues -- 1.7.9.5