# Nit wrapper for Stanford CoreNLP
Stanford CoreNLP provides a set of natural language analysis tools which can take
raw text input and give the base forms of words, their parts of speech, whether
they are names of companies, people, etc., normalize dates, times, and numeric
quantities, mark up the structure of sentences in terms of phrases and word
dependencies, and indicate which noun phrases refer to the same entities.

This wrapper needs the Stanford CoreNLP jars that run on Java 1.8+.

See http://nlp.stanford.edu/software/corenlp.shtml.
```
var proc = new NLPProcessor("path/to/StanfordCoreNLP/jars")

var doc = proc.process("String to analyze")

for sentence in doc.sentences do
	for token in sentence.tokens do
		print "{token.lemma}: {token.pos}"
	end
end
```
## NLPServer

The NLPServer provides a wrapper around the StanfordCoreNLPServer.

See https://stanfordnlp.github.io/CoreNLP/corenlp-server.html.
```
var cp = "/path/to/StanfordCoreNLP/jars"
var srv = new NLPServer(cp, 9000)
```
## NLPClient

The NLPClient is used as an NLPProcessor with an NLPServer backend.
```
var cli = new NLPClient("http://localhost:9000")
var doc = cli.process("String to analyze")
```
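Under the hood, such a client talks to the CoreNLP server's documented HTTP API: the text to analyze is POSTed as the request body, and the desired annotators are passed as a JSON `properties` query parameter. A minimal sketch in Python of how such a request is built (the function name and base URL are illustrative, not part of this wrapper's API):

```python
import json
from urllib.parse import urlencode

def corenlp_request(base_url, text, annotators="tokenize,ssplit,pos,lemma"):
    # The annotator list and output format go in a JSON "properties"
    # query parameter; the document itself is the POST body.
    properties = {"annotators": annotators, "outputFormat": "json"}
    url = base_url + "/?" + urlencode({"properties": json.dumps(properties)})
    return url, text.encode("utf-8")

url, body = corenlp_request("http://localhost:9000", "String to analyze")
# POSTing `body` to `url` (e.g. with urllib.request) returns the
# annotated document as JSON.
```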
## NLPIndex

NLPIndex extends the StringIndex to use an NLPProcessor to tokenize, lemmatize and
tag the terms of a document.
```
var proc = new NLPProcessor("path/to/StanfordCoreNLP/jars")
var index = new NLPIndex(proc)

var d1 = index.index_string("Doc 1", "/uri/1", "this is a sample")
var d2 = index.index_string("Doc 2", "/uri/2", "this and this is another example")
assert index.documents.length == 2

var matches = index.match_string("this sample")
assert matches.first.document == d1
```
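The idea behind lemma-based indexing can be sketched independently of CoreNLP: each document's terms are reduced to their lemmas before indexing, so a query matches all inflected forms of a word. The sketch below is a toy Python illustration of that concept only (the class, a hard-coded lemma table standing in for CoreNLP's lemmatizer, and the scoring are all hypothetical, not this library's implementation):

```python
from collections import Counter

# Toy lemma table standing in for CoreNLP's lemmatizer (illustration only).
LEMMAS = {"is": "be", "samples": "sample", "examples": "example"}

def lemmatize(text):
    return [LEMMAS.get(w, w) for w in text.lower().split()]

class LemmaIndex:
    def __init__(self):
        self.documents = []  # list of (title, uri, lemma counts)

    def index_string(self, title, uri, text):
        doc = (title, uri, Counter(lemmatize(text)))
        self.documents.append(doc)
        return doc

    def match_string(self, query):
        q = Counter(lemmatize(query))
        # Score each document by the number of query lemma occurrences
        # it shares (multiset intersection), best match first.
        scored = [(sum((doc[2] & q).values()), doc) for doc in self.documents]
        return [doc for score, doc in sorted(scored, key=lambda s: -s[0]) if score > 0]

index = LemmaIndex()
d1 = index.index_string("Doc 1", "/uri/1", "this is a sample")
d2 = index.index_string("Doc 2", "/uri/2", "this and this is another example")
matches = index.match_string("this sample")
# "Doc 1" ranks first: it shares both "this" and "sample" with the query.
```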
## TODO

* Use options to choose CoreNLP analyzers
* Analyze sentence dependencies