From: Alexandre Terrasa
Date: Fri, 22 Sep 2017 20:35:56 +0000 (-0400)
Subject: lib/nlp: add wrapper to the web REST api
X-Git-Url: http://nitlanguage.org

lib/nlp: add wrapper to the web REST api

Signed-off-by: Alexandre Terrasa
---

diff --git a/lib/nlp/examples/nlp_server.nit b/lib/nlp/examples/nlp_server.nit
new file mode 100644
index 0000000..61dd888
--- /dev/null
+++ b/lib/nlp/examples/nlp_server.nit
@@ -0,0 +1,37 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module nlp_server
+
+import nlp
+import config
+
+redef class Config
+	# Java classpath option (`-c`, `--classpath`), forwarded to the `NLPServer`
+	var opt_java_cp = new OptionString("StanfordNLP java classpath", "-c", "--classpath")
+	# Server port option (`-p`, `--port`), 9000 when not given
+	var opt_port = new OptionInt("Server port on localhost (default is 9000)", 9000, "-p", "--port")
+
+	# Register the two server options in `opts`
+	redef init do
+		opts.add_option(opt_java_cp, opt_port)
+	end
+end
+
+var config = new Config
+config.parse_options(args)
+# Use the "stanfordnlp/*" classpath when none is given on the command line
+var classpath = config.opt_java_cp.value or else "stanfordnlp/*"
+var server = new NLPServer(classpath, config.opt_port.value)
+server.start
diff --git a/lib/nlp/stanford.nit b/lib/nlp/stanford.nit
index 29058b3..22153b5 100644
--- a/lib/nlp/stanford.nit
+++ b/lib/nlp/stanford.nit
@@ -19,6 +19,8 @@ module stanford
 
 import opts
 import dom
+import curl
+import pthreads
 
 # Natural Language Processor
 #
@@ -285,3 +287,81 @@ class NLPToken
 		init(index, word, lemma, begin_offset, end_offset, pos)
 	end
 end
+
+# Stanford web server
+#
+# Runs the server on `port`.
+#
+# For more details about the stanford NLP server see
+# https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
+class NLPServer
+	super Thread
+
+	# Stanford jar classpath
+	#
+	# Classpath to give to Java when loading the StanfordNLP jars.
+	var java_cp: String
+
+	# Port the Java server will listen on
+	var port: Int
+
+	# Launch `StanfordCoreNLPServer` with `java` through `sys.system`
+	#
+	# The command is built from `java_cp` and `port`; always returns `null`.
+	redef fun main do
+		sys.system "java -mx4g -cp \"{java_cp}\" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port {port.to_s} -timeout 15000"
+		return null
+	end
+end
+
+# A NLPProcessor using a NLPServer as backend
+class NLPClient
+	super NLPProcessor
+
+	# Base uri of the NLP server API
+	#
+	# For example "http://localhost:9000" or "https://myserver.com"
+	var api_uri: String
+
+	# Annotators to use
+	#
+	# The specified annotators must exist on the server.
+	#
+	# Defaults are: `tokenize`, `ssplit`, `pos` and `lemma`.
+	var annotators: Array[String] = ["tokenize", "ssplit", "pos", "lemma"] is writable
+
+	# Language to process
+	#
+	# The language must be available on the server.
+	#
+	# Default is `en`.
+	var language = "en" is writable
+
+	# Output format to ask.
+	#
+	# Only `xml` is implemented at the moment.
+	private var format = "xml"
+
+	# API uri used to build curl POST requests
+	#
+	# The `properties` parameter is a percent-encoded JSON object holding the
+	# requested `annotators` (comma separated) and the output `format`.
+	# Annotator names, `format` and `language` are inserted as is, without
+	# further encoding.
+	fun post_uri: String do
+		# "%2C" is the percent-encoded comma separating the annotator names
+		var names = annotators.join("%2C")
+		return "{api_uri}/?properties=%7B%22annotators%22%3A%20%22{names}%22%2C%22outputFormat%22%3A%22{format}%22%7D&pipelineLanguage={language}"
+	end
+
+	# Post `string` to `post_uri` and build a `NLPDocument` from the XML response
+	#
+	# On failure (curl error, HTTP status other than 200 or unparsable XML)
+	# the error is printed and an empty `NLPDocument` is returned.
+	redef fun process(string) do
+		var request = new CurlHTTPRequest(post_uri)
+		request.body = string
+		var response = request.execute
+		if response isa CurlResponseSuccess then
+			if response.status_code != 200 then
+				print "Error: {response.body_str}"
+				return new NLPDocument
+			end
+			# Parse the body only once and reuse the result below
+			var xml = response.body_str.to_xml
+			if xml isa XMLError then
+				# Stop here: casting a `XMLError` to `XMLDocument` would abort
+				print xml
+				return new NLPDocument
+			end
+			return new NLPDocument.from_xml(xml.as(XMLDocument))
+		else if response isa CurlResponseFailed then
+			print "Error: {response.error_msg}"
+			return new NLPDocument
+		end
+		return new NLPDocument
+	end
+end