From: Alexandre Terrasa <alexandre@moz-code.org>
Date: Fri, 22 Sep 2017 20:35:56 +0000 (-0400)
Subject: lib/nlp: add wrapper to the web REST api
X-Git-Url: http://nitlanguage.org

lib/nlp: add wrapper to the web REST api

Signed-off-by: Alexandre Terrasa <alexandre@moz-code.org>
---

diff --git a/lib/nlp/examples/nlp_server.nit b/lib/nlp/examples/nlp_server.nit
new file mode 100644
index 0000000..61dd888
--- /dev/null
+++ b/lib/nlp/examples/nlp_server.nit
@@ -0,0 +1,37 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module nlp_server
+
+import nlp
+import config
+
+redef class Config
+	# Option `-c` / `--classpath`: the StanfordNLP java classpath (no default value here)
+	var opt_java_cp = new OptionString("StanfordNLP java classpath", "-c", "--classpath")
+	var opt_port = new OptionInt("Server port on localhost (default is 9000)", 9000, "-p", "--port")
+	# Register both server options in the option context `opts`
+	redef init do
+		opts.add_option(opt_java_cp, opt_port)
+	end
+end
+
+# Parse the command line
+var config = new Config
+config.parse_options(args)
+
+# Without `--classpath`, look for the jars in `stanfordnlp/`
+var classpath = config.opt_java_cp.value or else "stanfordnlp/*"
+var server = new NLPServer(classpath, config.opt_port.value)
+server.start
diff --git a/lib/nlp/stanford.nit b/lib/nlp/stanford.nit
index 29058b3..22153b5 100644
--- a/lib/nlp/stanford.nit
+++ b/lib/nlp/stanford.nit
@@ -19,6 +19,8 @@ module stanford
 
 import opts
 import dom
+import curl
+import pthreads
 
 # Natural Language Processor
 #
@@ -285,3 +287,81 @@ class NLPToken
 		init(index, word, lemma, begin_offset, end_offset, pos)
 	end
 end
+
+# Thread running a Stanford CoreNLP web server
+#
+# `main` launches the Java server on `port`.
+#
+# See the Stanford documentation of the server at
+# https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
+class NLPServer
+	super Thread
+
+	# Classpath given to Java to load the StanfordNLP jars
+	var java_cp: String
+
+	# Port the Java server listens on
+	var port: Int
+
+	# Run the `java` server command through `sys.system`, then return `null`
+	redef fun main do
+		var command = "java -mx4g -cp \"{java_cp}\" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port {port.to_s} -timeout 15000"
+		sys.system(command)
+		return null
+	end
+end
+
+# A NLPProcessor using a NLPServer as backend
+class NLPClient
+	super NLPProcessor
+
+	# Base uri of the NLP server API
+	#
+	# For example "http://localhost:9000" or "https://myserver.com"
+	var api_uri: String
+
+	# Annotators to use (they must exist on the server)
+	#
+	# Defaults are: `tokenize`, `ssplit`, `pos` and `lemma`.
+	var annotators: Array[String] = ["tokenize", "ssplit", "pos", "lemma"] is writable
+
+	# Language to process (it must be available on the server)
+	#
+	# Default is `en`.
+	var language = "en" is writable
+
+	# Output format to ask (only `xml` is implemented at the moment)
+	private var format = "xml"
+
+	# API uri used to build curl POST requests
+	#
+	# `annotators` are joined with percent-encoded commas in the `properties` JSON.
+	fun post_uri: String do
+		var list = annotators.join("%2C")
+		return "{api_uri}/?properties=%7B%22annotators%22%3A%20%22{list}%22%2C%22outputFormat%22%3A%22{format}%22%7D&pipelineLanguage={language}"
+	end
+
+	# Send `string` to the server and build a document from the XML response
+	#
+	# On any error, a message is printed and an empty `NLPDocument` is returned.
+	redef fun process(string) do
+		var request = new CurlHTTPRequest(post_uri)
+		request.body = string
+		var response = request.execute
+		if response isa CurlResponseSuccess then
+			if response.status_code != 200 then
+				print "Error: {response.body_str}"
+				return new NLPDocument
+			end
+			# Parse once: a body that is not valid XML gives an error, not a document
+			var xml = response.body_str.to_xml
+			if xml isa XMLDocument then return new NLPDocument.from_xml(xml)
+			print xml
+			return new NLPDocument
+		else if response isa CurlResponseFailed then
+			print "Error: {response.error_msg}"
+			return new NLPDocument
+		end
+		return new NLPDocument
+	end
+end