Wrapper around StanfordNLP jar.

FIXME this should use the Java FFI.

Introduced properties

fun java_cp: String

nlp :: NLPJavaProcessor :: java_cp

Classpath to give to Java when loading the StanfordNLP jars.
protected fun java_cp=(java_cp: String)

nlp :: NLPJavaProcessor :: java_cp=

Classpath to give to Java when loading the StanfordNLP jars.
fun tmp_dir: String

nlp :: NLPJavaProcessor :: tmp_dir

Temp dir used to store batch results
protected fun tmp_dir=(tmp_dir: String)

nlp :: NLPJavaProcessor :: tmp_dir=

Temp dir used to store batch results

Redefined properties

redef type SELF: NLPJavaProcessor

nlp $ NLPJavaProcessor :: SELF

Type of this instance, automatically specialized in every class
redef fun process(string: String): NLPDocument

nlp $ NLPJavaProcessor :: process

Process a string and return a new NLPDocument from this.
redef fun process_file(input: String): NLPDocument

nlp $ NLPJavaProcessor :: process_file

Process the input file and return a new NLPDocument from this.
redef fun process_files(inputs: Array[String]): Map[String, NLPDocument]

nlp $ NLPJavaProcessor :: process_files

Batch mode.

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
fun java_cp: String

nlp :: NLPJavaProcessor :: java_cp

Classpath to give to Java when loading the StanfordNLP jars.
protected fun java_cp=(java_cp: String)

nlp :: NLPJavaProcessor :: java_cp=

Classpath to give to Java when loading the StanfordNLP jars.
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
abstract fun process(string: String): NLPDocument

nlp :: NLPProcessor :: process

Creates a new NLPDocument from a string
fun process_file(path: String): NLPDocument

nlp :: NLPProcessor :: process_file

Creates a new NLPDocument from a file content
fun process_files(paths: Array[String]): Map[String, NLPDocument]

nlp :: NLPProcessor :: process_files

Creates a new NLPDocument from a list of files (batch mode)
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value used for serialization
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
fun tmp_dir: String

nlp :: NLPJavaProcessor :: tmp_dir

Temp dir used to store batch results
protected fun tmp_dir=(tmp_dir: String)

nlp :: NLPJavaProcessor :: tmp_dir=

Temp dir used to store batch results
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
package_diagram nlp::NLPJavaProcessor NLPJavaProcessor nlp::NLPProcessor NLPProcessor nlp::NLPJavaProcessor->nlp::NLPProcessor core::Object Object nlp::NLPProcessor->core::Object ...core::Object ... ...core::Object->core::Object

Ancestors

interface Object

core :: Object

The root of the class hierarchy.

Parents

interface NLPProcessor

nlp :: NLPProcessor

Natural Language Processor

Class definitions

nlp $ NLPJavaProcessor
# Wrapper around StanfordNLP jar.
#
# Every processing request runs the `java` command through `sys.system`
# and then parses the XML file(s) produced by that command.
#
# FIXME this should use the Java FFI.
class NLPJavaProcessor
	super NLPProcessor

	# Classpath to give to Java when loading the StanfordNLP jars.
	var java_cp: String

	# Temp dir used to store batch results
	#
	# This directory is removed at the end of `process_files`.
	var tmp_dir = ".nlp"

	# Process a string and return a new NLPDocument from this.
	#
	# The string is written to the scratch file `.nlp.in`, which is handed
	# to `process_file` and deleted afterwards.
	redef fun process(string) do
		var tmp_file = ".nlp.in"
		var file = new FileWriter.open(tmp_file)
		file.write string
		file.close
		var doc = process_file(tmp_file)
		tmp_file.file_delete
		return doc
	end

	# Process the `input` file and return a new NLPDocument from this.
	#
	# The result is read from `{input.basename}.xml`, relative to the
	# current directory, and that XML file is deleted once parsed.
	redef fun process_file(input) do
		# TODO opt annotators
		var tmp_file = "{input.basename}.xml"
		# Quote `input` for the shell so that spaces or metacharacters in the
		# path are passed verbatim to Java instead of being interpreted.
		sys.system "java -cp \"{java_cp}\" -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma -outputFormat xml -file {input.escape_to_sh}"
		var doc = new NLPDocument.from_xml_file(tmp_file)
		tmp_file.file_delete
		return doc
	end

	# Batch mode.
	#
	# Returns a map of file path associated with their NLPDocument.
	#
	# The paths are listed, one per line, in the scratch file `inputs.list`.
	# Each result is read from `tmp_dir / "{input.basename}.xml"`.
	# Both `inputs.list` and `tmp_dir` are removed before returning.
	redef fun process_files(inputs) do
		# Prepare the input file list
		var input_file = "inputs.list"
		var fw = new FileWriter.open(input_file)
		for input in inputs do fw.write "{input}\n"
		fw.close

		# Run Stanford NLP jar
		# `tmp_dir` is configurable, so quote it for the shell;
		# `input_file` is a fixed name without special characters.
		sys.system "java -cp \"{java_cp}\" -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma -outputFormat xml -filelist {input_file} -outputDirectory {tmp_dir.escape_to_sh}"
		# Parse output
		var map = new HashMap[String, NLPDocument]
		for input in inputs do
			var out_file = tmp_dir / "{input.basename}.xml"
			map[input] = new NLPDocument.from_xml_file(out_file)
		end
		input_file.file_delete
		tmp_dir.rmdir
		return map
	end
end
lib/nlp/stanford.nit:52,1--107,3