FIXME this should use the Java FFI.
nlp :: NLPJavaProcessor :: defaultinit
nlp :: NLPJavaProcessor :: java_cp
Classpath to give to Java when loading the StanfordNLP jars.
nlp :: NLPJavaProcessor :: java_cp=
Classpath to give to Java when loading the StanfordNLP jars.
nlp :: NLPJavaProcessor :: tmp_dir=
Temp dir used to store batch results
nlp $ NLPJavaProcessor :: SELF
Type of this instance, automatically specialized in every class
nlp $ NLPJavaProcessor :: process
Process a string and return a new NLPDocument from this.
nlp $ NLPJavaProcessor :: process_file
Process the `input` file and return a new NLPDocument from this.
nlp $ NLPJavaProcessor :: process_files
Batch mode.
core :: Object :: class_factory
Implementation used by `get_class` to create the specific class.
nlp :: NLPJavaProcessor :: defaultinit
core :: Object :: defaultinit
nlp :: NLPProcessor :: defaultinit
core :: Object :: is_same_instance
Return true if `self` and `other` are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Is `self` the same as `other` in a serialization context?
core :: Object :: is_same_type
Return true if `self` and `other` have the same dynamic type.
nlp :: NLPJavaProcessor :: java_cp
Classpath to give to Java when loading the StanfordNLP jars.
nlp :: NLPJavaProcessor :: java_cp=
Classpath to give to Java when loading the StanfordNLP jars.
core :: Object :: output_class_name
Display class name on stdout (debug only).
nlp :: NLPProcessor :: process
Creates a new NLPDocument from a string
nlp :: NLPProcessor :: process_file
Creates a new NLPDocument from a file content
nlp :: NLPProcessor :: process_files
Creates a new NLPDocument from a list of files (batch mode)
nlp :: NLPJavaProcessor :: tmp_dir=
Temp dir used to store batch results
# Wrapper around StanfordNLP jar.
#
# Every request spawns a `java` process through `sys.system` and then parses
# the XML file produced by the run.
#
# FIXME this should use the Java FFI.
class NLPJavaProcessor
	super NLPProcessor

	# Classpath to give to Java when loading the StanfordNLP jars.
	var java_cp: String

	# Temp dir used to store batch results
	var tmp_dir = ".nlp"

	# Common prefix of every StanfordCoreNLP command line.
	#
	# `java_cp` is kept inside double quotes (as opposed to `escape_to_sh`)
	# so that shell expansions in the classpath keep working.
	private fun corenlp_command: String do
		return "java -cp \"{java_cp}\" -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma -outputFormat xml"
	end

	# Process a string and return a new NLPDocument from this.
	#
	# The string is first written to the scratch file `.nlp.in` in the
	# current directory, processed with `process_file`, then the scratch
	# file is deleted.
	redef fun process(string) do
		var tmp_file = ".nlp.in"
		var file = new FileWriter.open(tmp_file)
		file.write string
		file.close
		var doc = process_file(tmp_file)
		tmp_file.file_delete
		return doc
	end

	# Process the `input` file and return a new NLPDocument from this.
	#
	# The result is read from `{input.basename}.xml` in the current
	# directory, and that file is deleted once parsed.
	redef fun process_file(input) do
		# TODO opt annotators
		var tmp_file = "{input.basename}.xml"
		# `input` is escaped so that paths containing spaces or shell
		# metacharacters are passed to java as a single argument.
		sys.system "{corenlp_command} -file {input.escape_to_sh}"
		var doc = new NLPDocument.from_xml_file(tmp_file)
		tmp_file.file_delete
		return doc
	end

	# Batch mode.
	#
	# Returns a map of file path associated with their NLPDocument.
	#
	# The paths are listed, one per line, in the scratch file `inputs.list`
	# (current directory) and the results are read from `tmp_dir`.
	# Both are removed before returning.
	#
	# NOTE: results are looked up by basename, so two inputs sharing the
	# same basename end up mapped to the same output file.
	redef fun process_files(inputs) do
		# Prepare the input file list
		var input_file = "inputs.list"
		var fw = new FileWriter.open(input_file)
		for input in inputs do fw.write "{input}\n"
		fw.close

		# Run Stanford NLP jar
		# Paths are escaped so a custom `tmp_dir` with spaces or shell
		# metacharacters does not break the command line.
		sys.system "{corenlp_command} -filelist {input_file.escape_to_sh} -outputDirectory {tmp_dir.escape_to_sh}"

		# Parse output
		var map = new HashMap[String, NLPDocument]
		for input in inputs do
			var out_file = tmp_dir / "{input.basename}.xml"
			map[input] = new NLPDocument.from_xml_file(out_file)
		end

		# Clean up the scratch list and the output directory
		input_file.file_delete
		tmp_dir.rmdir
		return map
	end
end
lib/nlp/stanford.nit:52,1--107,3