.fail(function(data){
//TODO: Notify of failure
});
+
+ // Remember the participant's name client-side
+ set_cookie("opportunity_participant_name", pname);
}
+
function remove_people(ele){
var arr = ele.id.split("_")
var pid = arr[1]
}
});
}
+
// ID of line currently open for modification
var in_modification_id = null;
function modify_people(ele, id){
in_modification_id = null;
}
}
+
+ function get_cookie(cookie_name) {
+ var name = cookie_name + "=";
+ var ca = document.cookie.split(';');
+ for(var i = 0; i < ca.length; i ++) {
+ var c = ca[i];
+ while (c.charAt(0) == ' ') c = c.substring(1);
+ if (c.indexOf(name) == 0) return c.substring(name.length, c.length);
+ }
+ return "";
+ }
+
+ function set_cookie(cookie_name, value) {
+ var date = new Date();
+ date.setTime(date.getTime() + (365*24*60*60*1000));
+ var expires = "expires="+date.toUTCString();
+ document.cookie = cookie_name + "=" + value + "; " + expires;
+ }
+
+ // Retrieve the last client-side participant's name
+ window.onload = function () {
+ var name_field = document.getElementById("new_name");
+ name_field.value = get_cookie("opportunity_participant_name");
+ }
"""
end
# `"div"` for `<div></div>`.
var tag: String
init do
- self.is_void = (once ["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"]).has(tag)
+ self.is_void = (once void_list).has(tag)
+ end
+
+ # Tag names of the HTML void elements, looked up by `init` to set `is_void`.
+ private fun void_list: Set[String]
+ do
+ return new HashSet[String].from(["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"])
end
# Is the HTML element a void element?
--- /dev/null
+# Nit wrapper for Stanford CoreNLP
+
+Stanford CoreNLP provides a set of natural language analysis tools which can take
+raw text input and give the base forms of words, their parts of speech, whether
+they are names of companies, people, etc., normalize dates, times, and numeric
+quantities, and mark up the structure of sentences in terms of phrases and word
+dependencies, indicate which noun phrases refer to the same entities, indicate
+sentiment, etc.
+
+This wrapper needs the Stanford CoreNLP jars that run on Java 1.8+.
+
+See http://nlp.stanford.edu/software/corenlp.shtml.
+
+## Usage
+
+~~~nitish
+var proc = new NLPProcessor("path/to/StanfordCoreNLP/jars")
+
+var doc = proc.process("String to analyze")
+
+for sentence in doc.sentences do
+ for token in sentence.tokens do
+ print "{token.lemma}: {token.pos}"
+ end
+end
+~~~
+
+## Nit API
+
+For ease of use, this wrapper introduces a Nit model to handle CoreNLP XML results.
+
+### NLPDocument
+
+[[doc: NLPDocument]]
+
+[[doc: NLPDocument::from_xml]]
+[[doc: NLPDocument::from_xml_file]]
+[[doc: NLPDocument::sentences]]
+
+### NLPSentence
+
+[[doc: NLPSentence]]
+
+[[doc: NLPSentence::tokens]]
+
+### NLPToken
+
+[[doc: NLPToken]]
+
+[[doc: NLPToken::word]]
+[[doc: NLPToken::lemma]]
+[[doc: NLPToken::pos]]
+
+### NLP Processor
+
+[[doc: NLPProcessor]]
+
+[[doc: NLPProcessor::java_cp]]
+
+[[doc: NLPProcessor::process]]
+[[doc: NLPProcessor::process_file]]
+[[doc: NLPProcessor::process_files]]
+
+## Vector Space Model
+
+[[doc: NLPVector]]
+
+[[doc: NLPDocument::vector]]
+
+[[doc: NLPVector::cosine_similarity]]
+
+## NitNLP binary
+
+The `nitnlp` binary is given as an example of a NitNLP client.
+It compares two strings and displays their cosine similarity value.
+
+Usage:
+
+~~~raw
+nitnlp --cp "/path/to/jars" "sort" "Sorting array data"
+0.577
+~~~
+
+## TODO
+
+* Use JWrapper
+* Use options to choose CoreNLP analyzers
+* Analyze sentence dependencies
+* Analyze sentiment
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Natural Language Processor based on the StanfordNLP core.
+#
+# This tool provides a document comparison service from the command line based on
+# StanfordNLP and NLPVector cosine similarity.
+#
+# See http://nlp.stanford.edu/software/corenlp.shtml.
+module nitnlp
+
+import opts
+import nlp
+
+# Command-line options
+var opt_java_cp = new OptionString("Java classpath for StanfordNLP jars", "--cp")
+var options = new OptionContext
+options.add_option(opt_java_cp)
+options.parse(args)
+var arguments = options.rest
+
+# Build the processor; the classpath falls back to `*` when `--cp` is not given
+var proc = new NLPProcessor(opt_java_cp.value or else "*")
+
+# Exactly two texts are expected after the options
+if arguments.length != 2 then
+	print "Usage: nitnlp text1 text2\n"
+	options.usage
+	sys.exit 1
+end
+
+# Analyze each text and show its vector
+var doc1 = proc.process(arguments.first)
+print doc1.vector.join(":", ",")
+var doc2 = proc.process(arguments.last)
+print doc2.vector.join(":", ",")
+
+# Show the cosine similarity of the two vectors
+print doc1.vector.cosine_similarity(doc2.vector)
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Natural Language Processor based on the StanfordNLP core.
+#
+# See http://nlp.stanford.edu/software/corenlp.shtml.
+module nlp
+
+import stanford
+import vsm
+
+redef class NLPDocument
+
+	# `NLPVector` representing `self`.
+	#
+	# Each key is a lemma and its value is the number of kept tokens of `self`
+	# having that lemma.
+	# Tokens are filtered with `keep_pos_token` then `keep_lemma`.
+	var vector: NLPVector is lazy do
+		var vector = new NLPVector
+		for sentence in sentences do
+			for token in sentence.tokens do
+				if not keep_pos_token(token) then continue
+				var lemma = token.lemma
+				if not keep_lemma(lemma) then continue
+				if not vector.has_key(lemma) then
+					vector[lemma] = 1
+				else
+					vector[lemma] += 1
+				end
+			end
+		end
+		return vector
+	end
+
+	# Should we keep `token` when composing the vector?
+	#
+	# Choice is based on the POS tag of the token: it is kept when its tag
+	# starts with one of the `allowed_pos_prefixes`.
+	private fun keep_pos_token(token: NLPToken): Bool do
+		var pos = token.pos
+		for prefix in allowed_pos_prefixes do
+			if pos.has_prefix(prefix) then return true
+		end
+		return false
+	end
+
+	# Should we keep `lemma` when composing the vector?
+	#
+	# A lemma is kept unless it is listed in `lemma_black_list`.
+	private fun keep_lemma(lemma: String): Bool do
+		return not lemma_black_list.has(lemma)
+	end
+
+	# Allowed POS tag prefixes.
+	#
+	# When building a vector from `self`, only tokens tagged with one of these
+	# prefixes are kept.
+	# Other tokens are ignored.
+	var allowed_pos_prefixes: Array[String] = ["NN", "VB", "RB"] is writable
+
+	# Ignored lemmas.
+	#
+	# Tokens having one of these lemmas are left out of `vector`.
+	var lemma_black_list: Array[String] = ["module", "class", "method"] is writable
+end
--- /dev/null
+[package]
+name=nlp
+tags=nlp,lib
+maintainer=Alexandre Terrasa <alexandre@moz-code.org>
+license=Apache-2.0
+[upstream]
+browse=https://github.com/nitlang/nit/tree/master/lib/nlp/
+git=https://github.com/nitlang/nit.git
+git.directory=lib/nlp/
+homepage=http://nitlanguage.org
+issues=https://github.com/nitlang/nit/issues
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Natural Language Processor based on the StanfordNLP core.
+#
+# See http://nlp.stanford.edu/software/corenlp.shtml.
+module stanford
+
+import opts
+import dom
+
+# Wrapper around StanfordNLP jar.
+#
+# NLPProcessor provides natural language processing of input text files and
+# an API to handle analysis results.
+#
+# FIXME this should use the Java FFI.
+class NLPProcessor
+
+	# Classpath to give to Java when loading the StanfordNLP jars.
+	var java_cp: String
+
+	# Process a string and return a new NLPDocument from this.
+	#
+	# The string is written to the file `.nlp.in` in the current directory,
+	# handed to `process_file`, then the file is deleted.
+	fun process(string: String): NLPDocument do
+		var tmp_file = ".nlp.in"
+		var file = new FileWriter.open(tmp_file)
+		file.write string
+		file.close
+		var doc = process_file(tmp_file)
+		tmp_file.file_delete
+		return doc
+	end
+
+	# Process the `input` file and return a new NLPDocument from this.
+	#
+	# The XML result is read from `<basename of input>.xml` in the current
+	# directory, then that file is deleted.
+	fun process_file(input: String): NLPDocument do
+		# TODO opt annotators
+		var tmp_file = "{input.basename}.xml"
+		# Arguments are shell-escaped so that paths containing spaces or shell
+		# metacharacters are passed to Java as a single, unmodified argument.
+		sys.system "java -cp {java_cp.escape_to_sh} -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma -outputFormat xml -file {input.escape_to_sh}"
+		var doc = new NLPDocument.from_xml_file(tmp_file)
+		tmp_file.file_delete
+		return doc
+	end
+
+	# Batch mode.
+	#
+	# Returns a map of file path associated with their NLPDocument.
+	#
+	# The paths of `inputs` are listed in the file `inputs.list` (created in
+	# the current directory and deleted afterwards), and the XML results are
+	# read from `output_dir`.
+	fun process_files(inputs: Collection[String], output_dir: String): Map[String, NLPDocument] do
+		# Prepare the input file list
+		var input_file = "inputs.list"
+		var fw = new FileWriter.open(input_file)
+		for input in inputs do fw.write "{input}\n"
+		fw.close
+
+		# Run Stanford NLP jar, with shell-escaped arguments
+		sys.system "java -cp {java_cp.escape_to_sh} -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma -outputFormat xml -filelist {input_file.escape_to_sh} -outputDirectory {output_dir.escape_to_sh}"
+
+		# Parse output
+		var map = new HashMap[String, NLPDocument]
+		for input in inputs do
+			var out_file = output_dir / "{input.basename}.xml"
+			map[input] = new NLPDocument.from_xml_file(out_file)
+		end
+		input_file.file_delete
+		return map
+	end
+end
+
+# An `NLPDocument` represents a text input given to the NLP processor.
+#
+# Once processed, it contains a list of sentences that contain tokens.
+class NLPDocument
+
+	# NLPSentences contained in `self`
+	var sentences = new Array[NLPSentence]
+
+	# Init `self` from an XML document.
+	#
+	# One `NLPSentence` is added to `sentences` for each `sentence` tag found
+	# under `root/document/sentences`.
+	#
+	# ~~~
+	# var xml = """
+	# <root>
+	# <document>
+	# <sentences>
+	# <sentence id="1">
+	# <tokens>
+	# <token id="1">
+	# <word>Stanford</word>
+	# <lemma>Stanford</lemma>
+	# <CharacterOffsetBegin>0</CharacterOffsetBegin>
+	# <CharacterOffsetEnd>8</CharacterOffsetEnd>
+	# <POS>NNP</POS>
+	# </token>
+	# <token id="2">
+	# <word>University</word>
+	# <lemma>University</lemma>
+	# <CharacterOffsetBegin>9</CharacterOffsetBegin>
+	# <CharacterOffsetEnd>19</CharacterOffsetEnd>
+	# <POS>NNP</POS>
+	# </token>
+	# </tokens>
+	# </sentence>
+	# <sentence id="2">
+	# <tokens>
+	# <token id="1">
+	# <word>UQAM</word>
+	# <lemma>UQAM</lemma>
+	# <CharacterOffsetBegin>0</CharacterOffsetBegin>
+	# <CharacterOffsetEnd>4</CharacterOffsetEnd>
+	# <POS>NNP</POS>
+	# </token>
+	# <token id="2">
+	# <word>University</word>
+	# <lemma>University</lemma>
+	# <CharacterOffsetBegin>5</CharacterOffsetBegin>
+	# <CharacterOffsetEnd>15</CharacterOffsetEnd>
+	# <POS>NNP</POS>
+	# </token>
+	# </tokens>
+	# </sentence>
+	# </sentences>
+	# </document>
+	# </root>""".to_xml.as(XMLDocument)
+	#
+	# var document = new NLPDocument.from_xml(xml)
+	# assert document.sentences.length == 2
+	# assert document.sentences.first.tokens.first.word == "Stanford"
+	# assert document.sentences.last.tokens.first.word == "UQAM"
+	# ~~~
+	init from_xml(xml: XMLDocument) do
+		for obj in xml["root"].first["document"].first["sentences"].first["sentence"] do
+			if obj isa XMLStartTag then
+				sentences.add new NLPSentence.from_xml(obj)
+			else
+				print "Warning: malformed xml, `sentences` is supposed to contain `sentence` tags"
+			end
+		end
+	end
+
+	# Init `self` from an XML file.
+	#
+	# The first two lines of the file are dropped before the rest is parsed
+	# with `from_xml`.
+	init from_xml_file(path: String) do
+		var file = new FileReader.open(path)
+		var xml = file.read_lines
+		file.close
+		xml.shift # remove xml doctype
+		xml.shift # remove xslt link
+		from_xml(xml.join("\n").to_xml.as(XMLDocument))
+	end
+end
+
+# Represent one sentence in a `Document`.
+class NLPSentence
+
+	# Position of this sentence in the input text (taken from the XML `id`).
+	var index: Int
+
+	# Tokens of this sentence, in the order they are found in the XML.
+	var tokens = new Array[NLPToken]
+
+	# Build `self` from a `sentence` XML element.
+	#
+	# ~~~
+	# var xml = """
+	# <sentence id="1">
+	# <tokens>
+	# <token id="1">
+	# <word>Stanford</word>
+	# <lemma>Stanford</lemma>
+	# <CharacterOffsetBegin>0</CharacterOffsetBegin>
+	# <CharacterOffsetEnd>8</CharacterOffsetEnd>
+	# <POS>NNP</POS>
+	# </token>
+	# <token id="2">
+	# <word>University</word>
+	# <lemma>University</lemma>
+	# <CharacterOffsetBegin>9</CharacterOffsetBegin>
+	# <CharacterOffsetEnd>19</CharacterOffsetEnd>
+	# <POS>NNP</POS>
+	# </token>
+	# </tokens>
+	# </sentence>""".to_xml["sentence"].first.as(XMLStartTag)
+	#
+	# var sentence = new NLPSentence.from_xml(xml)
+	# assert sentence.index == 1
+	# assert sentence.tokens.length == 2
+	# ~~~
+	init from_xml(xml: XMLStartTag) do
+		# The sentence index is read from the first attribute of the tag
+		var id_attr = xml.attributes.first.as(XMLStringAttr)
+		var sentence_index = id_attr.value.to_i
+
+		# Collect every `token` child, warning about anything else
+		var token_nodes = xml["tokens"].first["token"]
+		for node in token_nodes do
+			if node isa XMLStartTag then
+				tokens.add new NLPToken.from_xml(node)
+			else
+				print "Warning: malformed xml, `tokens` is supposed to contain `token` tags"
+			end
+		end
+
+		init(sentence_index)
+	end
+end
+
+# Represents one word (or punctuation mark) in an `NLPSentence`.
+class NLPToken
+
+	# Position of this token in its sentence (taken from the XML `id`).
+	var index: Int
+
+	# The word as found in the `word` tag.
+	var word: String
+
+	# Lemma of `word`, as found in the `lemma` tag.
+	var lemma: String
+
+	# Offset where the token begins in the input (`CharacterOffsetBegin`).
+	var begin_offset: Int
+
+	# Offset where the token ends in the input (`CharacterOffsetEnd`).
+	var end_offset: Int
+
+	# Part Of Speech tag, as found in the `POS` tag.
+	var pos: String
+
+	# Build `self` from a `token` XML element.
+	#
+	# ~~~
+	# var xml = """
+	# <token id="2">
+	# <word>University</word>
+	# <lemma>University</lemma>
+	# <CharacterOffsetBegin>9</CharacterOffsetBegin>
+	# <CharacterOffsetEnd>19</CharacterOffsetEnd>
+	# <POS>NNP</POS>
+	# </token>""".to_xml["token"].first.as(XMLStartTag)
+	#
+	# var token = new NLPToken.from_xml(xml)
+	# assert token.index == 2
+	# assert token.word == "University"
+	# assert token.lemma == "University"
+	# assert token.begin_offset == 9
+	# assert token.end_offset == 19
+	# assert token.pos == "NNP"
+	# ~~~
+	init from_xml(xml: XMLStartTag) do
+		# The token index is read from the first attribute of the tag
+		var id_attr = xml.attributes.first.as(XMLStringAttr)
+
+		# Each property is the text of the first child tag with that name
+		var word_tag = xml["word"].first.as(XMLStartTag)
+		var lemma_tag = xml["lemma"].first.as(XMLStartTag)
+		var begin_tag = xml["CharacterOffsetBegin"].first.as(XMLStartTag)
+		var end_tag = xml["CharacterOffsetEnd"].first.as(XMLStartTag)
+		var pos_tag = xml["POS"].first.as(XMLStartTag)
+
+		init(id_attr.value.to_i, word_tag.data, lemma_tag.data, begin_tag.data.to_i, end_tag.data.to_i, pos_tag.data)
+	end
+end
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Vector Space Model.
+#
+# The Vector Space Model (VSM) is used to compare natural language texts.
+# Texts are translated to multidimensional vectors then compared by cosine
+# similarity.
+module vsm
+
+import counter
+
+# A multi-dimensional vector.
+class NLPVector
+	super Counter[String]
+
+	# Cosine similarity of `self` and `other`.
+	#
+	# Gives the proximity in the range `[0.0 .. 1.0]` where 0.0 means that the
+	# two vectors are orthogonal and 1.0 means that they are identical.
+	#
+	# If the norm of `self` or `other` is zero (e.g. an empty vector), the
+	# similarity is defined as 0.0 instead of the NaN given by `0.0 / 0.0`.
+	#
+	# ~~~
+	# var v1 = new NLPVector
+	# v1["x"] = 1
+	# v1["y"] = 2
+	# v1["z"] = 3
+	#
+	# var v2 = new NLPVector
+	# v2["x"] = 1
+	# v2["y"] = 2
+	# v2["z"] = 3
+	#
+	# var v3 = new NLPVector
+	# v3["a"] = 1
+	# v3["b"] = 2
+	# v3["c"] = 3
+	#
+	# assert v1.cosine_similarity(v2).is_approx(1.0, 0.001)
+	# assert v1.cosine_similarity(v3) == 0.0
+	#
+	# var empty = new NLPVector
+	# assert v1.cosine_similarity(empty) == 0.0
+	# assert empty.cosine_similarity(v1) == 0.0
+	# ~~~
+	fun cosine_similarity(other: SELF): Float do
+		# Guard against a division by zero
+		var norms = self.norm * other.norm
+		if norms == 0.0 then return 0.0
+
+		# Get dot product of the two vectors.
+		# A term missing from `other` contributes 0, so walking the terms of
+		# `self` is enough.
+		var dot = 0
+		for term in self.keys do
+			dot += self[term] * other.get_or_default(term, 0)
+		end
+
+		return dot.to_f / norms
+	end
+
+	# The norm of the vector.
+	#
+	# `||x|| = (x1 ** 2 ... + xn ** 2).sqrt`
+	#
+	# ~~~
+	# var v = new NLPVector
+	# v["x"] = 1
+	# v["y"] = 1
+	# v["z"] = 1
+	# v["t"] = 1
+	# assert v.norm.is_approx(2.0, 0.001)
+	#
+	# v["x"] = 1
+	# v["y"] = 2
+	# v["z"] = 3
+	# v["t"] = 0
+	# assert v.norm.is_approx(3.742, 0.001)
+	# ~~~
+	fun norm: Float do
+		var sum = 0
+		for v in self.values do sum += v ** 2
+		return sum.to_f.sqrt
+	end
+end
redef fun build_reduce_table
do
var reduce_table = new Array[ReduceAction].with_capacity(1091)
- self.reduce_table = reduce_table
reduce_table.add new ReduceAction0(0)
reduce_table.add new ReduceAction1(0)
reduce_table.add new ReduceAction2(0)
reduce_table.add new ReduceAction1088(220)
reduce_table.add new ReduceAction473(221)
reduce_table.add new ReduceAction492(221)
+ return reduce_table
end
end
init
do
- build_reduce_table
+ self.reduce_table = once build_reduce_table
end
# Do a transition in the automata
end
private var reduce_table: Array[ReduceAction] is noinit
- private fun build_reduce_table is abstract
+ private fun build_reduce_table: Array[ReduceAction] is abstract
end
redef class Prod
redef fun build_reduce_table
do
var reduce_table = new Array[ReduceAction].with_capacity(${count(rules/rule)})
- self.reduce_table = reduce_table
$ foreach {rules/rule}
reduce_table.add new ReduceAction@index(@leftside)
$ end foreach
+ return reduce_table
end
end