nitcc: add an example of a monkey-patching of a Lexer to add behavior.
authorJean Privat <jean@pryen.org>
Fri, 12 Aug 2016 21:53:43 +0000 (17:53 -0400)
committerJean Privat <jean@pryen.org>
Fri, 12 Aug 2016 21:53:43 +0000 (17:53 -0400)
Signed-off-by: Jean Privat <jean@pryen.org>

contrib/nitcc/.gitignore
contrib/nitcc/examples/blob.nit [new file with mode: 0644]
contrib/nitcc/examples/blob.sablecc [new file with mode: 0644]
contrib/nitcc/src/Makefile

index 8d6951d..f7a4467 100644 (file)
@@ -13,3 +13,4 @@ nitcc1
 nitcc
 calc
 minilang
+blob
diff --git a/contrib/nitcc/examples/blob.nit b/contrib/nitcc/examples/blob.nit
new file mode 100644 (file)
index 0000000..696197f
--- /dev/null
@@ -0,0 +1,85 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Example of the hijack of a lexer to inject custom behavior.
+# see `blob.sablecc` for the grammar
+import blob_test_parser
+
+# Refinement of the generated `Lexer_blob` that produces `Nblob` tokens by hand.
+#
+# The lexer alternates between two contexts: in the *in blob* context the text
+# is scanned manually up to the next `{{{` (or the end of the text);
+# in the *not in blob* context the genuine generated lexer is used as is.
+redef class Lexer_blob
+
+       # Two contexts, *in blob* (custom), and *not in blob* (normal).
+       # The initial state is *in blob*.
+       var in_blob = true
+
+       # Refine `next_token` to hijack the lexer.
+       #
+       # When not in a blob, delegate to the genuine lexer and switch back to
+       # the *in blob* context once a `Nendmark` token is returned.
+       # When in a blob, build and return a `Nblob` token manually, then
+       # switch to the *not in blob* context.
+       redef fun next_token
+       do
+               if not in_blob then
+                       # Normal lexer
+                       var res = super
+                       # Watch for tokens that trigger a context change:
+                       # what follows an end mark is lexed as a blob again.
+                       if res isa Nendmark then in_blob = true
+                       return res
+               end
+
+               # Custom lexer
+               # Manage pos, line and col manually
+               # TODO: improve the lexer API
+
+               # Work on local copies; the lexer attributes are only updated
+               # once the token is built.
+               var pos = pos_start
+               var line = line_start
+               var col = col_start
+               var text = stream
+               var len = text.length
+
+               # Need to count three consecutive '{' or reach the end of text.
+               # `cpt` is the number of consecutive '{' seen so far.
+               var cpt = 0
+               while pos < len do
+                       var c = text[pos]
+                       if c == '{' then
+                               cpt += 1
+                               if cpt == 3 then
+                                       # Got them, backtrack them.
+                                       # `pos` is on the third '{'; after the backtrack it
+                                       # designates the character just before the first '{'.
+                                       pos -= 3
+                                       col -= 3
+                                       break
+                               end
+                       else
+                               # Any other character breaks the run of '{'.
+                               cpt = 0
+                       end
+
+                       # Next char, count lines.
+                       pos += 1
+                       col += 1
+                       if c == '\n' then
+                               line += 1
+                               col = 1
+                       end
+               end
+               # Note: if no `{{{` was found, the loop ends with `pos == len`.
+
+               # Create manually the `blob token`
+               var token = new Nblob
+               var position = new Position(pos_start, pos, line_start, line, col_start, col)
+               token.position = position
+               # The text of the blob runs from `pos_start` to `pos`, inclusive.
+               token.text = text.substring(pos_start, pos-pos_start+1)
+
+               # Prepare for the next token: it starts just after the blob
+               # and is recognized by the normal lexer.
+               pos_start = pos + 1
+               line_start = line
+               col_start = col + 1
+               in_blob = false
+
+               return token
+       end
+end
diff --git a/contrib/nitcc/examples/blob.sablecc b/contrib/nitcc/examples/blob.sablecc
new file mode 100644 (file)
index 0000000..a2dee60
--- /dev/null
@@ -0,0 +1,22 @@
+/* Special lexer that will be hijacked. See blob.nit */
+Grammar blob;
+
+Lexer
+// These tokens are recognized by the genuine lexer
+d = '0'..'9';
+int = d+;
+white = #9..#13 | ' ';
+// Need to name this token, we will use it to change context
+endmark = '}}}';
+
+// Special token that the genuine lexer is expected not to recognize,
+// but that must be known by the parser or the application.
+// TODO: Maybe add a special keyword?
+//       blob = Phony;
+blob = #0;
+
+Parser
+Ignored white;
+ps = p*;
+// The parser does not know that `blob` is phony.
+p = blob | '{{{' int endmark;
index 52356e6..ed5ba5d 100644 (file)
@@ -1,6 +1,6 @@
 NITC=../../../bin/nitc
 
-all: nitcc calc minilang
+all: nitcc calc minilang blob
 
 nitcc_parser_gen: nitcc_parser_gen.nit
        @echo "*** Compile the nitcc bootstrap parser generator -- level 0"
@@ -33,6 +33,12 @@ minilang: nitcc ../examples/minilang.sablecc ../examples/minilang.nit
        ${NITC} ../examples/minilang.nit -v
        printf "10\n42\n" | ./minilang ../examples/minilang.minilang
 
+blob: nitcc ../examples/blob.sablecc ../examples/blob.nit
+       @echo "*** Example program, blob"
+       cd ../examples && ../src/nitcc blob.sablecc
+       ${NITC} ../examples/blob.nit -v
+       ./blob -e "abc {{{ 1 }}} de {{{ 2 }}} { 3 }"
+
 check: tests
 tests:
        cd ../tests && ./run
@@ -42,7 +48,7 @@ clean:
                *.dot *.out \
                nitcc_lexer.nit nitcc_parser.nit nitcc_test_parser.nit nitcc_parser_gen \
                nitcc0 nitcc1 \
-               calc minilang \
+               calc minilang blob \
                ../examples/*.dot ../examples/*.out ../examples/*_lexer.nit ../examples/*_parser.nit ../examples/*_test_parser.nit \
                2>/dev/null || true