--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Example of hijacking a generated lexer to inject custom behavior.
+# See `blob.sablecc` for the grammar.
+import blob_test_parser
+
+redef class Lexer_blob
+
+	# Current lexing context.
+	#
+	# * `true`: *in blob* (custom), `next_token` scans the raw text by hand.
+	# * `false`: *not in blob* (normal), `next_token` delegates to the inherited lexer.
+	#
+	# Lexing starts *in blob*.
+	var in_blob = true
+
+	# Refinement of `next_token` that hijacks the lexer.
+	#
+	# In the *in blob* context, a `Nblob` token is built manually: it runs from
+	# `pos_start` up to (but excluding) the next `{{{`, or up to the end of the
+	# text if there is none. The context then becomes *not in blob*.
+	#
+	# In the *not in blob* context, the inherited lexer produces the token;
+	# a `Nendmark` token switches the context back to *in blob*.
+	redef fun next_token
+	do
+		if in_blob then
+			# Custom lexer: the cursor, line and column are managed manually.
+			# TODO: improve the lexer API
+			var input = stream
+			var limit = input.length
+			var cursor = pos_start
+			var cur_line = line_start
+			var cur_col = col_start
+
+			# Number of consecutive '{' seen so far; three of them end the blob.
+			var braces = 0
+			loop
+				# End of text: the blob extends to the end.
+				# NOTE(review): here `cursor` equals `limit` (one past the last
+				# char) whereas after a `{{{` it is the last char of the blob;
+				# confirm this is what the token position is expected to hold.
+				if cursor >= limit then break
+
+				var ch = input[cursor]
+				if ch != '{' then
+					braces = 0
+				else
+					braces += 1
+					if braces == 3 then
+						# Opening mark found: step back so that it is left
+						# for the normal lexer.
+						cursor -= 3
+						cur_col -= 3
+						break
+					end
+				end
+
+				# Advance one char, keeping line and column in sync.
+				cursor += 1
+				if ch == '\n' then
+					cur_line += 1
+					cur_col = 1
+				else
+					cur_col += 1
+				end
+			end
+
+			# Build the `Nblob` token by hand.
+			var tok = new Nblob
+			tok.text = input.substring(pos_start, cursor - pos_start + 1)
+			tok.position = new Position(pos_start, cursor, line_start, cur_line, col_start, cur_col)
+
+			# Leave the blob context and place the lexer state for the next token.
+			in_blob = false
+			pos_start = cursor + 1
+			line_start = cur_line
+			col_start = cur_col + 1
+
+			return tok
+		end
+
+		# Normal lexer: delegate, and watch for the token that reopens a blob.
+		var res = super
+		if res isa Nendmark then in_blob = true
+		return res
+	end
+end
--- /dev/null
+/* Grammar whose lexer is meant to be hijacked. See blob.nit */
+Grammar blob;
+
+Lexer
+// These tokens are recognized by the genuine (generated) lexer.
+d = '0'..'9';
+int = d+;
+white = #9..#13 | ' ';
+// This token must be named: blob.nit uses it to change the lexing context.
+endmark = '}}}';
+
+// Special token that the genuine lexer is expected not to recognize,
+// but that must be known by the parser or the application.
+// It is declared here as the character #0.
+// TODO: Maybe add a special keyword?
+// blob = Phony;
+blob = #0;
+
+Parser
+Ignored white;
+ps = p*;
+// The parser does not know that `blob` is phony.
+p = blob | '{{{' int endmark;
NITC=../../../bin/nitc
-all: nitcc calc minilang
+all: nitcc calc minilang blob
nitcc_parser_gen: nitcc_parser_gen.nit
@echo "*** Compile the nitcc bootstrap parser generator -- level 0"
${NITC} ../examples/minilang.nit -v
printf "10\n42\n" | ./minilang ../examples/minilang.minilang
+# Example program `blob`: run nitcc on blob.sablecc from within ../examples,
+# compile ../examples/blob.nit with ${NITC}, then run ./blob on a sample input.
+blob: nitcc ../examples/blob.sablecc ../examples/blob.nit
+ @echo "*** Example program, blob"
+ cd ../examples && ../src/nitcc blob.sablecc
+ ${NITC} ../examples/blob.nit -v
+ ./blob -e "abc {{{ 1 }}} de {{{ 2 }}} { 3 }"
+
check: tests
tests:
cd ../tests && ./run
*.dot *.out \
nitcc_lexer.nit nitcc_parser.nit nitcc_test_parser.nit nitcc_parser_gen \
nitcc0 nitcc1 \
- calc minilang \
+ calc minilang blob \
../examples/*.dot ../examples/*.out ../examples/*_lexer.nit ../examples/*_parser.nit ../examples/*_test_parser.nit \
2>/dev/null || true