The lexer extracts NIT tokens from an input stream.

It is better used with the Parser.

Introduced properties

private var _cr: Bool

nitc :: Lexer :: _cr

Was the last character a carriage-return?
private var _file: SourceFile

nitc :: Lexer :: _file

The source file
private var _last_token: nullable Token

nitc :: Lexer :: _last_token

The last peeked token to chain them
private var _line: Int

nitc :: Lexer :: _line

Current line number in the input stream
private var _pos: Int

nitc :: Lexer :: _pos

Current column in the input stream
private var _state: Int

nitc :: Lexer :: _state

Lexer current state
private var _stream_pos: Int

nitc :: Lexer :: _stream_pos

Current character in the stream
private var _token: nullable Token

nitc :: Lexer :: _token

Last peeked token
fun cr: Bool

nitc :: Lexer :: cr

Was the last character a carriage-return?
protected fun cr=(cr: Bool)

nitc :: Lexer :: cr=

Was the last character a carriage-return?
fun file: SourceFile

nitc :: Lexer :: file

The source file
protected fun file=(file: SourceFile)

nitc :: Lexer :: file=

The source file
protected fun get_token: nullable Token

nitc :: Lexer :: get_token

Primitive method to return a token, or return null if it is discarded
private fun last_token: nullable Token

nitc :: Lexer :: last_token

The last peeked token to chain them
private fun last_token=(last_token: nullable Token)

nitc :: Lexer :: last_token=

The last peeked token to chain them
fun line: Int

nitc :: Lexer :: line

Current line number in the input stream
protected fun line=(line: Int)

nitc :: Lexer :: line=

Current line number in the input stream
protected abstract fun make_token(accept_token: Int, location: Location): Token

nitc :: Lexer :: make_token

Allocate the right Token object for a given identifier
fun next: Token

nitc :: Lexer :: next

Give and consume the next token
fun peek: Token

nitc :: Lexer :: peek

Give the next token (but do not consume it)
fun pos: Int

nitc :: Lexer :: pos

Current column in the input stream
protected fun pos=(pos: Int)

nitc :: Lexer :: pos=

Current column in the input stream
private fun state: Int

nitc :: Lexer :: state

Lexer current state
private fun state=(state: Int)

nitc :: Lexer :: state=

Lexer current state
private fun state_initial: Int

nitc :: Lexer :: state_initial

Constant state values
fun stream_pos: Int

nitc :: Lexer :: stream_pos

Current character in the stream
protected fun stream_pos=(stream_pos: Int)

nitc :: Lexer :: stream_pos=

Current character in the stream
fun token: nullable Token

nitc :: Lexer :: token

Last peeked token
protected fun token=(token: nullable Token)

nitc :: Lexer :: token=

Last peeked token

Redefined properties

redef type SELF: Lexer

nitc $ Lexer :: SELF

Type of this instance, automatically specialized in every class
redef fun make_token(accept_token: Int, location: Location): Token

nitc :: lexer $ Lexer :: make_token

Allocate the right Token object for a given identifier

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
private var _cr: Bool

nitc :: Lexer :: _cr

Was the last character a carriage-return?
private var _file: SourceFile

nitc :: Lexer :: _file

The source file
private var _last_token: nullable Token

nitc :: Lexer :: _last_token

The last peeked token to chain them
private var _line: Int

nitc :: Lexer :: _line

Current line number in the input stream
private var _pos: Int

nitc :: Lexer :: _pos

Current column in the input stream
private var _state: Int

nitc :: Lexer :: _state

Lexer current state
private var _stream_pos: Int

nitc :: Lexer :: _stream_pos

Current character in the stream
private var _token: nullable Token

nitc :: Lexer :: _token

Last peeked token
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun cr: Bool

nitc :: Lexer :: cr

Was the last character a carriage-return?
protected fun cr=(cr: Bool)

nitc :: Lexer :: cr=

Was the last character a carriage-return?
fun file: SourceFile

nitc :: Lexer :: file

The source file
protected fun file=(file: SourceFile)

nitc :: Lexer :: file=

The source file
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
protected fun get_token: nullable Token

nitc :: Lexer :: get_token

Primitive method to return a token, or return null if it is discarded
fun hash: Int

core :: Object :: hash

The hash code of the object.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
private fun last_token: nullable Token

nitc :: Lexer :: last_token

The last peeked token to chain them
private fun last_token=(last_token: nullable Token)

nitc :: Lexer :: last_token=

The last peeked token to chain them
fun lexer_accept(i: Int): Int

nitc :: TablesCapable :: lexer_accept

The accept value of the lexer at i
fun lexer_goto(i: Int, j: Int): Int

nitc :: TablesCapable :: lexer_goto

The goto value of the lexer at row i, column j-1
fun line: Int

nitc :: Lexer :: line

Current line number in the input stream
protected fun line=(line: Int)

nitc :: Lexer :: line=

Current line number in the input stream
protected abstract fun make_token(accept_token: Int, location: Location): Token

nitc :: Lexer :: make_token

Allocate the right Token object for a given identifier
private intern fun native_class_name: CString

core :: Object :: native_class_name

The class name of the object in CString format.
fun next: Token

nitc :: Lexer :: next

Give and consume the next token
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
fun parser_action(i: Int, j: Int): Int

nitc :: TablesCapable :: parser_action

The action value of the parser at row i, column j-1
fun parser_goto(i: Int, j: Int): Int

nitc :: TablesCapable :: parser_goto

The goto value of the parser at row i, column j-1
fun peek: Token

nitc :: Lexer :: peek

Give the next token (but do not consume it)
fun pos: Int

nitc :: Lexer :: pos

Current column in the input stream
protected fun pos=(pos: Int)

nitc :: Lexer :: pos=

Current column in the input stream
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value use for serialization
private fun state: Int

nitc :: Lexer :: state

Lexer current state
private fun state=(state: Int)

nitc :: Lexer :: state=

Lexer current state
private fun state_initial: Int

nitc :: Lexer :: state_initial

Constant state values
fun stream_pos: Int

nitc :: Lexer :: stream_pos

Current character in the stream
protected fun stream_pos=(stream_pos: Int)

nitc :: Lexer :: stream_pos=

Current character in the stream
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
fun token: nullable Token

nitc :: Lexer :: token

Last peeked token
protected fun token=(token: nullable Token)

nitc :: Lexer :: token=

Last peeked token
package_diagram nitc::Lexer Lexer nitc::TablesCapable TablesCapable nitc::Lexer->nitc::TablesCapable core::Object Object nitc::TablesCapable->core::Object ...core::Object ... ...core::Object->core::Object nitc::InjectedLexer InjectedLexer nitc::InjectedLexer->nitc::Lexer

Ancestors

interface Object

core :: Object

The root of the class hierarchy.

Parents

interface TablesCapable

nitc :: TablesCapable

Interface allowing the access of the tables used during the parsing.

Children

class InjectedLexer

nitc :: InjectedLexer

A modified lexer that feed tokens before and after the real tokens.

Class definitions

nitc $ Lexer
# The lexer extracts NIT tokens from an input stream.
# It is better used with the Parser
class Lexer
	super TablesCapable

	# Last peeked token
	# Cached by `peek` so repeated peeks return the same token;
	# cleared by `next` when the token is consumed.
	var token: nullable Token = null

	# Lexer current state
	private var state: Int = 0

	# The source file
	var file: SourceFile

	# Current character in the stream
	var stream_pos: Int = 0

	# Current line number in the input stream
	var line: Int = 0

	# Current column in the input stream
	var pos: Int = 0

	# Was the last character a carriage-return?
	# Used to treat a CR+LF pair as a single end of line.
	var cr: Bool = false

	# Constant state values
	private fun state_initial: Int do return 0 end

	# The last peeked token to chain them
	# Each newly produced token is linked to this one via `prev_token`/`next_token`.
	private var last_token: nullable Token = null

	# Give the next token (but do not consume it)
	fun peek: Token
	do
		# Return the cached token, if any
		var t = _token
		if t != null then return t

		# Pull tokens until one is not discarded (`get_token` returns null for ignored tokens)
		t = get_token
		while t == null do t = get_token

		if isset t._location then
			# Chain the token with the previous one, or register it as the first token of the file
			var l = last_token
			if l != null then
				l.next_token = t
				t.prev_token = l
			else
				file.first_token = t
			end
			last_token = t
		end

		_token = t
		return t
	end

	# Give and consume the next token
	fun next: Token
	do
		var result = peek
		_token = null
		return result
	end

	# Primitive method to return a token, or return null if it is discarded
	# Is used to implement `peek` and `next`
	protected fun get_token: nullable Token
	do
		# Walk the generated DFA from its initial state over the characters of the stream
		var dfa_state = 0

		var sp = _stream_pos
		var start_stream_pos = sp
		var start_pos = _pos
		var start_line = _line
		var file = self.file
		var string = file.string
		var string_len = string.length

		# Longest-match bookkeeping: remember the last accepting state reached
		var accept_state = -1
		var accept_token = -1
		var accept_length = -1
		var accept_pos = -1
		var accept_line = -1

		loop
			if sp >= string_len then
				dfa_state = -1
			else
				# Very ugly hack, this is because of the way SableCC generates its tables.
				# Due to the 0xFFFF limit of a Java char, when a big Nit char is read (i.e.
				# code point > 65535), it crashes.
				#
				# Hence, if a char has a code point <= 255 (ISO8859 range), it is left as is.
				# Else, it is replaced by 255.
				# This does not corrupt the lexer and works perfectly on any character.
				#
				# TL;DR: Java fucked up, need retarded solution to cope for retarded decision
				var c = string[sp].code_point
				if c >= 256 then c = 255
				sp += 1

				# Track line/column; LF, CR and CR+LF each count as a single end of line
				var cr = _cr
				var line = _line
				var pos = _pos
				if c == 10 then
					if cr then
						# LF following a CR: same end of line, just update the line start
						cr = false
					        file.line_starts[line] = sp
					else
						line = line + 1
						pos = 0
					        file.line_starts[line] = sp
					end
				else if c == 13 then
					line = line + 1
					pos = 0
					cr = true
					file.line_starts[line] = sp
				else
					pos = pos + 1
					cr = false
				end

				# Advance the DFA: binary-search the transition ranges of the current state
				loop
					var old_state = dfa_state
					if dfa_state < -1 then
						old_state = -2 - dfa_state
					end

					dfa_state = -1

					var low = 0
					# lexer_goto(state, 0) holds the number of transition ranges of the state
					var high = lexer_goto(old_state, 0) - 1

					if high >= 0 then
						while low <= high do
							var middle = (low + high) / 2
							var offset = middle * 3 + 1 # +1 because length is at 0

							if c < lexer_goto(old_state, offset) then
								high = middle - 1
							else if c > lexer_goto(old_state, offset+1) then
								low = middle + 1
							else
								dfa_state = lexer_goto(old_state, offset+2)
								break
							end
						end
					end
					if dfa_state > -2 then break
				end

				_cr = cr
				_line = line
				_pos = pos
			end

			if dfa_state >= 0 then
				# Still in a live state: record it if accepting (longest match wins)
				var tok = lexer_accept(dfa_state)
				if tok != -1 then
					accept_state = dfa_state
					accept_token = tok
					accept_length = sp - start_stream_pos
					accept_pos = _pos
					accept_line = _line
				end
			else
				# Dead state: emit the last accepted token, or report an error/EOF
				if accept_state != -1 then
					_pos = accept_pos
					_line = accept_line
					_stream_pos = start_stream_pos + accept_length
					if accept_token == 0 then
						# Ignored token (whitespaces)
						return null
					end
					var location = new Location(file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
					return make_token(accept_token, location)
				else
					_stream_pos = sp
					var location = new Location(file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
					if sp > start_stream_pos then
						# Characters were consumed but nothing matched: lexer error token
						var text = string.substring(start_stream_pos, sp-start_stream_pos)
						var token = new ALexerError.init_lexer_error("Syntax Error: unknown token `{text}`.", location, text)
						file.last_token = token
						return token
					else
						# Nothing left to read: end of file
						var token = new EOF.init_tk(location)
						file.last_token = token
						return token
					end
				end
			end
		end
	end

	# Allocate the right Token object for a given identifier
	protected fun make_token(accept_token: Int, location: Location): Token is abstract
end
src/parser/lexer_work.nit:79,1--276,3

nitc :: lexer $ Lexer
# Generated dispatch: maps the SableCC accept indices to concrete `Token` classes.
# The numeric values must stay in sync with the generated `lexer_accept` table,
# so do not renumber or reorder the branches below.
redef class Lexer
	redef fun make_token(accept_token, location)
	do
		# Trivia: end of line and comments
		if accept_token == 1 then
			return new TEol.init_tk(location)
		end
		if accept_token == 2 then
			return new TComment.init_tk(location)
		end
		# Keywords (3..55)
		if accept_token == 3 then
			return new TKwpackage.init_tk(location)
		end
		if accept_token == 4 then
			return new TKwmodule.init_tk(location)
		end
		if accept_token == 5 then
			return new TKwimport.init_tk(location)
		end
		if accept_token == 6 then
			return new TKwclass.init_tk(location)
		end
		if accept_token == 7 then
			return new TKwabstract.init_tk(location)
		end
		if accept_token == 8 then
			return new TKwinterface.init_tk(location)
		end
		if accept_token == 9 then
			return new TKwenum.init_tk(location)
		end
		if accept_token == 10 then
			return new TKwsubset.init_tk(location)
		end
		if accept_token == 11 then
			return new TKwend.init_tk(location)
		end
		if accept_token == 12 then
			return new TKwmeth.init_tk(location)
		end
		if accept_token == 13 then
			return new TKwtype.init_tk(location)
		end
		if accept_token == 14 then
			return new TKwinit.init_tk(location)
		end
		if accept_token == 15 then
			return new TKwredef.init_tk(location)
		end
		if accept_token == 16 then
			return new TKwis.init_tk(location)
		end
		if accept_token == 17 then
			return new TKwdo.init_tk(location)
		end
		if accept_token == 18 then
			return new TKwvar.init_tk(location)
		end
		if accept_token == 19 then
			return new TKwextern.init_tk(location)
		end
		if accept_token == 20 then
			return new TKwpublic.init_tk(location)
		end
		if accept_token == 21 then
			return new TKwprotected.init_tk(location)
		end
		if accept_token == 22 then
			return new TKwprivate.init_tk(location)
		end
		if accept_token == 23 then
			return new TKwintrude.init_tk(location)
		end
		if accept_token == 24 then
			return new TKwif.init_tk(location)
		end
		if accept_token == 25 then
			return new TKwthen.init_tk(location)
		end
		if accept_token == 26 then
			return new TKwelse.init_tk(location)
		end
		if accept_token == 27 then
			return new TKwwhile.init_tk(location)
		end
		if accept_token == 28 then
			return new TKwloop.init_tk(location)
		end
		if accept_token == 29 then
			return new TKwfor.init_tk(location)
		end
		if accept_token == 30 then
			return new TKwin.init_tk(location)
		end
		if accept_token == 31 then
			return new TKwand.init_tk(location)
		end
		if accept_token == 32 then
			return new TKwor.init_tk(location)
		end
		if accept_token == 33 then
			return new TKwnot.init_tk(location)
		end
		if accept_token == 34 then
			return new TKwimplies.init_tk(location)
		end
		if accept_token == 35 then
			return new TKwreturn.init_tk(location)
		end
		if accept_token == 36 then
			return new TKwcontinue.init_tk(location)
		end
		if accept_token == 37 then
			return new TKwbreak.init_tk(location)
		end
		if accept_token == 38 then
			return new TKwabort.init_tk(location)
		end
		if accept_token == 39 then
			return new TKwassert.init_tk(location)
		end
		if accept_token == 40 then
			return new TKwnew.init_tk(location)
		end
		if accept_token == 41 then
			return new TKwisa.init_tk(location)
		end
		if accept_token == 42 then
			return new TKwonce.init_tk(location)
		end
		if accept_token == 43 then
			return new TKwsuper.init_tk(location)
		end
		if accept_token == 44 then
			return new TKwself.init_tk(location)
		end
		if accept_token == 45 then
			return new TKwtrue.init_tk(location)
		end
		if accept_token == 46 then
			return new TKwfalse.init_tk(location)
		end
		if accept_token == 47 then
			return new TKwnull.init_tk(location)
		end
		if accept_token == 48 then
			return new TKwas.init_tk(location)
		end
		if accept_token == 49 then
			return new TKwnullable.init_tk(location)
		end
		if accept_token == 50 then
			return new TKwisset.init_tk(location)
		end
		if accept_token == 51 then
			return new TKwlabel.init_tk(location)
		end
		if accept_token == 52 then
			return new TKwwith.init_tk(location)
		end
		if accept_token == 53 then
			return new TKwdebug.init_tk(location)
		end
		if accept_token == 54 then
			return new TKwyield.init_tk(location)
		end
		if accept_token == 55 then
			return new TKwcatch.init_tk(location)
		end
		# Symbols and operators (56..100)
		if accept_token == 56 then
			return new TOpar.init_tk(location)
		end
		if accept_token == 57 then
			return new TCpar.init_tk(location)
		end
		if accept_token == 58 then
			return new TObra.init_tk(location)
		end
		if accept_token == 59 then
			return new TCbra.init_tk(location)
		end
		if accept_token == 60 then
			return new TComma.init_tk(location)
		end
		if accept_token == 61 then
			return new TColumn.init_tk(location)
		end
		if accept_token == 62 then
			return new TQuad.init_tk(location)
		end
		if accept_token == 63 then
			return new TAssign.init_tk(location)
		end
		if accept_token == 64 then
			return new TPluseq.init_tk(location)
		end
		if accept_token == 65 then
			return new TMinuseq.init_tk(location)
		end
		if accept_token == 66 then
			return new TStareq.init_tk(location)
		end
		if accept_token == 67 then
			return new TSlasheq.init_tk(location)
		end
		if accept_token == 68 then
			return new TPercenteq.init_tk(location)
		end
		if accept_token == 69 then
			return new TStarstareq.init_tk(location)
		end
		if accept_token == 70 then
			return new TPipeeq.init_tk(location)
		end
		if accept_token == 71 then
			return new TCareteq.init_tk(location)
		end
		if accept_token == 72 then
			return new TAmpeq.init_tk(location)
		end
		if accept_token == 73 then
			return new TLleq.init_tk(location)
		end
		if accept_token == 74 then
			return new TGgeq.init_tk(location)
		end
		if accept_token == 75 then
			return new TDotdotdot.init_tk(location)
		end
		if accept_token == 76 then
			return new TDotdot.init_tk(location)
		end
		if accept_token == 77 then
			return new TDot.init_tk(location)
		end
		if accept_token == 78 then
			return new TPlus.init_tk(location)
		end
		if accept_token == 79 then
			return new TMinus.init_tk(location)
		end
		if accept_token == 80 then
			return new TStar.init_tk(location)
		end
		if accept_token == 81 then
			return new TStarstar.init_tk(location)
		end
		if accept_token == 82 then
			return new TSlash.init_tk(location)
		end
		if accept_token == 83 then
			return new TPercent.init_tk(location)
		end
		if accept_token == 84 then
			return new TPipe.init_tk(location)
		end
		if accept_token == 85 then
			return new TCaret.init_tk(location)
		end
		if accept_token == 86 then
			return new TAmp.init_tk(location)
		end
		if accept_token == 87 then
			return new TTilde.init_tk(location)
		end
		if accept_token == 88 then
			return new TEq.init_tk(location)
		end
		if accept_token == 89 then
			return new TNe.init_tk(location)
		end
		if accept_token == 90 then
			return new TLt.init_tk(location)
		end
		if accept_token == 91 then
			return new TLe.init_tk(location)
		end
		if accept_token == 92 then
			return new TLl.init_tk(location)
		end
		if accept_token == 93 then
			return new TGt.init_tk(location)
		end
		if accept_token == 94 then
			return new TGe.init_tk(location)
		end
		if accept_token == 95 then
			return new TGg.init_tk(location)
		end
		if accept_token == 96 then
			return new TStarship.init_tk(location)
		end
		if accept_token == 97 then
			return new TBang.init_tk(location)
		end
		if accept_token == 98 then
			return new TQuest.init_tk(location)
		end
		if accept_token == 99 then
			return new TAt.init_tk(location)
		end
		if accept_token == 100 then
			return new TSemi.init_tk(location)
		end
		# Identifiers and literals (101..110)
		if accept_token == 101 then
			return new TClassid.init_tk(location)
		end
		if accept_token == 102 then
			return new TId.init_tk(location)
		end
		if accept_token == 103 then
			return new TAttrid.init_tk(location)
		end
		if accept_token == 104 then
			return new TInteger.init_tk(location)
		end
		if accept_token == 105 then
			return new TFloat.init_tk(location)
		end
		if accept_token == 106 then
			return new TString.init_tk(location)
		end
		if accept_token == 107 then
			return new TStartString.init_tk(location)
		end
		if accept_token == 108 then
			return new TMidString.init_tk(location)
		end
		if accept_token == 109 then
			return new TEndString.init_tk(location)
		end
		if accept_token == 110 then
			return new TChar.init_tk(location)
		end
		# Ill-formed literals (111..113)
		if accept_token == 111 then
			return new TBadString.init_tk(location)
		end
		if accept_token == 112 then
			return new TBadTString.init_tk(location)
		end
		if accept_token == 113 then
			return new TBadChar.init_tk(location)
		end
		# Extern code segments (114..115)
		if accept_token == 114 then
			return new TExternCodeSegment.init_tk(location)
		end
		if accept_token == 115 then
			return new TBadExtern.init_tk(location)
		end
		abort # unknown token index `accept_token`
	end
end
src/parser/lexer.nit:1397,1--1747,3