From cc41bd0a9a2dbbde1f738e559bcae27e6324ae6e Mon Sep 17 00:00:00 2001 From: Jean Privat Date: Fri, 29 Apr 2011 17:29:33 -0400 Subject: [PATCH] parser: new class SourceFile This class is used to feed the lexer and to give more precise information to the Location. Signed-off-by: Jean Privat --- src/compiling/compiling.nit | 2 +- src/compiling/compiling_global.nit | 2 +- src/compiling/compiling_icode.nit | 2 +- src/location.nit | 33 ++++++++-- src/parser/lexer.nit | 126 +++++++++++------------------------- src/parser/xss/lexer.xss | 100 +++++++--------------------- src/syntax/syntax.nit | 5 +- 7 files changed, 92 insertions(+), 178 deletions(-) diff --git a/src/compiling/compiling.nit b/src/compiling/compiling.nit index 8dcadba..a6b9a71 100644 --- a/src/compiling/compiling.nit +++ b/src/compiling/compiling.nit @@ -97,7 +97,7 @@ redef class MMModule var v = new CompilerVisitor(self, cprogram) v.add_decl("#include ") - var native_name = location.file.strip_extension(".nit") + var native_name = location.file.filename.strip_extension(".nit") var native_header = native_name + "_nit.h" if native_header.file_exists then v.add_decl("#include <{native_header.basename("")}>") diff --git a/src/compiling/compiling_global.nit b/src/compiling/compiling_global.nit index da63476..1a5aa73 100644 --- a/src/compiling/compiling_global.nit +++ b/src/compiling/compiling_global.nit @@ -116,7 +116,7 @@ redef class MMModule # Compile module file for the current module fun compile_local_table_to_c(v: CompilerVisitor) do - v.add_instr("const char *LOCATE_{name} = \"{location.file}\";") + v.add_instr("const char *LOCATE_{name} = \"{location.file.filename}\";") if v.program.tc.use_SFT_optimization or local_table.is_empty then return diff --git a/src/compiling/compiling_icode.nit b/src/compiling/compiling_icode.nit index 00e87c4..40894fe 100644 --- a/src/compiling/compiling_icode.nit +++ b/src/compiling/compiling_icode.nit @@ -168,7 +168,7 @@ class I2CCompilerVisitor if l != null then 
visitor.add_indent(w) w.add("/* ") - w.add(l.file) + w.add(l.file.filename) w.add(":") w.add(l.line_start.to_s) w.add(" */\n") diff --git a/src/location.nit b/src/location.nit index c6e4dee..65cbbbc 100644 --- a/src/location.nit +++ b/src/location.nit @@ -14,19 +14,37 @@ # See the License for the specific language governing permissions and # limitations under the License. +# This module is used to model Nit source-file and locations in source-file package location +# A raw text Nit source file +class SourceFile + # The path of the source + var filename: String + + # The content of the source + var string: String + + # Create a new sourcefile using a filename and a stream + init(filename: String, stream: IStream) + do + self.filename = filename + string = stream.read_all + end +end + +# A location inside a source file class Location super Comparable redef type OTHER: Location - readable var _file: String + readable var _file: nullable SourceFile readable var _line_start: Int readable var _line_end: Int readable var _column_start: Int readable var _column_end: Int - init(f: String, line_s: Int, line_e: Int, column_s: Int, column_e: Int) do + init(f: nullable SourceFile, line_s: Int, line_e: Int, column_s: Int, column_e: Int) do _file = f _line_start = line_s _line_end = line_e @@ -34,7 +52,7 @@ class Location _column_end = column_e end - init with_file(f: String) do init(f,0,0,0,0) + init with_file(f: SourceFile) do init(f,0,0,0,0) redef fun ==(other: nullable Object): Bool do if other == null then return false @@ -68,8 +86,11 @@ class Location end redef fun to_s: String do - var file_part = file - if file_part.length > 0 then file_part += ":" + var file_part = "" + if file != null then + file_part = file.filename + if file.filename.length > 0 then file_part += ":" + end if line_start == line_end then if column_start == column_end then @@ -85,7 +106,7 @@ class Location fun relative_to(loc: nullable Location): String do var relative: Location if loc != null and 
loc.file == self.file then - relative = new Location("", self.line_start, self.line_end, self.column_start, self.column_end) + relative = new Location(null, self.line_start, self.line_end, self.column_start, self.column_end) else relative = new Location(self.file, self.line_start, self.line_end, self.column_start, self.column_end) end diff --git a/src/parser/lexer.nit b/src/parser/lexer.nit index e66dc87..a6a56e2 100644 --- a/src/parser/lexer.nit +++ b/src/parser/lexer.nit @@ -1207,17 +1207,11 @@ class Lexer # Lexer current state var _state: Int = 0 - # Name of the stream (as given to tokens) - readable var _filename: String + # The source file + readable var _file: SourceFile - # Input stream where character are read - var _stream: IStream - - # Pushback buffer to store unread character - var _stream_buf: Buffer - - # Number of character stored in the pushback buffer - var _stream_pos: Int + # Current character in the stream + var _stream_pos: Int = 0 # Current line number in the input stream var _line: Int = 0 @@ -1228,23 +1222,13 @@ class Lexer # Was the last character a cariage-return? var _cr: Bool = false - # If the end of stream? 
- var _eof: Bool = false - - # Current working text read from the input stream - var _text: Buffer - # Constante state values private fun state_initial: Int do return 0 end # Create a new lexer for a stream (and a name) - init(stream: IStream, fname: String) + init(file: SourceFile) do - _filename = fname - _text = new Buffer - _stream = stream - _stream_pos = -1 - _stream_buf = new Buffer + _file = file end # Give the next token (but do not consume it) @@ -1272,8 +1256,12 @@ class Lexer do var dfa_state = 0 + var sp = _stream_pos + var start_stream_pos = sp var start_pos = _pos var start_line = _line + var string = _file.string + var string_len = string.length var accept_state = -1 var accept_token = -1 @@ -1281,13 +1269,13 @@ class Lexer var accept_pos = -1 var accept_line = -1 - var text = _text - text.clear - loop - var c = get_char + if sp >= string_len then + dfa_state = -1 + else + var c = string[sp].ascii + sp += 1 - if c != -1 then var cr = _cr var line = _line var pos = _pos @@ -1307,8 +1295,6 @@ class Lexer cr = false end - text.add(c.ascii) - loop var old_state = dfa_state if dfa_state < -1 then @@ -1341,8 +1327,6 @@ class Lexer _cr = cr _line = line _pos = pos - else - dfa_state = -1 end if dfa_state >= 0 then @@ -1350,29 +1334,29 @@ class Lexer if tok != -1 then accept_state = dfa_state accept_token = tok - accept_length = text.length + accept_length = sp - start_stream_pos accept_pos = _pos accept_line = _line end else if accept_state != -1 then - var location = new Location(_filename, start_line + 1, accept_line + 1, start_pos + 1, accept_pos) + var location = new Location(_file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos) _pos = accept_pos _line = accept_line - push_back(accept_length) + _stream_pos = start_stream_pos + accept_length if accept_token == 0 then return null end if accept_token == 1 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new 
TEol.init_tk(token_text, location) end if accept_token == 2 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TComment.init_tk(token_text, location) end if accept_token == 3 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TKwmodule.init_tk(token_text, location) end if accept_token == 4 then @@ -1388,7 +1372,7 @@ class Lexer return new TKwinterface.init_tk(location) end if accept_token == 8 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TKwenum.init_tk(token_text, location) end if accept_token == 9 then @@ -1605,48 +1589,50 @@ class Lexer return new TBang.init_tk(location) end if accept_token == 80 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TClassid.init_tk(token_text, location) end if accept_token == 81 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TId.init_tk(token_text, location) end if accept_token == 82 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TAttrid.init_tk(token_text, location) end if accept_token == 83 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TNumber.init_tk(token_text, location) end if accept_token == 84 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TFloat.init_tk(token_text, location) end if accept_token == 85 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) 
return new TChar.init_tk(token_text, location) end if accept_token == 86 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TString.init_tk(token_text, location) end if accept_token == 87 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TStartString.init_tk(token_text, location) end if accept_token == 88 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TMidString.init_tk(token_text, location) end if accept_token == 89 then - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new TEndString.init_tk(token_text, location) end else - var location = new Location(_filename, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1) - if text.length > 0 then + _stream_pos = sp + var location = new Location(_file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1) + if sp > start_stream_pos then + var text = string.substring(start_stream_pos, sp-start_stream_pos) var token = new AError.init_error("Syntax error: unknown token {text}.", location) return token else @@ -1657,45 +1643,5 @@ class Lexer end end end - - # Read the next character. - # The character is read from the stream of from the pushback buffer. - private fun get_char: Int - do - if _eof then - return -1 - end - - var result: Int - - var sp = _stream_pos - if sp >= 0 then - var res = _stream_buf[_stream_pos] - _stream_pos = sp - 1 - result = res.ascii - else - result = _stream.read_char - end - - if result == -1 then - _eof = true - end - - return result - end - - # Unread some characters. - # Unread characters are stored in the pushback buffer. 
- private fun push_back(accept_length: Int) - do - var length = _text.length - var i = length - 1 - while i >= accept_length do - _eof = false - _stream_pos = _stream_pos + 1 - _stream_buf[_stream_pos] = _text[i] - i = i - 1 - end - end end diff --git a/src/parser/xss/lexer.xss b/src/parser/xss/lexer.xss index 780a118..070295d 100644 --- a/src/parser/xss/lexer.xss +++ b/src/parser/xss/lexer.xss @@ -27,17 +27,11 @@ class Lexer # Lexer current state var _state: Int = 0 - # Name of the stream (as given to tokens) - readable var _filename: String + # The source file + readable var _file: SourceFile - # Input stream where character are read - var _stream: IStream - - # Pushback buffer to store unread character - var _stream_buf: Buffer - - # Number of character stored in the pushback buffer - var _stream_pos: Int + # Current character in the stream + var _stream_pos: Int = 0 # Current line number in the input stream var _line: Int = 0 @@ -48,25 +42,15 @@ class Lexer # Was the last character a cariage-return? var _cr: Bool = false - # If the end of stream? 
- var _eof: Bool = false - - # Current working text read from the input stream - var _text: Buffer - $ foreach {lexer_data/state} # Constante state values private fun state_${translate(@name,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: Int do return @id end $ end foreach # Create a new lexer for a stream (and a name) - init(stream: IStream, fname: String) + init(file: SourceFile) do - _filename = fname - _text = new Buffer - _stream = stream - _stream_pos = -1 - _stream_buf = new Buffer + _file = file end # Give the next token (but do not consume it) @@ -94,8 +78,12 @@ $ end foreach do var dfa_state = 0 + var sp = _stream_pos + var start_stream_pos = sp var start_pos = _pos var start_line = _line + var string = _file.string + var string_len = string.length var accept_state = -1 var accept_token = -1 @@ -103,13 +91,13 @@ $ end foreach var accept_pos = -1 var accept_line = -1 - var text = _text - text.clear - loop - var c = get_char + if sp >= string_len then + dfa_state = -1 + else + var c = string[sp].ascii + sp += 1 - if c != -1 then var cr = _cr var line = _line var pos = _pos @@ -129,8 +117,6 @@ $ end foreach cr = false end - text.add(c.ascii) - loop var old_state = dfa_state if dfa_state < -1 then @@ -163,8 +149,6 @@ $ end foreach _cr = cr _line = line _pos = pos - else - dfa_state = -1 end if dfa_state >= 0 then @@ -172,16 +156,16 @@ $ end foreach if tok != -1 then accept_state = dfa_state accept_token = tok - accept_length = text.length + accept_length = sp - start_stream_pos accept_pos = _pos accept_line = _line end else if accept_state != -1 then - var location = new Location(_filename, start_line + 1, accept_line + 1, start_pos + 1, accept_pos) + var location = new Location(_file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos) _pos = accept_pos _line = accept_line - push_back(accept_length) + _stream_pos = start_stream_pos + accept_length $ foreach {//token} if accept_token == ${position()-1} then $ if 
{count(transition[@from!=@to])!=0} @@ -194,7 +178,7 @@ $ end $ end if $ if {@parser_index} $ if {not(@text)} - var token_text = text.substring(0, accept_length) + var token_text = string.substring(start_stream_pos, accept_length) return new @ename.init_tk(token_text, location) $ else return new @ename.init_tk(location) @@ -205,8 +189,10 @@ $ end end $ end foreach else - var location = new Location(_filename, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1) - if text.length > 0 then + _stream_pos = sp + var location = new Location(_file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1) + if sp > start_stream_pos then + var text = string.substring(start_stream_pos, sp-start_stream_pos) var token = new PError.init_error("Syntax error: unknown token {text}.", location) return token else @@ -217,46 +203,6 @@ $ end foreach end end end - - # Read the next character. - # The character is read from the stream of from the pushback buffer. - private fun get_char: Int - do - if _eof then - return -1 - end - - var result: Int - - var sp = _stream_pos - if sp >= 0 then - var res = _stream_buf[_stream_pos] - _stream_pos = sp - 1 - result = res.ascii - else - result = _stream.read_char - end - - if result == -1 then - _eof = true - end - - return result - end - - # Unread some characters. - # Unread characters are stored in the pushback buffer. - private fun push_back(accept_length: Int) - do - var length = _text.length - var i = length - 1 - while i >= accept_length do - _eof = false - _stream_pos = _stream_pos + 1 - _stream_buf[_stream_pos] = _text[i] - i = i - 1 - end - end end $ end template diff --git a/src/syntax/syntax.nit b/src/syntax/syntax.nit index a4cbaf0..aa17066 100644 --- a/src/syntax/syntax.nit +++ b/src/syntax/syntax.nit @@ -42,7 +42,8 @@ class SrcModuleLoader context.error( null, "{filename}: Error module name \"{name}\", must start with a lower case letter and contain only letters, digits and '_'." 
) end - var lexer = new Lexer(file, filename) + var source = new SourceFile(filename, file) + var lexer = new Lexer(source) var parser = new Parser(lexer) var node_tree = parser.parse if node_tree.n_base == null then @@ -52,7 +53,7 @@ class SrcModuleLoader end var node_module = node_tree.n_base assert node_module != null - var module_loc = new Location.with_file(filename) + var module_loc = new Location.with_file(source) var mod = new MMSrcModule(context, node_module, dir, name, module_loc) return mod end -- 1.7.9.5