When searching for a module, generate a list of introduced classes and properties to add to the user module documentation.
So from within vim, when entering `:Nit opts`, it shows:
~~~
# opts
Management of options on the command line
## Introduced classes
* Option: Super class of all option's class
* OptionArray: An option with an array as parameter
* OptionBool: A boolean option, `true` when present, `false` if not
* OptionContext: Context where the options process
* OptionCount: A count option. Count the number of time this option is present
* OptionEnum: An option to choose from an enumeration
* OptionFloat: An option with a Float as parameter
* OptionInt: An option with an Int as parameter
* OptionParameter: Option with one parameter (mandatory by default)
* OptionString: An option with a `String` as parameter
* OptionText: Not really an option. Just add a line of text when displaying the usage
## Introduced properties
+ VALUE # Type of the value of the option
+ add_aliases(names: String...) # Add new aliases for this option
+ default_value: VALUE # Default value of this option
+ default_value=(default_value: VALUE) # Default value of this option
+ errors: Array[String] # Gathering errors during parsing
~ errors=(errors: Array[String]) # Gathering errors during parsing
+ helptext: String # Human readable description of the option
~ helptext=(helptext: String) # Human readable description of the option
+ hidden: Bool # Is this option hidden from `usage`?
+ hidden=(hidden: Bool) # Is this option hidden from `usage`?
+ init(help: String, default: VALUE, names: nullable Array[String]) # Create a new option
[...]
~~~
The list of properties can be long, but it is useful to search by keywords.
As usual, use `:pc` to **c**lose the **p**review window showing the doc.
Pull-Request: #1645
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
# Returns a truncated string when an error is pending (`last_error != null`).
fun read_string: String
do
- var buf = new FlatBuffer
+ var buf = new Bytes.empty
loop
var byte = read_byte
- if byte == null or byte == 0x00u8 then return buf.to_s
- buf.bytes.add byte
+ if byte == null or byte == 0u8 then
+ return buf.to_s
+ end
+ buf.add byte
end
end
redef fun to_s do
persisted = true
- return new FlatString.with_infos(items, length, 0, length -1)
+ var b = self
+ if not is_utf8 then
+ b = clean_utf8
+ persisted = false
+ end
+ return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
end
redef fun iterator do return new BytesIterator.with_buffer(self)
+
+ # Is the byte collection valid UTF-8 ?
+ #
+ # Returns `false` as soon as one of the following is found:
+ #
+ # * a lead byte that does not match the pattern of its announced length
+ # * a sequence cut short by the end of the collection
+ # * a continuation byte that is not of the form `10xxxxxx`
+ # * a code point outside the range allowed for its length (overlong
+ #   encodings, values above U+10FFFF)
+ # * a surrogate (U+D800..U+DFFF), U+FFFE or U+FFFF
+ fun is_utf8: Bool do
+ 	# Pairs of (mask, expected value) for the lead byte of a 1, 2, 3 and 4-byte sequence
+ 	var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
+ 	# Smallest and largest code point encodable on 1, 2, 3 and 4 bytes
+ 	var lobounds = once [0, 0x80, 0x800, 0x10000]
+ 	var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
+ 	var pos = 0
+ 	var len = length
+ 	var mits = items
+ 	while pos < len do
+ 		var nxst = mits.length_of_char_at(pos)
+ 		# The lead byte announces more bytes than what is left: truncated sequence
+ 		if pos + nxst > len then return false
+ 		var charst_index = (nxst - 1) * 2
+ 		if mits[pos] & charst[charst_index] != charst[charst_index + 1] then return false
+ 		# Every byte after the lead must be a continuation byte (`10xxxxxx`)
+ 		for i in [pos + 1 .. pos + nxst[ do
+ 			if mits[i] & 0xC0u8 != 0x80u8 then return false
+ 		end
+ 		var cp = mits.char_at(pos).ascii
+ 		# Rejects overlong encodings and out-of-range values
+ 		if cp > hibounds[nxst - 1] or cp < lobounds[nxst - 1] then return false
+ 		if cp >= 0xD800 and cp <= 0xDFFF then return false
+ 		if cp == 0xFFFE or cp == 0xFFFF then return false
+ 		pos += nxst
+ 	end
+ 	return true
+ end
+
+ # Cleans the bytes of `self` to be UTF-8 compliant
+ #
+ # Returns a new `Bytes` where valid sequences are copied as-is and where
+ # each offending byte is replaced by the UTF-8 encoding of U+FFFD
+ # (`0xEF 0xBF 0xBD`). `self` is not modified.
+ #
+ # A sequence is considered invalid when its lead byte is malformed, when it
+ # is cut short by the end of the collection, when one of its continuation
+ # bytes is not of the form `10xxxxxx`, when its code point is out of the
+ # range allowed for its length, or when it encodes a surrogate, U+FFFE or
+ # U+FFFF.
+ private fun clean_utf8: Bytes do
+ 	# Pairs of (mask, expected value) for the lead byte of a 1, 2, 3 and 4-byte sequence
+ 	var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
+ 	# UTF-8 encoding of the replacement character U+FFFD
+ 	var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
+ 	# Smallest and largest code point encodable on 1, 2, 3 and 4 bytes
+ 	var lobounds = once [0, 0x80, 0x800, 0x10000]
+ 	var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
+ 	var pos = 0
+ 	var len = length
+ 	var ret = new Bytes.with_capacity(len)
+ 	var mits = items
+ 	while pos < len do
+ 		var nxst = mits.length_of_char_at(pos)
+ 		var charst_index = (nxst - 1) * 2
+ 		# The sequence must fit in the collection and start with a well-formed lead byte
+ 		var valid = pos + nxst <= len and
+ 			mits[pos] & charst[charst_index] == charst[charst_index + 1]
+ 		if valid then
+ 			# Every byte after the lead must be a continuation byte (`10xxxxxx`)
+ 			for i in [pos + 1 .. pos + nxst[ do
+ 				if mits[i] & 0xC0u8 != 0x80u8 then
+ 					valid = false
+ 					break
+ 				end
+ 			end
+ 		end
+ 		if valid then
+ 			var cp = mits.char_at(pos).ascii
+ 			valid = cp >= lobounds[nxst - 1] and cp <= hibounds[nxst - 1] and
+ 				not (cp >= 0xD800 and cp <= 0xDFFF) and
+ 				cp != 0xFFFE and cp != 0xFFFF
+ 		end
+ 		if valid then
+ 			for i in [pos .. pos + nxst[ do ret.add mits[i]
+ 			pos += nxst
+ 		else
+ 			# Replace the offending byte only, then resynchronize on the next one
+ 			ret.append badchar
+ 			pos += 1
+ 		end
+ 	end
+ 	return ret
+ end
end
private class BytesIterator
# ~~~
fun to_bytes: Bytes do
var b = new Bytes.with_capacity(bytelen)
+ append_to_bytes b
+ return b
+ end
+
+ # Appends `self.bytes` to `b`
+ fun append_to_bytes(b: Bytes) do
for s in substrings do
var from = if s isa FlatString then s.first_byte else 0
b.append_ns_from(s.items, s.bytelen, from)
end
- return b
end
end
redef class FlatText
- redef fun to_bytes do
- var len = bytelen
- var b = new Bytes.with_capacity(len)
+ redef fun append_to_bytes(b) do
var from = if self isa FlatString then first_byte else 0
- b.append_ns_from(items, len, from)
- return b
+ b.append_ns_from(items, bytelen, from)
end
end
--- /dev/null
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base for codecs to use with streams
+#
+# A Codec (Coder/Decoder) is a transformer from one byte format to another
+#
+# As Nit Strings are UTF-8, a codec works as:
+# - Coder: From a UTF-8 string to a specified format (writing)
+# - Decoder: From a specified format to a UTF-8 string (reading)
+module codec_base
+
+import text
+import bytes
+
+# Codes UTF-8 entities to an external format
+#
+# Each service comes in two flavours: one returning new `Bytes`,
+# one taking existing `Bytes` as a destination.
+abstract class Coder
+
+ # Transforms `c` to its representation in the format of `self`
+ #
+ # The result is returned as `Bytes`.
+ fun code_char(c: Char): Bytes is abstract
+
+ # Adds a char `c` to bytes `s`
+ #
+ # `s` is the destination, nothing is returned.
+ fun add_char_to(c: Char, s: Bytes) is abstract
+
+ # Transforms `s` to the format of `self`
+ #
+ # The result is returned as `Bytes`.
+ fun code_string(s: Text): Bytes is abstract
+
+ # Adds a string `s` to bytes `b`
+ #
+ # `b` is the destination, nothing is returned.
+ fun add_string_to(s: Text, b: Bytes) is abstract
+end
+
+# Decodes entities in an external format to UTF-8
+abstract class Decoder
+
+ # Decodes a char from `b` to a Unicode code-point
+ #
+ # `b` holds the bytes to decode; the result is a `Char`.
+ fun decode_char(b: Bytes): Char is abstract
+
+ # Decodes a string `b` to UTF-8
+ #
+ # `b` holds the bytes to decode; the result is a `String`.
+ fun decode_string(b: Bytes): String is abstract
+end
--- /dev/null
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Group module for all codec-related manipulations
+module codecs
+
+import codec_base
+import utf8
--- /dev/null
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Codec for UTF-8 I/O
+module utf8
+
+import codec_base
+
+# Coder for UTF-8 output: chars and strings are emitted as their own bytes
+private class UTF8Coder
+	super Coder
+
+	redef fun code_char(c)
+	do
+		var text = c.to_s
+		return text.to_bytes
+	end
+
+	redef fun add_char_to(c, stream)
+	do
+		var text = c.to_s
+		text.append_to_bytes(stream)
+	end
+
+	redef fun code_string(s)
+	do
+		return s.to_bytes
+	end
+
+	redef fun add_string_to(s, b)
+	do
+		s.append_to_bytes(b)
+	end
+end
+
+# Decoder for UTF-8 input, relying on `Bytes::to_s` for the conversion
+private class UTF8Decoder
+	super Decoder
+
+	# First char of `b` once converted to a string
+	redef fun decode_char(b) do return b.to_s[0]
+
+	# `b` converted to a string
+	redef fun decode_string(b) do return b.to_s
+end
+
+# The UTF-8 `Coder`; the instance is created once and returned by every call
+fun utf8_coder: Coder
+do
+	return once new UTF8Coder
+end
+
+# The UTF-8 `Decoder`; the instance is created once and returned by every call
+fun utf8_decoder: Decoder
+do
+	return once new UTF8Decoder
+end
intrude import text::ropes
import error
intrude import bytes
+import codecs
in "C" `{
#include <unistd.h>
# A `Stream` that can be read from
abstract class Reader
super Stream
+
+ # Decoder used to transform input bytes to UTF-8
+ var decoder: Decoder = utf8_decoder is writable
+
# Reads a character. Returns `null` on EOF or timeout
fun read_char: nullable Char is abstract
# ~~~
fun read_all: String do
var s = read_all_bytes
+ if not s.is_utf8 then s = s.clean_utf8
var slen = s.length
if slen == 0 then return ""
var rets = ""
abstract class Writer
super Stream
+ # The coder from a nit UTF-8 String to the output file
+ var coder: Coder = utf8_coder is writable
+
# Writes bytes from `s`
fun write_bytes(s: Bytes) is abstract
return c
end
+ # Resets the internal buffer
fun buffer_reset do
_buffer_length = 0
_buffer_pos = 0
redef fun append_line_to(s)
do
+ var lb = new Bytes.with_capacity(10)
loop
# First phase: look for a '\n'
var i = _buffer_pos
# if there is something to append
if i > _buffer_pos then
- # Enlarge the string (if needed)
- s.enlarge(s.bytelen + i - _buffer_pos)
-
# Copy from the buffer to the string
var j = _buffer_pos
while j < i do
- s.bytes.add(_buffer[j])
+ lb.add(_buffer[j])
j += 1
end
_buffer_pos = i
else
assert end_reached
+ s.append lb.to_s
return
end
if eol then
# so \n is found
+ s.append lb.to_s
return
else
# so \n is not found
- if end_reached then return
+ if end_reached then
+ s.append lb.to_s
+ return
+ end
fill_buffer
end
end
# In Buffers, the internal sequence of character is mutable
# Thus, `chars` can be used to modify the buffer.
redef fun chars: Sequence[Char] is abstract
-
- # In Buffers, the internal sequence of bytes is mutable
- # Thus, `bytes` can be used to modify the buffer.
- redef fun bytes: Sequence[Byte] is abstract
end
# View for chars on Buffer objects, extends Sequence
# for mutation operations
private abstract class BufferByteView
super StringByteView
- super Sequence[Byte]
redef type SELFTYPE: Buffer
end
redef fun next do tgt = null
end
-# Immutable strings of characters.
-class FlatString
- super FlatText
- super String
-
- # Index at which `self` begins in `items`, inclusively
- private var first_byte: Int is noinit
+redef class FlatText
- # Index at which `self` ends in `items`, inclusively
- private var last_byte: Int is noinit
-
- redef var chars = new FlatStringCharView(self) is lazy
+ private fun first_byte: Int do return 0
- redef var bytes = new FlatStringByteView(self) is lazy
+ private fun last_byte: Int do return bytelen - 1
# Cache of the latest position (char) explored in the string
var position: Int = 0
+
# Cached position (bytes) in the NativeString underlying the String
var bytepos: Int = first_byte is lateinit
- redef var length is lazy do
- if bytelen == 0 then return 0
- var st = first_byte
- var its = items
- var ln = 0
- var lst = last_byte
- while st <= lst do
- st += its.length_of_char_at(st)
- ln += 1
- end
- return ln
- end
-
- redef fun [](index) do return items.char_at(char_to_byte_index(index))
-
# Index of the character `index` in `items`
private fun char_to_byte_index(index: Int): Int do
var ln = length
return ns_i
end
+ redef fun [](index) do return items.char_at(char_to_byte_index(index))
+end
+
+# Immutable strings of characters.
+class FlatString
+ super FlatText
+ super String
+
+ # Index at which `self` begins in `items`, inclusively
+ redef var first_byte is noinit
+
+ # Index at which `self` ends in `items`, inclusively
+ redef var last_byte is noinit
+
+ redef var chars = new FlatStringCharView(self) is lazy
+
+ redef var bytes = new FlatStringByteView(self) is lazy
+
+ redef var length is lazy do
+ if bytelen == 0 then return 0
+ var st = first_byte
+ var its = items
+ var ln = 0
+ var lst = last_byte
+ while st <= lst do
+ st += its.length_of_char_at(st)
+ ln += 1
+ end
+ return ln
+ end
+
redef fun reversed
do
var b = new FlatBuffer.with_capacity(bytelen + 1)
var mifrom = first_byte
if s isa FlatText then
var sits = s.items
- var sifrom = s.as(FlatString).first_byte
+ var sifrom = s.first_byte
var ns = new NativeString(nlen + 1)
mits.copy_to(ns, mlen, mifrom, 0)
sits.copy_to(ns, slen, sifrom, mlen)
redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
- redef var bytes: Sequence[Byte] = new FlatBufferByteView(self) is lazy
+ redef var bytes = new FlatBufferByteView(self) is lazy
redef var bytelen = 0
- # O(n)
- redef fun length do
- var max = bytelen
- if max == 0 then return 0
- var pos = 0
- var ln = 0
- var its = items
- while pos < max do
- pos += its.length_of_char_at(pos)
- ln += 1
- end
- return ln
- end
+ redef var length = 0
+
+ private var char_cache: Int = -1
+
+ private var byte_cache: Int = -1
private var capacity = 0
items.copy_to(items, bytelen - from, from, from - len)
end
- redef fun [](i)
- do
- assert i < length and i >= 0
- return items.char_at(items.char_to_byte_index(i))
- end
-
redef fun []=(index, item)
do
assert index >= 0 and index <= length
lshift_bytes(ip + clen, -size_diff)
end
bytelen += size_diff
+ bytepos += size_diff
items.set_char_at(ip, item)
end
enlarge(bytelen + clen)
items.set_char_at(bytelen, c)
bytelen += clen
- end
-
- private fun add_byte(b: Byte) do
- if written then reset
- is_dirty = true
- enlarge(bytelen + 1)
- items[bytelen] = b
- # FIXME: Might trigger errors
- bytelen += 1
+ length += 1
end
redef fun clear do
is_dirty = true
if written then reset
bytelen = 0
+ length = 0
end
redef fun empty do return new Buffer
#
# If `items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
- private init with_infos(items: NativeString, capacity, bytelen: Int)
+ private init with_infos(items: NativeString, capacity, bytelen, length: Int)
do
self.items = items
self.capacity = capacity
self.bytelen = bytelen
+ self.length = length
end
# Create a new string copied from `s`.
for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0)
end
bytelen = s.bytelen
+ length = s.length
capacity = s.bytelen
written = true
end
is_dirty = true
var sl = s.bytelen
enlarge(bytelen + sl)
- if s isa FlatString then
+ if s isa FlatText then
s.items.copy_to(items, sl, s.first_byte, bytelen)
- else if s isa FlatBuffer then
- s.items.copy_to(items, sl, 0, bytelen)
else
for i in s.substrings do append i
return
end
bytelen += sl
+ length += s.length
end
# Copies the content of self in `dest`
var byte_length = byteto - bytefrom + 1
var r_items = new NativeString(byte_length)
items.copy_to(r_items, byte_length, bytefrom, 0)
- return new FlatBuffer.with_infos(r_items, byte_length, byte_length)
+ return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
else
return new Buffer
end
redef fun [](index) do return target.items[index]
- redef fun []=(index, item)
- do
- assert index >= 0 and index <= target.bytelen
- if index == target.bytelen then
- add(item)
- return
- end
- target.items[index] = item
- end
-
- redef fun push(c)
- do
- target.add_byte(c)
- end
-
- fun enlarge(cap: Int)
- do
- target.enlarge(cap)
- end
-
- redef fun append(s)
- do
- var s_length = s.length
- if target.capacity < (target.length + s_length) then enlarge(s_length + target.length)
- var pos = target.length
- var its = target.items
- for i in s do
- its[pos] = i
- pos += 1
- end
- target.length += s.length
- end
-
redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self)
- redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self)
+ redef var bytes is lazy do return new RopeBufferBytes(self)
# The final string being built on the fly
private var str: String = ""
# Next available (e.g. unset) character in the `Buffer`
private var rpos = 0
+ # Length (in chars) of the buffered part
+ private var nslen = 0
+
# Keeps track of the buffer's currently dumped part
#
# This might happen if for instance, a String was being
end
if s isa FlatText then
var oits = s.items
- var from = if s isa FlatString then s.first_byte else 0
+ var from = s.first_byte
var remsp = buf_size - rpos
if slen <= remsp then
oits.copy_to(ns, slen, from, rpos)
rpos = rp
end
- private fun add_byte(b: Byte) do
- var rp = rpos
- if rp >= buf_size then
- dump_buffer
- rp = 0
- end
- ns[rp] = b
- rp += 1
- bytelen += 1
- rpos = rp
- end
-
# Converts the Buffer to a FlatString, appends it to
# the final String and re-allocates a new larger Buffer.
private fun dump_buffer do
end
end
- redef fun []=(i,c) do
- if i == target.length then target.add_byte c
- if i < target.str.length then
- # FIXME: Will need to be optimized and rewritten with Unicode
- var s = target.str
- var l = s.substring(0, i)
- var r = s.substring_from(i + 1)
- target.str = l + c.to_i.ascii.to_s + r
- else
- target.ns[i - target.str.length] = c
- end
- end
-
- redef fun add(c) do target.add_byte c
-
- redef fun push(c) do target.add_byte c
-
redef fun iterator_from(i) do return new RopeBufferByteIterator.from(target, i)
redef fun reverse_iterator_from(i) do return new RopeBufferByteReverseIterator.from(target, i)
-../lib/standard/stream.nit:451,6--17: Documentation warning: Undocumented property `buffer_reset`
test_advice_repeated_types.nit:36,15--20: Warning: useless type repetition on redefined attribute `_a`
test_advice_repeated_types.nit:37,18--20: Warning: useless type repetition on parameter `b1` for redefined method `b`
test_advice_repeated_types.nit:38,18--20: Warning: useless type repetition on parameter `c1` for redefined method `c`
--- /dev/null
+usage ./test_read_all file
[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
-This string is cool
-This string is coolA
-Ahis string is coolA
-This string is cool
-This string is coolA
-Ahis string is coolA
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
--- /dev/null
+UTF-8-test.txt
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Prints, char by char, what `read_all` returns for the file given as first argument
+if args.is_empty then
+	print "usage ./test_read_all file"
+	exit -1
+end
+
+var file = new FileReader.open(args[0])
+
+var s = file.read_all
+# The whole content is in `s`: release the file right away
+file.close
+
+for i in s do
+	printn i
+end
var b = new FlatBuffer.from(x)
-print b
-
-b.bytes.add 0x41u8
-
-print b
-
-b.bytes[0] = 0x41u8
-
-print b
+print b.bytes.to_a
+print b.bytes.reverse_iterator.to_a
var c = new RopeBuffer.from(x)
-print c
-
-c.bytes.add 0x41u8
-
-print c
-
-c.bytes[0] = 0x41u8
-
-print c
+print c.bytes
+print c.bytes.reverse_iterator.to_a