X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/kernel.nit b/lib/standard/kernel.nit index a7d31d4..11c4419 100644 --- a/lib/standard/kernel.nit +++ b/lib/standard/kernel.nit @@ -5,20 +5,24 @@ # # This file is free software, which comes along with NIT. This software is # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. You can modify it is you want, provided this header # is kept unaltered, and a notification of the changes is added. # You are allowed to redistribute it and sell it, alone or is a part of # another product. -# Most minimal classes and methods. -# This module is the root of the standard module hierarchy. +# Most basic classes and methods. +# +# This module is the root of the module hierarchy. +# It provides a very minimal set of classes and services used as a +# foundation to define other classes and methods. module kernel import end # Mark this module is a top level one. (must be only one) -`{ -#include +in "C" `{ + #include + #include `} ############################################################################### @@ -26,9 +30,13 @@ import end # Mark this module is a top level one. (must be only one) ############################################################################### # The root of the class hierarchy. -# Each class implicitly specialize Object. # -# Currently, Object is also used to collect all top-level methods. +# Each other class implicitly specializes Object, +# therefore the services of Object are inherited by every other class and are usable +# on each value, including primitive types like integers (`Int`), strings (`String`) and arrays (`Array`). +# +# Note that `nullable Object`, not `Object`, is the root of the type hierarchy +# since the special value `null` is not considered as an instance of Object. 
interface Object # Type of this instance, automatically specialized in every class # @@ -52,33 +60,124 @@ interface Object # `SELF`, pretty much the same things as you would do with parameter types. type SELF: Object - # The unique object identifier in the class. - # Unless specific code, you should not use this method. - # The identifier is used internally to provide a hash value. + # An internal hash code for the object based on its identity. + # + # Unless specific code, you should not use this method but + # use `hash` instead. + # + # As its name hints, the internal hash code is used internally + # to provide a hash value. + # It is also used by the `inspect` method to loosely identify objects + # and helps debugging. + # + # ~~~ + # var a = "Hello" + # var b = a + # assert a.object_id == b.object_id + # ~~~ + # + # The specific details of the internal hash code are left to the specific + # engine. The rules are the following: + # + # * The `object_id` MUST be invariant for the whole life of the object. + # * Two living instances of the same class SHOULD NOT share the same `object_id`. + # * Two instances of different classes MIGHT share the same `object_id`. + # * The `object_id` of a garbage-collected instance MIGHT be reused by new instances. + # * The `object_id` of an object MIGHT be non constant across different executions. + # + # For instance, the `nitc` compiler uses the address of the object in memory + # as its `object_id`. + # + # TODO rename to something like `internal_hash_code` fun object_id: Int is intern # Return true if `self` and `other` have the same dynamic type. - # Unless specific code, you should not use this method. + # + # ~~~ + # assert 1.is_same_type(2) + # assert "Hello".is_same_type("World") + # assert not "Hello".is_same_type(2) + # ~~~ + # + # The method returns false if the dynamic type of `other` is a subtype of the dynamic type of `self` + # (or the other way around). 
+ # + # Unless specific code, you should not use this method because it is inconsistent + # with the fact that a subclass can be used in lieu of a superclass. fun is_same_type(other: Object): Bool is intern - # Return true if `self` and `other` are the same instance. - # Unless specific code, you should use `==` instead. + # Return true if `self` and `other` are the same instance (i.e. same identity). + # + # ~~~ + # var a = new Buffer + # var b = a + # var c = new Buffer + # assert a.is_same_instance(b) + # assert not a.is_same_instance(c) + # assert a == c # because both buffers are empty + # ~~~ + # + # Obviously, the identity of an object is preserved even if the object is mutated. + # + # ~~~ + # var x = [1] + # var y = x + # x.add 2 + # assert x.is_same_instance(y) + # ~~~ + # + # Unless specific code, you should use `==` instead of `is_same_instance` because + # most of the time it is the semantic (and user-defined) comparison that makes sense. + # + # Moreover, relying on `is_same_instance` on objects you do not control + # might have unexpected effects when libraries reuse objects or intern them. fun is_same_instance(other: nullable Object): Bool is intern # Have `self` and `other` the same value? - ## - # The exact meaning of "same value" is let to the subclasses. - # Implicitly, the default implementation, is `is_same_instance` + # + # ~~~ + # assert 1 + 1 == 2 + # assert not 1 == "1" + # assert 1.to_s == "1" + # ~~~ + # + # The exact meaning of *same value* is left to the subclasses. + # Implicitly, the default implementation is `is_same_instance`. + # + # The laws of `==` are the following: + # + # * reflexivity: `a.is_same_instance(b) implies a == b` + # * symmetry: `(a == b) == (b == a)` + # * transitivity: `(a == b) and (b == c) implies (a == c)` + # + # `==` might not be constant on some objects over time because of their evolution. 
+ # + # ~~~ + # var a = [1] + # var b = [1] + # var c = [1,2] + # assert a == b and not a == c + # a.add 2 + # assert not a == b and a == c + # ~~~ + # + # Lastly, `==` is highly linked with `hash` and a specific redefinition of `==` should + # usually be associated with a specific redefinition of `hash`. + # + # ENSURE `result implies self.hash == other.hash` fun ==(other: nullable Object): Bool do return self.is_same_instance(other) # Have `self` and `other` different values? - ## - # != is equivalent with "not ==". + # + # `!=` is equivalent with `not ==`. fun !=(other: nullable Object): Bool do return not (self == other) # Display self on stdout (debug only). + # # This method MUST not be used by programs, it is here for debugging - # only and can be removed without any notice + # only and can be removed without any notice. + # + # TODO: rename to avoid blocking a good identifier like `output`. fun output do '<'.output @@ -87,33 +186,117 @@ interface Object end # Display class name on stdout (debug only). + # # This method MUST not be used by programs, it is here for debugging - # only and can be removed without any notice - fun output_class_name is intern + # only and can be removed without any notice. + # + # TODO: rename to avoid blocking a good identifier like `output`. + fun output_class_name is intern # The hash code of the object. - # Assuming that a == b -> a.hash == b.hash - ## - # Without redefinition, it is based on the `object_id` of the instance. + # + # The hash code is used in many data-structures and algorithms to identify objects that might be equal. + # Therefore, the precise semantic of `hash` is highly linked with the semantic of `==` + # and the only law of `hash` is that `a == b implies a.hash == b.hash`. + # + # ~~~ + # assert (1+1).hash == 2.hash + # assert 1.to_s.hash == "1".hash + # ~~~ + # + # `hash` (like `==`) might not be constant on some objects over time because of their evolution. 
+ # + # ~~~ + # var a = [1] + # var b = [1] + # var c = [1,2] + # assert a.hash == b.hash + # a.add 2 + # assert a.hash == c.hash + # # There is a very high probability that `b.hash != c.hash` + # ~~~ + # + # A specific redefinition of `==` should usually be associated with a specific redefinition of `hash`. + # Note that, unfortunately, a correct definition of `hash` that is lawful with `==` is sometimes tricky + # and a cause of bugs. + # + # Without redefinition, `hash` is based on the `object_id` of the instance. fun hash: Int do return object_id / 8 end # The main class of the program. -# `Sys` is a singleton class, its only instance is `sys` defined in `Object`. -# `sys` is used to invoke methods on the program on the system. +# +# `Sys` is a singleton class; its only instance is accessible from everywhere with `sys`. +# +# Because of this, methods that should be accessible from everywhere, like `print` or `exit`, +# are defined in `Sys`. +# Moreover, unless there is an ambiguity with `self`, the receiver of a call to these methods is implicitly `sys`. +# Basically it means that the two following instructions are equivalent. +# +# ~~~nit +# print "Hello World" +# sys.print "Hello World" +# ~~~ +# +# ## Methods Implicitly Defined in Sys +# +# `Sys` is the class where top-level methods are defined, +# i.e. those defined outside of any class like in a procedural language. +# Basically it means that +# +# ~~~nitish +# redef class Sys +# fun foo do print "hello" +# end +# ~~~ +# +# is equivalent to +# +# ~~~nitish +# fun foo do print "hello" +# ~~~ +# +# As a corollary, in a top-level method, `self` (the current receiver) is always `sys`. class Sys - # Instructions outside classes implicitly redefine this method. + # The main method of a program. + # + # In a module, the instructions defined outside any classes or methods + # (usually called the *main* of the module) are + # an implicit definition of this `main` method. 
+ # Basically it means that the following program + # + # ~~~nit + # print "Hello World" + # ~~~ + # + # is equivalent with + # + # ~~~nit + # redef class Sys + # redef fun main do + # print "Hello World" + # end + # end + # ~~~ fun main do end # The entry point for the execution of the whole program. - # Its job is to call `main` but some modules may want to refine it + # + # When a program starts, the following implicit sequence of instructions is executed + # + # ~~~nitish + # sys = new Sys + # sys.run + # ~~~ + # + # Whereas the job of the `run` method is just to execute `main`. + # + # The only reason of the existence of `run` is to allow modules to refine it # and inject specific work before or after the main part. fun run do main # Number of the last error - fun errno: Int is extern `{ - return errno; - `} + fun errno: Int `{ return errno; `} end # Quit the program with a specific return code @@ -134,7 +317,7 @@ interface Comparable type OTHER: Comparable # Is `self` lesser than `other`? - fun <(other: OTHER): Bool is abstract + fun <(other: OTHER): Bool is abstract # not `other` < `self` # Note, the implementation must ensure that: `(x<=y) == (x=(i): Bool is intern - redef fun >(i): Bool is intern + redef fun <=(i) is intern + redef fun <(i) is intern + redef fun >=(i) is intern + redef fun >(i) is intern redef fun +(i) is intern redef fun - is intern @@ -330,6 +539,7 @@ universal Float redef fun to_i is intern redef fun to_f do return self + redef fun to_b is intern redef fun zero do return 0.0 redef fun value_of(val) do return val.to_f @@ -388,6 +598,110 @@ universal Float end end +# Native bytes. 
+# Same as a C `unsigned char` +universal Byte + super Discrete + super Numeric + + redef type OTHER: Byte + + redef fun successor(i) do return self + i.to_b + redef fun predecessor(i) do return self - i.to_b + + redef fun object_id is intern + redef fun hash do return self.to_i + redef fun ==(i) is intern + redef fun !=(i) is intern + redef fun output is intern + + redef fun <=(i) is intern + redef fun <(i) is intern + redef fun >=(i) is intern + redef fun >(i) is intern + redef fun +(i) is intern + + # On a Byte, unary minus will return `(256 - self) % 256` + # + # assert -(1.to_b) == 0xFF.to_b + # assert -(0.to_b) == 0x00.to_b + redef fun - is intern + redef fun -(i) is intern + redef fun *(i) is intern + redef fun /(i) is intern + + # Modulo of `self` with `i`. + # + # Finds the remainder of the division of `self` by `i`. + # + # assert 5.to_b % 2.to_b == 1.to_b + # assert 10.to_b % 2.to_b == 0.to_b + fun %(i: Byte): Byte is intern + + redef fun zero do return 0.to_b + redef fun value_of(val) do return val.to_b + + # `i` bits shift to the left (aka <<) + # + # assert 5.to_b.lshift(1) == 10.to_b + fun lshift(i: Int): Byte is intern + + # alias of `lshift` + fun <<(i: Int): Byte do return lshift(i) + + # `i` bits shift to the right (aka >>) + # + # assert 5.to_b.rshift(1) == 2.to_b + fun rshift(i: Int): Byte is intern + + # alias of `rshift` + fun >>(i: Int): Byte do return rshift(i) + + redef fun to_i is intern + redef fun to_f is intern + redef fun to_b do return self + + redef fun distance(i) do return (self - i).to_i + + redef fun <=>(other) + do + if self < other then + return -1 + else if other < self then + return 1 + else + return 0 + end + end + + redef fun is_between(c, d) + do + if self < c or d < self then + return false + else + return true + end + end + + redef fun max(other) + do + if self < other then + return other + else + return self + end + end + + redef fun min(c) + do + if c < self then + return c + else + return self + end + end +end + # 
Native integer numbers. # Correspond to C int. universal Int @@ -432,13 +746,20 @@ universal Int # assert 5.lshift(1) == 10 fun lshift(i: Int): Int is intern + # alias of `lshift` + fun <<(i: Int): Int do return lshift(i) + # `i` bits shift fo the right (aka >>) # # assert 5.rshift(1) == 2 fun rshift(i: Int): Int is intern + # alias of `rshift` + fun >>(i: Int): Int do return rshift(i) + redef fun to_i do return self redef fun to_f is intern + redef fun to_b is intern redef fun distance(i) do @@ -463,9 +784,9 @@ universal Int redef fun is_between(c, d) do - if self < c or d < self then + if self < c or d < self then return false - else + else return true end end @@ -516,7 +837,7 @@ universal Int # count digits while n > 0 do d += 1 - n = n / b # euclidian division / + n = n / b # euclidian division / end return d end @@ -582,10 +903,23 @@ universal Char redef type OTHER: Char redef fun object_id is intern + redef fun output `{ + if(self < 128){ + printf("%c", self); + }else if(self < 2048){ + printf("%c%c", 0xC0 | ((0x7C0 & self) >> 6), 0x80 | (0x3F & self)); + }else if(self < 65536){ + printf("%c%c%c", 0xE0 | ((0xF000 & self) >> 12), 0x80 | ((0xFC0 & self) >> 6) ,0x80 | (0x3F & self)); + }else if(self < 2097152){ + printf("%c%c%c%c", 0xF0 | ((0x1C0000 & self) >> 18), 0x80 | ((0x3F000 & self) >> 12), 0x80 | ((0xFC0 & self) >> 6), 0x80 | (0x3F & self)); + }else{ + // Bad char + printf("%c", self); + } + `} redef fun hash do return ascii redef fun ==(o) is intern redef fun !=(o) is intern - redef fun output is intern redef fun <=(i) is intern redef fun <(i) is intern @@ -698,13 +1032,29 @@ universal Char do return is_lower or is_upper end + + # Is self a whitespace character? + # + # These correspond to the "Other" and "Separator" groups of Unicode. + # + # In the ASCII encoding, these are the characters less than or equal to space (0x20), plus delete (0x7F). 
+ # + # assert 'A'.is_whitespace == false + # assert ','.is_whitespace == false + # assert ' '.is_whitespace == true + # assert '\t'.is_whitespace == true + fun is_whitespace: Bool + do + var i = ascii + return i <= 0x20 or i == 0x7F + end end # Pointer classes are used to manipulate extern C structures. extern class Pointer # Is the address behind this Object at NULL? - fun address_is_null: Bool is extern "address_is_null" + fun address_is_null: Bool `{ return self == NULL; `} # Free the memory pointed by this pointer - fun free `{ free(recv); `} + fun free `{ free(self); `} end