X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/kernel.nit b/lib/standard/kernel.nit index e2f8f1f..11c4419 100644 --- a/lib/standard/kernel.nit +++ b/lib/standard/kernel.nit @@ -5,20 +5,24 @@ # # This file is free software, which comes along with NIT. This software is # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. You can modify it is you want, provided this header # is kept unaltered, and a notification of the changes is added. # You are allowed to redistribute it and sell it, alone or is a part of # another product. -# Most minimal classes and methods. -# This module is the root of the standard module hierarchy. +# Most basic classes and methods. +# +# This module is the root of the module hierarchy. +# It provides a very minimal set of classes and services used as a +# foundation to define other classes and methods. module kernel import end # Mark this module is a top level one. (must be only one) -`{ -#include +in "C" `{ + #include + #include `} ############################################################################### @@ -26,37 +30,154 @@ import end # Mark this module is a top level one. (must be only one) ############################################################################### # The root of the class hierarchy. -# Each class implicitly specialize Object. # -# Currently, Object is also used to collect all top-level methods. +# Each other class implicitly specializes Object, +# therefore the services of Object are inherited by every other class and are usable +# on each value, including primitive types like integers (`Int`), strings (`String`) and arrays (`Array`). +# +# Note that `nullable Object`, not `Object`, is the root of the type hierarchy +# since the special value `null` is not considered as an instance of Object. 
interface Object - # The unique object identifier in the class. - # Unless specific code, you should not use this method. - # The identifier is used internally to provide a hash value. + # Type of this instance, automatically specialized in every class + # + # A common use case of the virtual type `SELF` is to type an attribute and + # store another instance of the same type as `self`. It can also be used as a + # return type to a method producing a copy of `self` or returning an instance + # expected to be the exact same type as self. + # + # This virtual type must be used with caution as it can hinder specialization. + # In fact, it imposes strict restrictions on all sub-classes and their usage. + # For example, using `SELF` as a return type of a method `foo` + # forces all subclasses to ensure that `foo` returns the correct and updated + # type. + # A dangerous usage takes the form of a method typed by `SELF` which creates + # and returns a new instance. + # If not correctly specialized, this method would break when invoked on a + # sub-class. + # + # A general rule for safe usage of `SELF` is to ensure that inputs typed + # `SELF` are stored in attributes typed `SELF` and returned by methods typed + # `SELF`, pretty much the same things as you would do with parameter types. + type SELF: Object + + # An internal hash code for the object based on its identity. + # + # Unless specific code, you should not use this method but + # use `hash` instead. + # + # As its name hints, the internal hash code is used internally + # to provide a hash value. + # It is also used by the `inspect` method to loosely identify objects + # and helps debugging. + # + # ~~~ + # var a = "Hello" + # var b = a + # assert a.object_id == b.object_id + # ~~~ + # + # The specific details of the internal hash code is left to the specific + # engine. The rules are the following: + # + # * The `object_id` MUST be invariant for the whole life of the object. 
+ # * Two living instances of the same class SHOULD NOT share the same `object_id`. + # * Two instances of different classes MIGHT share the same `object_id`. + # * The `object_id` of a garbage-collected instance MIGHT be reused by new instances. + # * The `object_id` of an object MIGHT be non-constant across different executions. + # + # For instance, the `nitc` compiler uses the address of the object in memory + # as its `object_id`. + # + # TODO rename to something like `internal_hash_code` fun object_id: Int is intern # Return true if `self` and `other` have the same dynamic type. - # Unless specific code, you should not use this method. + # + # ~~~ + # assert 1.is_same_type(2) + # assert "Hello".is_same_type("World") + # assert not "Hello".is_same_type(2) + # ~~~ + # + # The method returns false if the dynamic type of `other` is a subtype of the dynamic type of `self` + # (or the other way around). + # + # Unless specific code, you should not use this method because it is inconsistent + # with the fact that a subclass can be used in lieu of a superclass. fun is_same_type(other: Object): Bool is intern - # Return true if `self` and `other` are the same instance. - # Unless specific code, you should use `==` instead. + # Return true if `self` and `other` are the same instance (i.e. same identity). + # + # ~~~ + # var a = new Buffer + # var b = a + # var c = new Buffer + # assert a.is_same_instance(b) + # assert not a.is_same_instance(c) + # assert a == c # because both buffers are empty + # ~~~ + # + # Obviously, the identity of an object is preserved even if the object is mutated. + # + # ~~~ + # var x = [1] + # var y = x + # x.add 2 + # assert x.is_same_instance(y) + # ~~~ + # + # Unless specific code, you should use `==` instead of `is_same_instance` because + # most of the time it is the semantic (and user-defined) comparison that makes sense. 
+ # + # Moreover, relying on `is_same_instance` on objects you do not control + # might have unexpected effects when libraries reuse objects or intern them. fun is_same_instance(other: nullable Object): Bool is intern # Have `self` and `other` the same value? - ## - # The exact meaning of "same value" is let to the subclasses. - # Implicitly, the default implementation, is `is_same_instance` + # + # ~~~ + # assert 1 + 1 == 2 + # assert not 1 == "1" + # assert 1.to_s == "1" + # ~~~ + # + # The exact meaning of *same value* is left to the subclasses. + # Implicitly, the default implementation is `is_same_instance`. + # + # The laws of `==` are the following: + # + # * reflexivity: `a.is_same_instance(b) implies a == b` + # * symmetry: `(a == b) == (b == a)` + # * transitivity: `(a == b) and (b == c) implies (a == c)` + # + # `==` might not be constant on some objects over time because of their evolution. + # + # ~~~ + # var a = [1] + # var b = [1] + # var c = [1,2] + # assert a == b and not a == c + # a.add 2 + # assert not a == b and a == c + # ~~~ + # + # Lastly, `==` is highly linked with `hash` and a specific redefinition of `==` should + # usually be associated with a specific redefinition of `hash`. + # + # ENSURE `result implies self.hash == other.hash` fun ==(other: nullable Object): Bool do return self.is_same_instance(other) # Have `self` and `other` different values? - ## - # != is equivalent with "not ==". + # + # `!=` is equivalent with `not ==`. fun !=(other: nullable Object): Bool do return not (self == other) # Display self on stdout (debug only). + # # This method MUST not be used by programs, it is here for debugging - # only and can be removed without any notice + # only and can be removed without any notice. + # + # TODO: rename to avoid blocking a good identifier like `output`. fun output do '<'.output @@ -65,36 +186,126 @@ interface Object end # Display class name on stdout (debug only). 
+ # # This method MUST not be used by programs, it is here for debugging - # only and can be removed without any notice - fun output_class_name is intern - - # Quit the program with a specific return code - protected fun exit(exit_value: Int) is intern - - # Return the global sys object, the only instance of the `Sys` class. - protected fun sys: Sys is intern + # only and can be removed without any notice. + # + # TODO: rename to avoid blocking a good identifier like `output`. + fun output_class_name is intern # The hash code of the object. - # Assuming that a == b -> a.hash == b.hash - ## - # Without redefinition, it is based on the `object_id` of the instance. + # + # The hash code is used in many data-structures and algorithms to identify objects that might be equal. + # Therefore, the precise semantic of `hash` is highly linked with the semantic of `==` + # and the only law of `hash` is that `a == b implies a.hash == b.hash`. + # + # ~~~ + # assert (1+1).hash == 2.hash + # assert 1.to_s.hash == "1".hash + # ~~~ + # + # `hash` (like `==`) might not be constant on some objects over time because of their evolution. + # + # ~~~ + # var a = [1] + # var b = [1] + # var c = [1,2] + # assert a.hash == b.hash + # a.add 2 + # assert a.hash == c.hash + # # There is a very high probability that `b.hash != c.hash` + # ~~~ + # + # A specific redefinition of `==` should usually be associated with a specific redefinition of `hash`. + # Note that, unfortunately, a correct definition of `hash` that is lawful with `==` is sometime tricky + # and a cause of bugs. + # + # Without redefinition, `hash` is based on the `object_id` of the instance. fun hash: Int do return object_id / 8 end # The main class of the program. -# `Sys` is a singleton class, its only instance is `sys` defined in `Object`. -# `sys` is used to invoke methods on the program on the system. +# +# `Sys` is a singleton class, its only instance is accessible from everywhere with `sys`. 
+ # + # Because of this, methods that should be accessible from everywhere, like `print` or `exit`, + # are defined in `Sys`. + # Moreover, unless there is an ambiguity with `self`, the receiver of a call to these methods is implicitly `sys`. + # Basically it means that the two following instructions are equivalent. + # + # ~~~nit + # print "Hello World" + # sys.print "Hello World" + # ~~~ + # + # ## Methods Implicitly Defined in Sys + # + # `Sys` is the class where top-level methods are defined, + # i.e. those defined outside of any class like in a procedural language. + # Basically it means that + # + # ~~~nitish + # redef class Sys + # fun foo do print "hello" + # end + # ~~~ + # + # is equivalent with + # + # ~~~nitish + # fun foo do print "hello" + # ~~~ + # + # As a corollary, in a top-level method, `self` (the current receiver) is always `sys`. class Sys - # Instructions outside classes implicitly redefine this method. + # The main method of a program. + # + # In a module, the instructions defined outside any classes or methods + # (usually called the *main* of the module) are + # an implicit definition of this `main` method. + # Basically it means that the following program + # + # ~~~nit + # print "Hello World" + # ~~~ + # + # is equivalent with + # + # ~~~nit + # redef class Sys + # redef fun main do + # print "Hello World" + # end + # end + # ~~~ fun main do end + # The entry point for the execution of the whole program. + # + # When a program starts, the following implicit sequence of instructions is executed + # + # ~~~nitish + # sys = new Sys + # sys.run + # ~~~ + # + # Whereas the job of the `run` method is just to execute `main`. + # + # The only reason for the existence of `run` is to allow modules to refine it + # and inject specific work before or after the main part. 
+ fun run do main + # Number of the last error - fun errno: Int is extern `{ - return errno; - `} + fun errno: Int `{ return errno; `} end +# Quit the program with a specific return code +fun exit(exit_value: Int) is intern + +# Return the global sys object, the only instance of the `Sys` class. +fun sys: Sys is intern + + ############################################################################### # Abstract Classes # ############################################################################### @@ -106,7 +317,7 @@ interface Comparable type OTHER: Comparable # Is `self` lesser than `other`? - fun <(other: OTHER): Bool is abstract + fun <(other: OTHER): Bool is abstract # not `other` < `self` # Note, the implementation must ensure that: `(x<=y) == (x=(i: Float): Bool is intern - fun >(i: Float): Bool is intern - fun +(i: Float): Float is intern - fun -: Float is intern - fun -(i: Float): Float is intern - fun *(i: Float): Float is intern - fun /(i: Float): Float is intern + redef fun <=(i) is intern + redef fun <(i) is intern + redef fun >=(i) is intern + redef fun >(i) is intern - # The integer part of `self`. + redef fun +(i) is intern + redef fun - is intern + redef fun -(i) is intern + redef fun *(i) is intern + redef fun /(i) is intern + + redef fun to_i is intern + redef fun to_f do return self + redef fun to_b is intern + + redef fun zero do return 0.0 + redef fun value_of(val) do return val.to_f + + redef fun <=>(other) + do + if self < other then + return -1 + else if other < self then + return 1 + else + return 0 + end + end + + redef fun is_between(c, d) + do + if self < c or d < self then + return false + else + return true + end + end + + # Compare float numbers with a given precision. 
# - # assert (0.0).to_i == 0 - # assert (0.9).to_i == 0 - # assert (-0.9).to_i == 0 - # assert (9.9).to_i == 9 - # assert (-9.9).to_i == -9 - fun to_i: Int is intern + # Because of the loss of precision in floating numbers, + # the `==` method is often not the best way to compare them. + # + # ~~~ + # assert 0.01.is_approx(0.02, 0.1) == true + # assert 0.01.is_approx(0.02, 0.001) == false + # ~~~ + fun is_approx(other, precision: Float): Bool + do + assert precision >= 0.0 + return self <= other + precision and self >= other - precision + end + + redef fun max(other) + do + if self < other then + return other + else + return self + end + end + + redef fun min(c) + do + if c < self then + return c + else + return self + end + end +end + +# Native bytes. +# Same as a C `unsigned char` +universal Byte + super Discrete + super Numeric + + redef type OTHER: Byte + + redef fun successor(i) do return self + i.to_b + redef fun predecessor(i) do return self - i.to_b + + redef fun object_id is intern + redef fun hash do return self.to_i + redef fun ==(i) is intern + redef fun !=(i) is intern + redef fun output is intern + + redef fun <=(i) is intern + redef fun <(i) is intern + redef fun >=(i) is intern + redef fun >(i) is intern + redef fun +(i) is intern + + # On an Byte, unary minus will return `(256 - self) % 256` + # + # assert -(1.to_b) == 0xFF.to_b + # assert -(0.to_b) == 0x00.to_b + redef fun - is intern + redef fun -(i) is intern + redef fun *(i) is intern + redef fun /(i) is intern + + # Modulo of `self` with `i`. + # + # Finds the remainder of division of `self` by `i`. 
+ # + # assert 5.to_b % 2.to_b == 1.to_b + # assert 10.to_b % 2.to_b == 0.to_b + fun %(i: Byte): Byte is intern + + redef fun zero do return 0.to_b + redef fun value_of(val) do return val.to_b + + # `i` bits shift to the left (aka <<) + # + # assert 5.to_b.lshift(1) == 10.to_b + fun lshift(i: Int): Byte is intern + + # alias of `lshift` + fun <<(i: Int): Byte do return lshift(i) + + # `i` bits shift to the right (aka >>) + # + # assert 5.to_b.rshift(1) == 2.to_b + fun rshift(i: Int): Byte is intern + + # alias of `rshift` + fun >>(i: Int): Byte do return rshift(i) + + redef fun to_i is intern + redef fun to_f is intern + redef fun to_b do return self + + redef fun distance(i) do return (self - i).to_i + + redef fun <=>(other) + do + if self < other then + return -1 + else if other < self then + return 1 + else + return 0 + end + end + + redef fun is_between(c, d) + do + if self < c or d < self then + return false + else + return true + end + end + + redef fun max(other) + do + if self < other then + return other + else + return self + end + end + + redef fun min(c) + do + if c < self then + return c + else + return self + end + end end # Native integer numbers. # Correspond to C int. universal Int super Discrete + super Numeric + redef type OTHER: Int + redef fun successor(i) do return self + i + redef fun predecessor(i) do return self - i + redef fun object_id is intern redef fun hash do return self redef fun ==(i) is intern @@ -274,30 +724,43 @@ universal Int redef fun >=(i) is intern redef fun >(i) is intern redef fun +(i) is intern - fun -: Int is intern + + redef fun - is intern redef fun -(i) is intern - fun *(i: Int): Int is intern - fun /(i: Int): Int is intern + redef fun *(i) is intern + redef fun /(i) is intern + + # Modulo of `self` with `i`. + # + # Finds the remainder of division of `self` by `i`. 
+ # + # assert 5 % 2 == 1 + # assert 10 % 2 == 0 fun %(i: Int): Int is intern + redef fun zero do return 0 + redef fun value_of(val) do return val.to_i + # `i` bits shift fo the left (aka <<) # # assert 5.lshift(1) == 10 fun lshift(i: Int): Int is intern + # alias of `lshift` + fun <<(i: Int): Int do return lshift(i) + # `i` bits shift fo the right (aka >>) # # assert 5.rshift(1) == 2 fun rshift(i: Int): Int is intern - # The float equivalent of `self` - # - # assert 5.to_f == 5.0 - # assert 5.to_f != 5 # Float and Int are not equals - fun to_f: Float is intern + # alias of `rshift` + fun >>(i: Int): Int do return rshift(i) + + redef fun to_i do return self + redef fun to_f is intern + redef fun to_b is intern - redef fun succ is intern - redef fun prec is intern redef fun distance(i) do var d = self - i @@ -321,9 +784,9 @@ universal Int redef fun is_between(c, d) do - if self < c or d < self then + if self < c or d < self then return false - else + else return true end end @@ -348,8 +811,8 @@ universal Int # The character whose ASCII value is `self`. 
# - # assert 65.ascii == 'A' - # assert 10.ascii == '\n' + # assert 65.ascii == 'A' + # assert 10.ascii == '\n' fun ascii: Char is intern # Number of digits of an integer in base `b` (plus one if negative) @@ -374,7 +837,7 @@ universal Int # count digits while n > 0 do d += 1 - n = n / b # euclidian division / + n = n / b # euclidian division / end return d end @@ -440,18 +903,31 @@ universal Char redef type OTHER: Char redef fun object_id is intern + redef fun output `{ + if(self < 128){ + printf("%c", self); + }else if(self < 2048){ + printf("%c%c", 0xC0 | ((0x7C0 & self) >> 6), 0x80 | (0x3F & self)); + }else if(self < 65536){ + printf("%c%c%c", 0xE0 | ((0xF000 & self) >> 12), 0x80 | ((0xFC0 & self) >> 6) ,0x80 | (0x3F & self)); + }else if(self < 2097152){ + printf("%c%c%c%c", 0xF0 | ((0x1C0000 & self) >> 18), 0x80 | ((0x3F000 & self) >> 12), 0x80 | ((0xFC0 & self) >> 6), 0x80 | (0x3F & self)); + }else{ + // Bad char + printf("%c", self); + } + `} redef fun hash do return ascii redef fun ==(o) is intern redef fun !=(o) is intern - redef fun output is intern redef fun <=(i) is intern redef fun <(i) is intern redef fun >=(i) is intern redef fun >(i) is intern - redef fun succ is intern - redef fun prec is intern + redef fun successor(i) is intern + redef fun predecessor(i) is intern redef fun distance(c) do @@ -484,9 +960,6 @@ universal Char # assert '\n'.ascii == 10 fun ascii: Int is intern - redef fun +(i) is intern - redef fun -(i) is intern - # Return the lower case version of self. # If self is not a letter, then return self # @@ -559,13 +1032,29 @@ universal Char do return is_lower or is_upper end + + # Is self a whitespace character? + # + # These correspond to the "Other" and "Separator" groups of the Unicode. + # + # In the ASCII encoding, this is those <= to space (0x20) plus delete (0x7F). 
+ # + # assert 'A'.is_whitespace == false + # assert ','.is_whitespace == false + # assert ' '.is_whitespace == true + # assert '\t'.is_whitespace == true + fun is_whitespace: Bool + do + var i = ascii + return i <= 0x20 or i == 0x7F + end end # Pointer classes are used to manipulate extern C structures. -extern Pointer +extern class Pointer # Is the address behind this Object at NULL? - fun address_is_null: Bool `{ return recv == NULL; `} + fun address_is_null: Bool `{ return self == NULL; `} # Free the memory pointed by this pointer - fun free `{ free(recv); `} + fun free `{ free(self); `} end