+ # Returns the Unicode code point of this character
+ #
+ # Note: a Unicode character is not necessarily a visible glyph, but its code point is still used to determine canonical equivalence
+ fun code_point: Int import UnicodeChar.len `{
+ 	/* First byte of this character inside the backing buffer. The bytes are
+ 	 * read as unsigned so that values >= 0x80 are never sign-extended. */
+ 	const unsigned char* b = (const unsigned char*)recv->ns + recv->pos;
+ 	/* The byte length selects the UTF-8 layout: the lead byte contributes
+ 	 * its low 7/5/4/3 bits, every following byte its low 6 bits. */
+ 	switch(UnicodeChar_len(recv)){
+ 		case 1:
+ 			return (long)(b[0] & 0x7F);
+ 		case 2:
+ 			return ((b[0] & 0x1F) << 6) |
+ 				(b[1] & 0x3F);
+ 		case 3:
+ 			return ((b[0] & 0x0F) << 12) |
+ 				((b[1] & 0x3F) << 6) |
+ 				(b[2] & 0x3F);
+ 		case 4:
+ 			return ((b[0] & 0x07) << 18) |
+ 				((b[1] & 0x3F) << 12) |
+ 				((b[2] & 0x3F) << 6) |
+ 				(b[3] & 0x3F);
+ 		default:
+ 			/* A length outside 1..4 is not a valid UTF-8 sequence. Return
+ 			 * U+FFFD (REPLACEMENT CHARACTER) instead of falling off the end
+ 			 * of a non-void function, which is undefined behaviour in C. */
+ 			return 0xFFFD;
+ 	}
+ `}
+
+ # Returns an upper-case version of self
+ #
+ # NOTE : Works only on ASCII chars
+ # TODO : Support unicode for to_upper
+ fun to_upper: UnicodeChar import UnicodeChar.code_point `{
+ 	/* Only code points 0x61..0x7A (ASCII 'a'..'z') are converted; for any
+ 	 * other character the receiver itself is returned. */
+ 	int code = UnicodeChar_code_point(recv);
+ 	if(!(code >= 0x61 && code <= 0x7A)){ return recv; }
+ 	/* Two-byte buffer: the converted letter followed by a NUL terminator.
+ 	 * Subtracting 0x20 maps an ASCII lower-case letter to its upper-case form. */
+ 	char* buf = malloc(2);
+ 	buf[0] = recv->ns[recv->pos] - 0x20;
+ 	buf[1] = '\0';
+ 	/* Wrap the buffer in a new character positioned on its first byte. */
+ 	UTF8Char* upper = malloc(sizeof(UTF8Char));
+ 	upper->pos = 0;
+ 	upper->ns = buf;
+ 	return upper;
+ `}
+
+ # Returns a lower-case version of self
+ #
+ # NOTE : Works only on ASCII chars
+ # TODO : Support unicode for to_lower
+ fun to_lower: UnicodeChar import UnicodeChar.code_point `{
+ 	/* Only code points 0x41..0x5A (ASCII 'A'..'Z') are converted; for any
+ 	 * other character the receiver itself is returned. */
+ 	int code = UnicodeChar_code_point(recv);
+ 	if(!(code >= 0x41 && code <= 0x5A)){ return recv; }
+ 	/* Two-byte buffer: the converted letter followed by a NUL terminator.
+ 	 * Adding 0x20 maps an ASCII upper-case letter to its lower-case form. */
+ 	char* buf = malloc(2);
+ 	buf[0] = recv->ns[recv->pos] + 0x20;
+ 	buf[1] = '\0';
+ 	/* Wrap the buffer in a new character positioned on its first byte. */
+ 	UTF8Char* lower = malloc(sizeof(UTF8Char));
+ 	lower->pos = 0;
+ 	lower->ns = buf;
+ 	return lower;
+ `}
+
+ redef fun ==(o)
+ do
+ 	# Against a plain `Char`: only a one-byte character can match, and it
+ 	# matches when its code point equals the `ascii` value of `o`.
+ 	if o isa Char then return len == 1 and code_point == o.ascii
+ 	# Against another `UnicodeChar`: both the byte length and the code point
+ 	# must agree.
+ 	if o isa UnicodeChar then return len == o.len and code_point == o.code_point
+ 	# Any other kind of object is never equal to self.
+ 	return false
+ end
+
+ redef fun output import UnicodeChar.len `{
+ 	/* Writes the raw bytes of this character to standard output.
+ 	 *
+ 	 * The FFI clause imports `UnicodeChar.len` because `UnicodeChar_len` is
+ 	 * the only Nit callback this body invokes. */
+ 	long nbytes = UnicodeChar_len(recv);
+ 	/* Only lengths of 1 to 4 bytes are printed; any other length prints
+ 	 * nothing. */
+ 	if(nbytes < 1 || nbytes > 4){ return; }
+ 	/* Emit exactly `nbytes` bytes starting at the character's position in
+ 	 * the backing buffer, through the same buffered stdout stream that
+ 	 * printf uses. */
+ 	fwrite(recv->ns + recv->pos, 1, (size_t)nbytes, stdout);
+ `}
+