1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 intrude import text
::flat
22 # Codec supporting UTF-8
23 private class UTF8Codec
# Largest encoded size of a single character for this codec: always 4
# (presumably counted in bytes, the longest UTF-8 sequence — TODO confirm unit).
26 redef fun char_max_size
do return 4
# Size of one code unit for this codec: always 1
# (presumably one byte per UTF-8 code unit — TODO confirm unit).
28 redef fun codet_size
do return 1
# Maximum lookahead reported by this codec: always 4, the same value as
# `char_max_size` (presumably the most bytes needed to decode one character).
30 redef fun max_lookahead
do return 4
# Encodes the character `c`.
# The destination is a fresh NativeString sized with `c.u8char_len`, so the
# buffer is exactly as large as the length `c` reports for its UTF-8 form.
32 redef fun encode_char
(c
) do
33 var ns
= new NativeString(c
.u8char_len
)
# Writes the character `c` into `stream`.
# Delegates to `c.u8char_tos`, passing `stream` and `c.u8char_len` as the
# length argument.
38 redef fun add_char_to
(c
, stream
) do
39 c
.u8char_tos
(stream
, c
.u8char_len
)
# Encodes the string `s`.
# The output buffer is a `Bytes` pre-sized with `s.bytelen` as its capacity
# (presumably so that filling it needs no reallocation — TODO confirm).
43 redef fun encode_string
(s
) do
44 var buf
= new Bytes.with_capacity
(s
.bytelen
)
# NOTE(review): presumably appends the encoded form of `s` to `b`, mirroring
# `add_char_to` for whole strings — confirm against the method body.
49 redef fun add_string_to
(s
, b
) do
# Checks whether the first `len` bytes of `ns` form a valid character.
#
# Status codes returned by the checks below:
# * 2 when `len` is 0, when `ns[0]` is not a valid UTF-8 start byte, or when
#   any byte at index 1 up to (but excluding) `len` does not have `10` as its
#   two top bits (mask `0b1100_0000`, expected `0b1000_0000`).
# * 1 when every byte looks well-formed but `len` differs from the length
#   announced by the start byte (`ns[0].u8len`).
54 redef fun is_valid_char
(ns
, len
) do
55 if len
== 0 then return 2
56 if not ns
[0].is_valid_utf8_start
then return 2
57 for i
in [1 .. len
[ do if ns
[i
] & 0b1100_0000u
8 != 0b1000_0000u
8 then return 2
58 if len
!= ns
[0].u8len
then return 1
# Decodes a character from `b`, substituting U+FFFD (the replacement
# character) for code points that must not be produced:
# * the range 0xD800..0xDFFF (the UTF-16 surrogate range),
# * 0xFFFE and 0xFFFF.
# `cp` is presumably the code point of the character read from `b` — TODO confirm.
62 redef fun decode_char
(b
) do
65 if cp
>= 0xD800 and cp
<= 0xDFFF then return 0xFFFD.code_point
66 if cp
== 0xFFFE or cp
== 0xFFFF then return 0xFFFD.code_point
# Decodes `len` bytes of `ns` into a string.
#
# `ret` is built with `ns.to_s_with_length(len)` and `rit` is the `items`
# buffer of that result, which is cast to `FlatString`.
# A fresh NativeString `nns` of size `len` then receives `len` bytes copied
# from `rit` (the trailing `0, 0` are presumably the source and destination
# offsets — TODO confirm), and the returned string is rebuilt over `nns`
# with the `bytelen` and `length` already computed for `ret`.
# NOTE(review): the copy presumably keeps the result from sharing `ns`'s
# memory — confirm the condition under which it is performed.
70 redef fun decode_string
(ns
, len
) do
71 var ret
= ns
.to_s_with_length
(len
)
72 var rit
= ret
.as(FlatString).items
74 var nns
= new NativeString(len
)
75 rit
.copy_to
(nns
, len
, 0, 0)
76 return nns
.to_s_full
(ret
.bytelen
, ret
.length
)
82 # Returns the instance of a UTF-8 Codec
# Accessor typed as the general `Codec`; the concrete class is the private
# `UTF8Codec`. The `once` expression evaluates `new UTF8Codec` a single time,
# so every call returns the same shared instance.
83 fun utf8_codec
: Codec do return once
new UTF8Codec