d0d90ac0eabe9ddaeef64ea5d4759b2c9e2b5c65
[nit.git] / contrib / nitcc / src / re2nfa.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Transformation of regular expression to NFA
16 module re2nfa
17
18 import nitcc_lexer
19 import autom
20
21 redef class Node
22 # Build the NFA of the regular expression
23 fun make_rfa: Automaton do
24 print inspect
25 abort
26 end
27
28 # The real value of the string
29 fun value: String do
30 print inspect
31 abort
32 end
33 end
34
35 redef class Nstr
36 redef fun value: String do return text.substring(1, text.length-2).unescape_nit
37 redef fun make_rfa: Automaton
38 do
39 var a = new Automaton.epsilon
40 for c in self.value.chars do
41 var b = new Automaton.atom(c.ascii)
42 a.concat(b)
43 end
44 return a
45 end
46 end
47
48 redef class Nch_dec
49 redef fun value: String do return text.substring_from(1).to_i.ascii.to_s
50 redef fun make_rfa: Automaton
51 do
52 var a = new Automaton.atom(self.value.chars.first.ascii)
53 return a
54 end
55 end
56
57 redef class Nch_hex
58 redef fun value: String do return text.substring_from(2).to_hex.ascii.to_s
59 redef fun make_rfa: Automaton
60 do
61 var a = new Automaton.atom(self.value.chars.first.ascii)
62 return a
63 end
64 end
65
66 redef class NProd
67 redef fun make_rfa: Automaton
68 do
69 assert children.length == 1 else print "no make_rfa for {self}"
70 return children.first.make_rfa
71 end
72 end
73
74 redef class Nre_alter
75 redef fun make_rfa
76 do
77 var a = children[0].make_rfa
78 var b = children[2].make_rfa
79 a.alternate(b)
80 return a
81 end
82 end
83
84 redef class Nre_minus
85 redef fun make_rfa
86 do
87 var a = children[0].make_rfa
88 var b = children[2].make_rfa.to_dfa
89 for t in b.start.outs do
90 if not t.to.outs.is_empty then
91 # `b` is not a single char, so just use except
92 # "a - b == a Except (Any* b Any*)"
93 var any1 = new Automaton.cla(0, null)
94 any1.close
95 var any2 = new Automaton.cla(0, null)
96 any2.close
97 var b2 = any1
98 b2.concat(b)
99 b2.concat(any2)
100 var c = a.except(b2)
101 return c
102 end
103 a.minus_sym(t.symbol.as(not null))
104 end
105 return a
106 end
107 end
108
109 redef class Nre_end
110 redef fun make_rfa
111 do
112 print "{children.first.position.to_s}: NOT YET IMPLEMENTED: token `End`; replaced with an empty string"
113 return new Automaton.epsilon
114 end
115 end
116
117 redef class Nre_and
118 redef fun make_rfa
119 do
120 var a = children[0].make_rfa
121 var ta = new Token("1")
122 a.tag_accept(ta)
123 var b = children[2].make_rfa
124 var tb = new Token("2")
125 b.tag_accept(tb)
126
127 var c = new Automaton.empty
128 c.absorb(a)
129 c.absorb(b)
130 c = c.to_dfa
131 c.accept.clear
132 for s in c.retrotags[ta] do
133 if c.tags[s].has(tb) then
134 c.accept.add(s)
135 end
136 end
137 c.clear_tag(ta)
138 c.clear_tag(tb)
139 return c
140 end
141 end
142
143 redef class Nre_except
144 redef fun make_rfa
145 do
146 var a = children[0].make_rfa
147 var b = children[2].make_rfa
148 return a.except(b)
149 end
150 end
151
152 redef class Nre_shortest
153 redef fun make_rfa
154 do
155 var a = children[2].make_rfa
156 a = a.to_dfa
157 for s in a.accept do
158 for t in s.outs.to_a do t.delete
159 end
160 return a
161 end
162 end
163
164 redef class Nre_longest
165 redef fun make_rfa
166 do
167 var a = children[2].make_rfa
168 a = a.to_dfa
169 for s in a.accept.to_a do
170 if not s.outs.is_empty then a.accept.remove(s)
171 end
172 return a
173 end
174 end
175
176 redef class Nre_conc
177 redef fun make_rfa
178 do
179 var a = children[0].make_rfa
180 var b = children[1].make_rfa
181 a.concat(b)
182 return a
183 end
184 end
185
186 redef class Nre_star
187 redef fun make_rfa
188 do
189 var a = children[0].make_rfa
190 a.close
191 return a
192 end
193 end
194
195 redef class Nre_ques
196 redef fun make_rfa
197 do
198 var a = children[0].make_rfa
199 a.optionnal
200 return a
201 end
202 end
203
204 redef class Nre_plus
205 redef fun make_rfa
206 do
207 var a = children[0].make_rfa
208 a.plus
209 return a
210 end
211 end
212
213 redef class Nre_par
214 redef fun make_rfa
215 do
216 return children[1].make_rfa
217 end
218 end
219
220 redef class Nre_class
221 redef fun make_rfa: Automaton
222 do
223 var c1 = children[0].children[0].value
224 var c2 = children[3].children[0].value
225 if c1.length != 1 or c2.length != 1 then
226 print "Classes only works on single char"
227 exit(1)
228 abort
229 end
230 var a = new Automaton.cla(c1.chars.first.ascii, c2.chars.first.ascii)
231 return a
232 end
233 end
234
235 redef class Nre_any
236 redef fun make_rfa: Automaton
237 do
238 var a = new Automaton.cla(0, null)
239 return a
240 end
241 end