First NIT release and new clean mercurial repository
[nit.git] / examples / various / glob.nit
1 # This file is part of Nit Language ( http://nitlanguage.org ).
2 #
3 # Copyright 2008 Etienne M. Gagnon <egagnon@j-meg.com>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 package glob
18
19 { A glob is a string pattern where:
20 "*" is 0 or more character except "/" (cannot be adjacent to another "*")
21 "**/" is 0 or more directory levels (must be preceded by "/" or the start)
22 "?" : any character except "/"
23
24 Examples:
25
26 "**/*.prm" => any *.prm file in current directory or below
27 "/**/*.nit" => any *.nit file in the entire file system
28 "src/**/*.bak" => any *.bak file in the src subdirectory
29 "a**/" => invalid glob (instead, you might want to write "a*/**/")
30 "a?*/" => any direct subdirectory that starts with an "a",
31 and is at least 2 characters long
32
33 Usage
34
35 printn("toto".match_glob("t*o")) # prints => true
36 }#
37
38 class Glob
39
40 # class that encapsulates a glob pattern
41
42 var _chars = new Array[GlobChar]
43
44 # the glob-chars of the pattern
45
46 var _string : String
47 delegate to_s
48
49 # cached string representation
50
51 init(pattern: String)
52 with error(message: String)
53 is final
54
55 # contructor that expects the glob is a string
56
57 # calls error when the glob string is invalid
58
59 do
60 _string = string
61
62 if not pattern.valid_glob then
63 error("invalid glob pattern")
64 end
65
66 var star_seen = false
67 for c in pattern.chars do
68 if star_seen then
69 if c == '*' then
70 _chars.add(dstar)
71 else
72 _chars.add('*')
73 _chars.add(c)
74 end
75
76 star_seen = false
77
78 else if c == '*' then
79 star_seen = true
80
81 else
82 _chars.add(c)
83 end
84 end
85 end
86
87 fun match(string: String): boolean
88 do
89 var pat_pos = new GlobPosition(_chars)
90 var str_pos = new StringPosition(string.chars)
91
92 pat_pos.match(str_pos)
93 on success do return true
94
95 return false
96 end
97 end
98
99 redef String
100
101 fun match_glob(pattern: String): Bool
102 with error(message: String)
103 is final
104
105 # returns true if self matches the given glob pattern
106
107 # calls error when the glob pattern is invalid
108
109 do
110 var pat = new Glob(pattern)
111 on error(message) do error(message)
112
113 return pat.match(self)
114 end
115
116 fun valid_glob: Bool
117 is private
118
119 # returns true if self is a valid glob
120
121 do
122 var state = state0
123
124 for c in chars do
125 state = state.target(c.to_symbol)
126 if state == null then return false
127 end
128
129 return state.accept
130 end
131 end
132
133 redef Char
134 special GlobChar
135
136 # Char has a new super class
137
138 fun to_symbol: Symbol
139 is private
140
141 # converts self into a Symbol
142
143 do
144 if self == '/' then return slash
145 if self == '*' then return star
146 if self == '?' then return qmark
147 return other
148 end
149 end
150
151 class GlobChar
152 is private
153 is universal
154
155 # super class of Char, which has an additional constant for '**'
156
157 const dstar
158
159 # the constant for '**'
160
161 end
162
163 class Position
164 is private
165
166 # class that encapsulate a position
167
168 var _pos: Int = 0
169
170 fun next do _pos += 1
171 fun prev do _pos -= 1
172 end
173
174 class GlobPosition
175 special Position
176 is private
177
178 # class that encapsulate a position in a pattern
179
180 var _chars: Array[GlobChar]
181 end
182
183 class StringPosition
184 special Position
185 is private
186
187 # class that encapsulate a position in a string
188
189 var _chars: Array[Char]
190
191 fun match(pattern: GlobPosition)
192 with success
193 is final
194
195 # match the rest of this string with the rest of the given pattern
196
197 # calls success when there is a match
198
199 # the implemented algorithm does a recursive exhaustive search
200 # until a match is found.
201
202 do
203 if _pos == _chars.len then
204 # end of string
205
206 if pattern._pos == pattern._chars.len then success
207
208 # not end of pattern
209
210 var pc = pattern._chars[pattern._pos]
211 if pc == '*' or pc == dstar or pc == '/' then
212 pattern.next
213 match(pattern)
214 pattern.prev
215 end
216
217 else if pattern._pos != pattern._chars.len then
218 # not end of pattern
219
220 var sc = _chars[_pos]
221 var pc = pattern._chars[pattern._pos]
222 if pc == dstar then
223 if sc == '/' then
224 pattern.next
225 match(pattern)
226 pattern.prev
227 else
228 next
229 match(pattern)
230 prev
231 end
232
233 else if pc == '*' then
234 pattern.next
235 match(pattern)
236 pattern.prev
237 next
238 match(pattern)
239 prev
240
241 else if pc == '?' or sc == pc then
242 pattern.next
243 next
244 match(pattern)
245 prev
246 pattern.prev
247 end
248 end
249 end
250 end
251
252 #
253 # Automaton for Glob validation
254 #
255
256 class State
257 is private
258 is universal
259
260 # automaton state class
261
262 const state0, state1, state2, state3, state4, state5, state6
263
264 # states of the automaton
265
266 fun target(symbol: Symbol): nullable State
267
268 # returns the target state, given a symbol
269
270 case state0 do return symbol.state0_target
271 case state1 do return symbol.state1_target
272 case state2 do return symbol.state2_target
273 case state3 do return symbol.state3_target
274 case state4 do return symbol.state4_target
275 case state5 do return symbol.state5_target
276 case state6 do return symbol.state6_target
277 default do abort
278
279 fun accept: Bool
280
281 # returns true if self is an accept state
282
283 case state5 do return false
284 default do return true
285 end
286
287 class Symbol
288 is private
289 is universal
290
291 # automaton symbol class
292
293 const slash, star, qmark, other
294
295 fun state0_target: nullable State
296
297 # returns the target of state0 (start)
298
299 case slash do return state1
300 case star do return state2
301 case qmark do return state3
302 case other do return state4
303 default do return null
304
305 fun state1_target: nullable State
306
307 # returns the target of state1 (".../")
308
309 case star do return state2
310 case qmark do return state3
311 case other do return state4
312 default do return null
313
314 fun state2_target: nullable State
315
316 # returns the target of state2 (".../*")
317
318 case slash do return state1
319 case star do return state5
320 case qmark do return state3
321 case other do return state4
322 default do return null
323
324 fun state3_target: nullable State
325
326 # returns the target of state3 ("...?")
327
328 case slash do return state1
329 case star do return state6
330 case qmark do return state3
331 case other do return state4
332 default do return null
333
334 fun state4_target: nullable State
335
336 # returns the target of state4 ("...o")
337
338 case slash do return state1
339 case star do return state6
340 case qmark do return state3
341 case other do return state4
342 default do return null
343
344 fun state5_target: nullable State
345
346 # returns the target of state5 (".../**")
347
348 case slash do return state1
349 default do return null
350
351 fun state6_target: nullable State
352
353 # returns the target of state6 ("...o*" or "...?*")
354
355 case slash do return state1
356 case qmark do return state3
357 case other do return state4
358 default do return null
359 end