contrib: bring in the pep8 analysis framework
[nit.git] / contrib / pep8analysis / src / flow_analysis / types.nit
1 import pipeline
2 import opts
3
4 import framework
5 import range
6
redef class AnalysisManager
	# Command line switch (`--report-types-top`) asking for warnings on
	# values whose type is unknown (top, 't').
	var opt_report_unknown_types = new OptionBool("Report unknown types", "--report-types-top")

	# Current value of `opt_report_unknown_types`.
	fun report_unknown_types: Bool
	do
		return opt_report_unknown_types.value
	end

	# Registers `opt_report_unknown_types` among the manager options.
	redef init
	do
		super
		opts.add_option(opt_report_unknown_types)
	end

	# Runs the three steps of the types analysis on `ast` and `cfg`:
	# initial state, flow analysis, then verification.
	fun do_types_analysis(ast: AListing, cfg: CFG)
	do
		# Step 1: types known at program load
		var init_analysis = new TypesInitAnalysis(ast)
		init_analysis.analyze(ast)

		# Step 2: propagate types along the program flow, starting from
		# the state computed by step 1
		cfg.start.backup_types_out = init_analysis.set
		var flow_analysis = new TypesAnalysis
		flow_analysis.analyze(cfg)

		# Step 3: report suspicious uses
		var checker = new TypesChecker(ast)
		checker.analyze(ast)
	end
end
33
# Types analysis, step 1: find the state of the memory at program load.
#
# Visits the AST and lets each node record its contribution in `set`
# through `ANode::accept_types_init_analysis`.
class TypesInitAnalysis
	super StaticAnalysis[TypesMap]

	# Last `ALine` reached by `visit`, initially the first line of the listing.
	var current_line: ALine

	# Starts from an empty `TypesMap` and the first line of `prog`.
	init(prog: AListing)
	do
		super(new TypesMap)
		current_line = prog.n_lines.first
	end

	# Tracks the line being visited, then dispatches to the node.
	redef fun visit(node)
	do
		if node isa ALine then
			current_line = node
		end
		node.accept_types_init_analysis(self, set)
	end
end
52
# Types analysis, step 2: evaluate how types evolve with the program flow.
#
# Forward flow analysis; `AnalysisManager::do_types_analysis` runs it on the CFG.
# The states are stored on the lines (`types_in`, `types_out`) and on the
# basic blocks (`backup_types_in`, `backup_types_out`).
class TypesAnalysis
	super FineFlowAnalysis[TypesMap]

	init do super

	redef fun is_forward do return true

	redef fun empty_set do return new TypesMap

	# Dispatches to the node with the current input and output states.
	# The output state must be set at this point (`as(not null)`).
	redef fun visit(node)
	do
		node.accept_types_analysis(self, current_in, current_out.as(not null))
	end

	# Combines two incoming states.
	#
	# Returns the intersection when both are set, a copy of the only one
	# set otherwise, and `null` when neither is set.
	redef fun merge(a, b)
	do
		if a != null and b != null then return a.intersection(b)
		if a != null then return a.copy
		if b != null then return b.copy
		return null
	end

	# States kept on the basic blocks
	redef fun backup_in(bb) do return bb.backup_types_in
	redef fun backup_in=(bb, v) do bb.backup_types_in = v
	redef fun backup_out(bb) do return bb.backup_types_out
	redef fun backup_out=(bb, v) do bb.backup_types_out = v

	# States kept on the lines
	redef fun line_in(line) do return line.types_in
	redef fun line_in=(line, v) do line.types_in = v
	redef fun line_out(line) do return line.types_out
	redef fun line_out=(line, v) do line.types_out = v
end
85
# Types analysis, step 3: verification.
#
# Visits the AST and lets each node check the types it uses through
# `ANode::accept_types_checker`.
class TypesChecker
	super StaticAnalysis[TypesMap]

	# Last `ALine` reached by `visit`, initially the first line of the listing.
	var current_line: ALine

	# Starts from an empty `TypesMap` and the first line of `prog`.
	init(prog: AListing)
	do
		super(new TypesMap)
		current_line = prog.n_lines.first
	end

	# Tracks the line being visited, then dispatches to the node.
	redef fun visit(node)
	do
		if node isa ALine then
			current_line = node
		end
		node.accept_types_checker(self)
	end
end
104
# State of the types analysis: the kind of data held by each status bit,
# register byte, stack entry and memory byte.
class TypesMap
	type T: Char

	# Status bits, by name ('N', 'Z', 'V' and 'C')
	#
	# 'u' unset
	# 's' set
	# 't' unknown, produced by `intersection` when both sides differ
	var bs = new HashMap[Char,T]

	# The type can be:
	# 'u' for uninitialized
	# '0' zeroed
	# 'b' byte
	# 'w' word begin
	# 'W' word end
	# 'c' executable code
	# 'l' ascii
	# 'a' address begin
	# 'A' address end
	# 't' top, unknown (result of `merge` on incompatible types)

	# Registers 'A' and 'X', two types each: index 0 for the first byte
	# of the word and index 1 for the second one
	var rs = new HashMap[Char,Array[T]]

	# Stack
	var stack = new Array[T]

	# Memory, by address; addresses without an entry are uninitialized
	# (see `memory`)
	var mem = new HashMap[Int, T]

	# Registers and status bits start as uninitialized/unset ('u').
	init
	do
		rs['A'] = new Array[T].with_items('u', 'u')
		rs['X'] = new Array[T].with_items('u', 'u')
		bs['N'] = 'u'
		bs['Z'] = 'u'
		bs['V'] = 'u'
		bs['C'] = 'u'
	end

	# Type of the byte at address `a`, 'u' if nothing is known about it.
	fun memory(a: Int): T
	do
		if mem.has_key(a) then return mem[a]
		return 'u'
	end

	# Sets the type of the byte at address `a`.
	fun memory=(a: Int, v: T) do mem[a] = v

	# Copies the content of `self` over `o`.
	#
	# Registers and status bits are overwritten. Stack entries are
	# appended to `o.stack`, and memory entries of `o` at addresses
	# unknown to `self` are kept: `o` is an exact copy only if its stack
	# and memory were empty.
	fun copy_to(o: TypesMap)
	do
		for k,v in rs do for b in [0..1] do o.rs[k][b] = rs[k][b]
		for k,v in bs do o.bs[k] = v
		for f in stack do o.stack.add(f)
		for k, v in mem do o.mem[k] = v
	end

	# A new `TypesMap` with the same content as `self`.
	fun copy: TypesMap
	do
		var tm = new TypesMap
		copy_to(tm)
		return tm
	end

	# A new `TypesMap` holding what is known in both `self` and `o`.
	#
	# Types that differ between both sides are combined with `merge`.
	# Neither `self` nor `o` is modified.
	fun intersection(o: TypesMap): TypesMap
	do
		var tm = new TypesMap

		# registers
		for k, v in rs do
			for b in [0..1] do
				var v1 = o.rs[k][b]
				var v2 = v[b]
				if v1 == v2 then
					tm.rs[k][b] = v1
				else
					tm.rs[k][b] = merge(v1, v2)
				end
			end
		end

		# status bits, written to the result (and not to `o`)
		for k, v in bs do
			var ov = o.bs[k]
			if v == ov then
				tm.bs[k] = v
			else
				tm.bs[k] = merge(v, ov)
			end
		end

		# stack, written to the result (and not to `o`)
		# Only the entries present on both sides are kept.
		# NOTE(review): nothing in this module pushes on the stack, the
		# policy for stacks of different depths is a conservative choice.
		var depth = stack.length
		if o.stack.length < depth then depth = o.stack.length
		for i in [0..depth[ do
			var s1 = stack[i]
			var s2 = o.stack[i]
			if s1 == s2 then
				tm.stack.add(s1)
			else
				tm.stack.add(merge(s1, s2))
			end
		end

		# memory, an address known on a single side becomes top
		for k, v in mem do
			if o.mem.has_key(k) then
				var ov = o.mem[k]
				if v == ov then
					tm.mem[k] = v
				else
					tm.mem[k] = merge(v, ov)
				end
			else
				tm.mem[k] = 't'
			end
		end
		for k in o.mem.keys do
			if not tm.mem.has_key(k) then tm.mem[k] = 't'
		end

		return tm
	end

	# Combines two different types.
	#
	# A zeroed byte is compatible with a word byte ('w' or 'W'), anything
	# else gives top ('t').
	fun merge(a, b: Char): Char
	do
		var c = [a,b]
		if c.has('w') and c.has('0') then return 'w'
		if c.has('W') and c.has('0') then return 'W'
		return 't'
	end

	# Label of the line at address `index`, `null` if no line is known at
	# this address (the label itself may also be `null`).
	fun label_at(index: Int): nullable String
	do
		var ltl = manager.model.address_to_line
		if ltl.has_key(index) then
			var line = ltl[index]
			return line.lbl
		end
		return null
	end

	# Textual form of the memory block from address `first` to `last`
	# holding the type `kind`, prefixed by the label at `first` if any.
	private fun block_to_s(first, last: Int, kind: T): String
	do
		var prefix = ""
		var lbl = label_at(first)
		if lbl != null then prefix = "{lbl}@"

		if first == last then return "{prefix}{first}:{kind}"
		return "{prefix}[{first}..{last}]:{kind}"
	end

	# Registers, then memory grouped in blocks of consecutive addresses
	# holding the same type.
	redef fun to_s
	do
		var s = "regs:\{{rs.join(",",":")}\}, "
		#s = "{s}bits:\{{bs.join(",",":")}\}, "
		#s = "{s}stack:\{{stack.join(",")}\}, "

		var blocks = new Array[String]
		var block_begin: nullable Int = null
		var block_end = 0
		var block_type = ' '
		for a in mem.keys.to_a.sort_filter do
			var t = mem[a]

			# close the current block on a change of type or on a gap
			if block_begin != null and (block_type != t or block_end != a-1) then
				blocks.add(block_to_s(block_begin, block_end, block_type))
				block_begin = null
			end

			if block_begin == null then block_begin = a

			block_type = t
			block_end = a
		end
		# close the last block
		if block_begin != null then
			blocks.add(block_to_s(block_begin, block_end, block_type))
		end
		s = "{s}mem:\{{blocks.join(",")}\}"

		return s
	end

	# Two `TypesMap` are equal if they agree on the registers, the stack
	# and the memory. The status bits are not compared.
	redef fun ==(o)
	do
		if o == null or not o isa TypesMap then return false

		for r, v in rs do
			for i in [0..2[ do
				if o.rs[r][i] != v[i] then return false
			end
		end

		if stack.length != o.stack.length then return false
		for s in [0..stack.length[ do
			if o.stack[s] != stack[s] then return false
		end

		if mem.length != o.mem.length then return false
		for k, v in mem do
			if not o.mem.has_key(k) or o.mem[k] != v then return false
		end

		return true
	end
end
265
redef class ALine
	# Types state before the line, as stored by `TypesAnalysis::line_in=`
	# (`null` until then)
	var types_in: nullable TypesMap = null

	# Types state after the line, as stored by `TypesAnalysis::line_out=`
	# (`null` until then)
	var types_out: nullable TypesMap = null
end
270
redef class BasicBlock
	# Types state at the entry of the block, as stored by
	# `TypesAnalysis::backup_in=` (`null` until then)
	var backup_types_in: nullable TypesMap = null

	# Types state at the exit of the block, as stored by
	# `TypesAnalysis::backup_out=` (`null` until then)
	var backup_types_out: nullable TypesMap = null

	# Appends the types state at the entry of the block, if known.
	#
	# The state is the one of the first line, or the backup of the block
	# when it has no lines.
	redef fun dot_node_header
	do
		var types: nullable TypesMap
		if lines.is_empty then
			types = backup_types_in
		else
			types = lines.first.types_in
		end

		if types == null then return super
		return "{super}-- types = \{{types.to_s}\}\\l"
	end

	# Appends the types state at the exit of the block, if known.
	#
	# The state is the one of the last line, or the backup of the block
	# when it has no lines. The last line is both the one tested and the
	# one printed.
	redef fun dot_node_footer
	do
		var types: nullable TypesMap
		if lines.is_empty then
			types = backup_types_out
		else
			types = lines.last.types_out
		end

		if types == null then return super
		return "{super}-- types = \{{types.to_s}\}\\l"
	end
end
294
redef class ANode
	# Hook of `TypesAnalysis`, by default only visits the children.
	#
	# `ins` is the state before the node and `outs` the state to update.
	fun accept_types_analysis(v: TypesAnalysis, ins: nullable TypesMap, outs: TypesMap)
	do
		visit_all(v)
	end

	# Hook of `TypesInitAnalysis`, by default only visits the children.
	#
	# `set` is the state at program load, to update.
	fun accept_types_init_analysis(v: TypesInitAnalysis, set: TypesMap)
	do
		visit_all(v)
	end

	# Hook of `TypesChecker`, by default only visits the children.
	fun accept_types_checker(v: TypesChecker)
	do
		visit_all(v)
	end
end
300
redef class AAnyOperand
	# Uses the ranges known before the line to resolve the memory
	# location targeted by the addressing modes "n" and "x".
	#
	# Falls back to the inherited behavior when the ranges are not
	# available or when the needed value is not a single known value
	# (`ponctual`).
	redef fun to_var
	do
		var line = parent.parent.as(ALine)
		var ranges = line.ranges_in
		if ranges == null then return super

		if addressing_mode == "n" then
			# the operand is the address of the target address
			var pointer = new MemVar(n_value.to_i)
			if ranges.has_key(pointer) and ranges[pointer].ponctual then
				var target = ranges[pointer].min
				return new MemVar(target)
			end
		else if addressing_mode == "x" then
			# the operand is offset by the content of the register X
			var index_reg: Var = new RegisterVar('X')
			if ranges.has_key(index_reg) and ranges[index_reg].ponctual then
				var offset = ranges[index_reg].min
				return new MemVar(n_value.to_i+offset)
			end
		end

		return super
	end
end
324
redef class AInstruction
	# By default, an instruction does not change the set: `ins` is
	# copied over `outs`.
	redef fun accept_types_analysis(v, ins, outs)
	do
		ins.copy_to(outs)
	end

	# Marks the 4 bytes starting at the address of the line as code.
	redef fun accept_types_init_analysis(v, set)
	do
		var base = v.current_line.address
		for offset in [0..4[ do
			set.memory(base+offset) = 'c'
		end
	end

	# Warns if the two bytes of `content` do not look like a word.
	#
	# `mem_str` names the source of the content in the messages.
	# Unknown content (2x top) is reported only on request, see
	# `AnalysisManager::report_unknown_types`. There is no check
	# specific to uninitialized ('u') content.
	fun verify_word(content: Array[Char], mem_str: String)
	do
		# bytes of a word at the wrong position
		if content[0] == 'W' or content[1] == 'w' then
			manager.notes.add(new Warn(location, "use of deorganized word in {mem_str}, got {long_content_name(content)}"))
			return
		end

		# a single half of a word
		if (content[0] == 'w') != (content[1] == 'W') then
			manager.notes.add(new Warn(location, "use of partial word in {mem_str}, got {long_content_name(content)}"))
			return
		end

		# unknown source
		if content.count('t') == 2 then
			if manager.report_unknown_types then
				manager.notes.add(new Warn(location, "use of values from unknown source in {mem_str}, got {long_content_name(content)}"))
			end
			return
		end

		# accepted: zero extended byte or ASCII, all zero, address
		if content[0] == '0' and (content[1] == 'b' or content[1] == 'l' or content[1] == '0') then return
		if content[0] == 'a' and content[1] == 'A' then return

		# anything else but a full word
		if content[0] != 'w' and content[1] != 'W' then
			manager.notes.add(new Warn(location, "expected word in {mem_str}, got {long_content_name(content)}"))
		end
	end

	# Human readable name of the type `d`.
	#
	# 'u' for uninitialized
	# '0' zeroed
	# 'b' byte
	# 'w' word begin
	# 'W' word end
	# 'c' executable code
	# 'l' ascii
	# 'a' address begin
	# 'A' address end
	# 't' top
	#
	# Aborts on any other type.
	fun long_data_name(d: Char): String
	do
		if d == 'u' then
			return "uninitialized"
		else if d == '0' then
			return "zero"
		else if d == 'b' then
			return "byte"
		else if d == 'w' then
			return "1st byte of word"
		else if d == 'W' then
			return "2nd byte of word"
		else if d == 'c' then
			return "code"
		else if d == 'l' then
			return "ASCII"
		else if d == 'a' then
			return "1st byte of address"
		else if d == 'A' then
			return "2nd byte of address"
		else if d == 't' then
			return "top"
		end
		print "Unknown data type '{d}'"
		abort
	end

	# Human readable name of the two bytes of `c`.
	fun long_content_name(c: Array[Char]): String
	do
		var first = c[0]
		var second = c[1]
		if second == 'W' and (first == 'w' or first == '0') then return "word"
		if first == 'a' and second == 'A' then return "address"
		if first == second then return "2x {long_data_name(first)}"
		return "{long_data_name(first)} then {long_data_name(second)}"
	end
end
396
397
398 ## Section: directives
399
redef class AByteDirective
	# Marks the byte at the address of the line as a byte ('b').
	redef fun accept_types_init_analysis(v, set)
	do
		var addr = v.current_line.address
		set.memory(addr) = 'b'
	end
end
406
redef class AWordDirective
	# Marks the two bytes at the address of the line as a word
	# ('w' then 'W').
	redef fun accept_types_init_analysis(v, set)
	do
		var addr = v.current_line.address
		set.memory(addr) = 'w'
		set.memory(addr+1) = 'W'
	end
end
414
redef class AAsciiDirective
	# Marks each byte of `data`, from the address of the line, as
	# ASCII ('l').
	redef fun accept_types_init_analysis(v, set)
	do
		# TODO AOperand::data
		var offset = 0
		while offset < data.length do
			set.memory(v.current_line.address+offset) = 'l'
			offset += 1
		end
	end
end
424
redef class AAddrssDirective
	# Marks the two bytes at the address of the line as an address
	# ('a' then 'A').
	redef fun accept_types_init_analysis(v, set)
	do
		var addr = v.current_line.address
		set.memory(addr) = 'a'
		set.memory(addr+1) = 'A'
	end
end
432
433 ## Section: other instructions
434
redef class ALdInstruction
	# Sets the types of the loaded register.
	#
	# An immediate operand ("i") gives a zeroed register if its value is
	# 0 and a word otherwise. A resolved memory operand gives the types
	# of the two bytes read. Otherwise the register is left as in `ins`.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var op = n_operand
		if op isa AAnyOperand and op.addressing_mode == "i" then
			var first = 'w'
			var second = 'W'
			if op.n_value.to_i == 0 then
				first = '0'
				second = '0'
			end
			outs.rs[register][0] = first
			outs.rs[register][1] = second
			return
		end

		var mem = mem_var
		if mem isa MemVar then
			var first = ins.memory(mem.index)
			var second = ins.memory(mem.index+1)
			outs.rs[register][0] = first
			outs.rs[register][1] = second
		end
	end
end
462
redef class ALdbyteInstruction
	# Sets the type of the second byte of the loaded register to the
	# type of the byte read, when the memory operand is resolved.
	# The first byte of the register is left as in `ins`.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var mem = mem_var
		if mem isa MemVar then
			outs.rs[register][1] = ins.memory(mem.index)
		end
	end
end
476
redef class AStInstruction
	# Gives the types of the stored register to the two bytes written,
	# when the memory operand is resolved.
	#
	# The bytes written are the ones at the resolved address
	# (`mem.index`), which differs from the raw operand value with the
	# indirect and indexed addressing modes.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var mem = mem_var
		if mem isa MemVar then
			var content = ins.rs[register]
			outs.mem[mem.index] = content[0]
			outs.mem[mem.index+1] = content[1]
		end
	end

	# Warns if one of the two bytes written holds code.
	redef fun accept_types_checker(v)
	do
		var ins = v.current_line.types_in
		var mem = mem_var
		if mem isa MemVar and ins != null then
			var content = ins.rs[register]
			# a word is stored, both bytes are overwritten
			if ins.memory(mem.index) == 'c' or
			   ins.memory(mem.index+1) == 'c' then
				manager.notes.add(new Warn(location, "overwriting code at {mem} with {long_content_name(content)}"))
			end
		end
	end
end
504
redef class AStbyteInstruction
	# Gives the type of the second byte of the stored register to the
	# byte written, when the memory operand is resolved.
	#
	# The byte written is the one at the resolved address (`mem.index`),
	# which differs from the raw operand value with the indirect and
	# indexed addressing modes.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var mem = mem_var
		if mem isa MemVar then
			var content = ins.rs[register]
			outs.mem[mem.index] = content[1]
		end
	end

	# Warns if the byte written holds code.
	redef fun accept_types_checker(v)
	do
		var ins = v.current_line.types_in
		var mem = mem_var
		if mem isa MemVar and ins != null then
			var content = ins.rs[register]
			if ins.memory(mem.index) == 'c' then
				manager.notes.add(new Warn(location, "overwriting code at {mem} with {long_data_name(content[1])}"))
			end
		end
	end
end
529
redef class AShiftInstruction
	# A shift only applies the inherited behavior, it sets no type of its own.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super
	end
end
536
redef class AArithmeticInstruction
	# The register receiving the result holds a word.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var result = outs.rs[register]
		result[0] = 'w'
		result[1] = 'W'
	end

	# Checks that both operands are words: the register, then the two
	# bytes of the memory operand when it is resolved.
	redef fun accept_types_checker(v)
	do
		var ins = v.current_line.types_in
		if ins == null then return

		# register
		verify_word(ins.rs[register], "r{register}")

		# memory source
		var mem = mem_var
		if mem isa MemVar then
			var word = [ins.memory(mem.index), ins.memory(mem.index+1)]
			verify_word(word, "m{mem.index}")
		end
	end
end
562
redef class ADecoInstruction
	# Checks that the two bytes of the memory operand, when it is
	# resolved, form a word.
	redef fun accept_types_checker(v)
	do
		var ins = v.current_line.types_in
		if ins == null then return

		var mem = mem_var
		if mem isa MemVar then
			var word = [ins.memory(mem.index), ins.memory(mem.index+1)]
			verify_word(word, "m{mem.index}")
		end
	end
end
576
redef class ADeciInstruction
	# The two bytes of the memory operand, when it is resolved, hold a
	# word ('w' then 'W').
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var mem = mem_var
		if mem isa MemVar then
			var addr = mem.index
			outs.mem[addr] = 'w'
			outs.mem[addr+1] = 'W'
		end
	end
end
588
redef class AOutputInstruction
	# Warns if `content` is not the type of ASCII data ('l').
	#
	# Uninitialized content has its own message. Unknown content (top)
	# is reported only on request, see
	# `AnalysisManager::report_unknown_types`.
	fun verify_ascii(content: Char)
	do
		if content == 'u' then
			manager.notes.add(new Warn(location, "use of uninitialized values"))
			return
		end

		if content == 'l' then return

		if content != 't' or manager.report_unknown_types then
			manager.notes.add(new Warn(location, "use of non-ascii types ({content})"))
		end
	end
end
601
redef class ACharoInstruction
	# Checks that the byte of the memory operand, when it is resolved,
	# is ASCII.
	redef fun accept_types_checker(v)
	do
		var ins = v.current_line.types_in
		if ins == null then return

		var mem = mem_var
		if mem isa MemVar then
			verify_ascii(ins.memory(mem.index))
		end
	end
end
615
redef class AStroInstruction
	# Checks that the first byte of the memory operand, when it is
	# resolved, is ASCII. The following bytes are not checked.
	redef fun accept_types_checker(v)
	do
		var ins = v.current_line.types_in
		if ins == null then return

		var mem = mem_var
		if mem isa MemVar then
			verify_ascii(ins.memory(mem.index))
		end
	end
end
629
redef class AChariInstruction
	# The byte at the address given by the value of the operand holds
	# ASCII ('l').
	# NOTE(review): the raw operand value is used, not the address
	# resolved by `mem_var` -- confirm for the indirect and indexed modes.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var addr = n_operand.n_value.to_i
		outs.mem[addr] = 'l'
	end
end
637
redef class ABranchInstruction
	# Warns if the two bytes of the memory operand, when it is resolved,
	# are not an address ('a' then 'A').
	redef fun accept_types_checker(v)
	do
		var ins = v.current_line.types_in
		if ins == null then return

		var mem = mem_var
		if mem isa MemVar then
			var content = [ins.memory(mem.index), ins.memory(mem.index+1)]
			var is_address = content[0] == 'a' and content[1] == 'A'
			if not is_address then
				manager.notes.add(new Warn(location, "use of non-address data for branching, got {long_content_name(content)}"))
			end
		end
	end
end
653
redef class AMovInstruction
	# The register A holds a word ('w' then 'W') after the instruction.
	redef fun accept_types_analysis(v, ins, outs)
	do
		super

		var acc = outs.rs['A']
		acc[0] = 'w'
		acc[1] = 'W'
	end
end