11df5b2d4c3f491b6390c33b88c909542a2908d1
[nit.git] / contrib / nitester / src / nitester.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 # Tester of Nit engines on an MPI cluster
18 module nitester
19
20 import mpi
21 import signals
22 import opts
23
# Any processor, worker or controller
#
# All data and methods within this class are shared by the controller and the
# workers.
abstract class Processor
	super SignalHandler

	# Controller rank is always 0
	var controller_rank: Rank = 0.rank

	# Rank on this processor
	fun rank: Rank is abstract

	# Where to store data for transfer between nodes
	#
	# Require: `buffer.length % 4 == 0`
	var buffer = new CIntArray(1024)

	# Verbosity level: 0 is quiet, set from the number of `-v` on the command line
	var verbose = 0

	init
	do
		# OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
		# on the first process.
		handle_signal(sigterm, true)
	end

	# Tag of a new task packet of size `tasks_per_packet`
	var task_tag: Tag = 0.tag

	# Tag to return a set of `Result` through `buffer`
	var result_tag: Tag = 1.tag

	# Tag to notify `Worker` when to quit
	var quit_tag: Tag = 2.tag

	# Tag to request more work from the `Controller` by a `Worker`
	var need_work_tag: Tag = 4.tag

	# Tag to notify `Controller` that the sender `Worker` is done
	var done_tag: Tag = 5.tag

	# Number of tasks within each task assignation with `task_tag`
	var tasks_per_packet = 1

	# Run the main logic of this node
	fun run is abstract

	# Hash or name of the branch to test
	var branch_hash: String is noinit

	# Engines targeted by this execution
	var engines: Array[String] is noinit

	# All known engines, used to detect errors in `engines`
	var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]

	# Programs to test in this execution
	var test_programs: Array[String] is noinit

	# Root of the temporary directory
	var tmp_dir = "/dev/shm/"

	# `ccache` directory
	var ccache_dir = "/dev/shm/nit_ccache"

	# Read command line options
	#
	# Sets `verbose`, `test_programs`, `branch_hash` and `engines`.
	#
	# The process exits from within this method when `--help` or `--cleanup`
	# is given, when no test program is listed, when `--hash` is missing or
	# when an unknown engine is requested.
	fun read_cli_options
	do
		var opt_ctx = new OptionContext
		# `-h` is reserved for `--help` below: a short name cannot be shared
		# by two options.
		var opt_hash = new OptionString(
			"Branch to test",
			"--hash")
		opt_hash.mandatory = true
		var opt_engines = new OptionString(
			"Engines to test, separated with commas ({all_engines.join(", ")} or all)",
			"--engine", "-e")
		var opt_help = new OptionBool("Print this help message", "--help", "-h")
		var opt_verbose = new OptionCount(
			"Be verbose, repeat to increase verbose level (max with -vvv)",
			"--verbose", "-v")
		var opt_cleanup = new OptionBool(
			"Clean up all nitester files (and do not run tests)",
			"--cleanup", "-C")

		opt_ctx.add_option(opt_hash, opt_engines, opt_help, opt_verbose, opt_cleanup)
		opt_ctx.parse args

		# --help?
		if opt_help.value then opt_ctx.usage_error null

		# --verbose?
		verbose = opt_verbose.value

		# --cleanup?
		if opt_cleanup.value then
			assert tmp_dir.file_exists
			for file in tmp_dir.files do if file.has_prefix("nit") then
				var full_path = tmp_dir / file
				if full_path == ccache_dir then continue

				# Both `Controller::run` and `Worker::run` call this method, so
				# another process may already have removed this entry.
				if not full_path.file_exists then continue

				var stat = full_path.file_lstat
				if stat.is_dir then
					full_path.rmdir
				else
					full_path.file_delete
				end
				stat.free
			end
			mpi.finalize
			exit 0
		end

		# any files?
		var rest = opt_ctx.rest
		if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
		test_programs = rest

		# hash, report a missing value as a usage error instead of failing a cast
		var hash = opt_hash.value
		if hash == null then
			opt_ctx.usage_error "Missing mandatory option --hash"
			return
		end
		branch_hash = hash

		# gather and check engines
		var engines_str = opt_engines.value
		var engines
		if engines_str == null then
			# default
			engines = ["nitg-s"]
		else
			engines = engines_str.split(',')

			# `all` anywhere in the list selects every known engine
			if engines.has("all") then engines = all_engines
		end

		# check validity of targeted engines
		var unknown_engines = new Array[String]
		for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine

		if not unknown_engines.is_empty then
			opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
		end
		self.engines = engines
	end

	# All tasks to be performed
	var tasks = new Array[Task]

	# Gather and register all tasks
	#
	# One `Task` is added to `tasks` for each pair of engine and test program,
	# engines first. A program is left out when its name contains any
	# non-empty line of `tests/turing.skip`, if that file exists.
	fun create_tasks
	do
		# At this point we are in our local nit
		var skip_path = "tests/turing.skip"
		var skip
		if skip_path.file_exists then
			var skip_file = new FileReader.open(skip_path)
			skip = skip_file.read_lines
			skip_file.close
		else
			skip = new Array[String]
		end

		for engine in engines do for prog in test_programs do
			# Is it blacklisted?
			var skipped = false
			for s in skip do if not s.is_empty and prog.has(s) then
				# Only the controller reports the skip, to print it once
				if verbose > 0 and rank == controller_rank then print "Skipping test '{prog}' because of '{s}' in turing.skip"
				skipped = true
				break
			end

			if not skipped then tasks.add new Task(engine, prog)
		end
	end
end
201
# Single controller to dispatch tasks, gather results and produce stats
class Controller
	super Processor

	redef fun rank do return controller_rank

	# Id as `Int` of the next task to distribute
	var next_task_id = 0

	# On a handled signal: ask the workers to quit, print what was gathered
	# so far and exit.
	redef fun receive_signal(signal)
	do
		shutdown
		print_results

		mpi.finalize
		exit 0
	end

	redef fun run
	do
		read_cli_options
		create_tasks
		distribute_tasks
		print_results
	end

	# Cumulated results from workers
	var results = new ResultSet

	# Maintain communication with workers to distribute tasks and receive results
	#
	# Returns once every worker that was given work has sent `done_tag`, or
	# after `shutdown` when a message with an unexpected tag is probed.
	fun distribute_tasks
	do
		var at_work = new Array[Rank]

		# send initial tasks
		for r in [1..comm_world.size[ do
			var sent = send_task_to(r.rank)
			if sent then
				at_work.add r.rank
			else
				# More workers than tasks: this one can quit right away
				mpi.send_empty(r.rank, quit_tag, comm_world)
			end
		end

		var status = new Status
		# await results and send new tasks
		while not at_work.is_empty do
			check_signals

			# Double probe to avoid bug with some implementation of MPI
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)

			if status.tag == result_tag then
				# Receive results from a worker
				var count = status.count(new DataType.int)
				mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)

				# Parse results from C array to `Result` instances
				#
				# Each result is on 4 ints: task id, arg, alt and result.
				#
				# See the comments where the data is produced in `Worker::work_on_tasks` for more information
				assert count % 4 == 0
				for t in (count/4).times do
					var tt = t*4

					var task_id = buffer[tt]
					var arg = buffer[tt+1]
					var alt = buffer[tt+2]
					var res = buffer[tt+3]

					var result = new Result(tasks[task_id], arg, alt)

					# Same codes as written by `Worker::work_on_tasks`
					if res == 1 then result.ok = true
					if res == 2 then result.ok_empty = true
					if res == 3 then result.no_sav = true
					if res == 4 then result.fixme = true
					if res == 5 then result.fail = true
					if res == 6 then result.soso = true
					if res == 7 then result.skip = true
					if res == 8 then result.todo = true
					if res == 9 then result.skip_exec = true
					if res == 0 then result.unknown = true

					results.add result

					if verbose > 0 and results.length % 25 == 0 then print_short_results
				end

			else if status.tag == need_work_tag then
				# A worker needs more work
				mpi.recv_empty(status.source, status.tag, comm_world)
				var sent = send_task_to(status.source)
				if not sent then
					# no more work, quit
					mpi.send_empty(status.source, quit_tag, comm_world)
				end
			else if status.tag == done_tag then
				# A worker is done and will quit
				mpi.recv_empty(status.source, status.tag, comm_world)
				at_work.remove(status.source)

				if verbose > 0 then print "Worker {status.source} is done ({at_work.length} still at work)"
			else
				print "Unexpected tag {status.tag}"
				shutdown
				break
			end
		end
		status.free
	end

	# Send a packet of tasks to worker at `rank`
	#
	# Only the id of the first task of the packet is transmitted, then
	# `next_task_id` advances by `tasks_per_packet`.
	#
	# Returns `false`, without sending anything, when no task is left.
	fun send_task_to(rank: Rank): Bool
	do
		if next_task_id >= tasks.length then return false

		buffer[0] = next_task_id
		next_task_id += tasks_per_packet

		mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)

		if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
		return true
	end

	# Display the accumulated results received from workers
	fun print_results
	do
		print "# results #"
		print "* {results.length} total"
		print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
		print "* {results.fails.length} fails"
		print "* {results.no_savs.length} no savs"
		print "* {results.fixmes.length} fixmes"
		print "* {results.sosos.length} sosos"
		print "* {results.skips.length} skips"
		print "* {results.todos.length} todos"
		print "* {results.skip_execs.length} skip execs"
		print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
	end

	# Display a one line summary of the results received so far
	fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
		"& {results.fails.length} / {results.length}"

	# Abnormal shutdown of the running tests
	#
	# Sends `quit_tag` to every worker rank.
	fun shutdown
	do
		print "Shutting down"

		# A send needs a real destination: the wildcard rank is only
		# meaningful as the source of a receive or a probe.
		for r in [1..comm_world.size[ do
			mpi.send_empty(r.rank, quit_tag, comm_world)
		end
	end
end
355
# A worker node which actually executes the tests
class Worker
	super Processor

	# The `Rank` of `self`
	redef var rank: Rank

	# Compilation directory
	var comp_dir = "/dev/shm/nit_compile{rank}" is lazy

	# Directory to store the xml files produced for Jenkins
	var xml_dir = "~/jenkins_xml/"

	# Output file of the `tests.sh` script
	var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy

	# Source Nit repository, must be already updated and `make` before execution
	var local_nit = "/dev/shm/nit{rank}" is lazy

	# Remote Nit repository (actually the local source)
	var remote_nit = "~/nit/"

	# Compiled `Regex` to detect the argument of an execution
	var re_arg: Regex = "arg [0-9]+".to_re

	# Compiled `Regex` to detect the alternative of an execution
	var re_alt: Regex = "_alt[0-9]+".to_re

	redef fun run
	do
		read_cli_options
		setup
		create_tasks
		work_on_tasks
		cleanup
	end

	# Setup the testing environment
	#
	# Clone the nit repository into `local_nit`, move into it and check out
	# `branch_hash`.
	fun setup
	do
		if verbose > 0 then sys.system "hostname"

		if local_nit.file_exists then local_nit.rmdir

		exec_and_check "git clone {remote_nit} {local_nit}"
		local_nit.chdir
		exec_and_check "git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
		exec_and_check "git fetch origin --quiet"
		exec_and_check "git checkout {branch_hash}"
		exec_and_check "cp {remote_nit}/bin/* bin/"
		exec_and_check "src/git-gen-version.sh"
	end

	# Execute `cmd` and assert that it exits with 0
	#
	# The command line is echoed first when `verbose > 0`.
	private fun exec_and_check(cmd: String)
	do
		# Only the echo depends on the verbosity, the command always runs
		if verbose > 0 then print "+ {cmd}"

		var res = sys.system(cmd)
		assert res == 0 else print "Command '{cmd}' failed."
	end

	# Clean up the testing environment
	#
	# Delete all temporary files, except `ccache_dir`.
	fun cleanup
	do
		if comp_dir.file_exists then comp_dir.rmdir
		if tests_sh_out.file_exists then tests_sh_out.file_delete

		# `local_nit` is the directory created by `setup`, remove it the same
		# way `setup` does.
		if local_nit.file_exists then local_nit.rmdir
	end

	# Single C `int` to hold the next task id received from the `Controller`
	var task_buffer = new CIntArray(1)

	# Manage communication with the `Controller` and execute dispatched `Task`s
	#
	# Loops until `quit_tag` or an unexpected tag is received.
	fun work_on_tasks
	do
		var status = new Status
		loop
			check_signals

			# We double probe to prevent bug where a single probes does not receive the
			# real next read.
			mpi.probe(controller_rank, new Tag.any, comm_world, status)
			mpi.probe(controller_rank, new Tag.any, comm_world, status)

			if status.tag == task_tag then
				# Receive tasks to execute
				mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
				var first_id = task_buffer[0]
				for task_id in [first_id .. first_id + tasks_per_packet[ do

					# If id is over all known tasks, stop right here
					if task_id >= tasks.length then break
					var task = tasks[task_id]

					# Absolute path: this runs once per task and a relative
					# "tests" is only right while still at the root of `local_nit`.
					(local_nit / "tests").chdir

					# Command line to execute test
					var cmd = "XMLDIR={xml_dir} " +
						"CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
						"./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"

					# Execute test
					sys.system cmd

					# Test results were written to file, read them
					var fstream = new FileReader.open(tests_sh_out)
					var content = fstream.read_all
					fstream.close

					# Parse result and prepare them for sending
					#
					# The structure is composed of 4 ints for each result.
					# 1. task id
					# 2. arg number
					# 3. alt number
					# 4. test result as int
					#
					# `results_count` is updated as each result is stored so
					# that `send_results` always sees the real fill level.
					for line in content.split('\n') do if not line.is_empty then
						var offset = results_count*4

						buffer[offset] = task_id

						# "arg " is 4 chars long, the number follows
						var arg_match = line.search(re_arg)
						var arg = 0
						if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
						buffer[offset+1] = arg

						# "_alt" is 4 chars long, the number follows
						var alt_match = line.search(re_alt)
						var alt = 0
						if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
						buffer[offset+2] = alt

						# When a line holds several markers, the last one tested wins
						var res: nullable Int = null
						if line.has("[ok]") then res = 1
						if line.has("[0k]") then res = 2
						if line.has("[=== no sav ===]") then res = 3
						if line.has("[fixme]") then res = 4
						if line.has("[======= fail") then res = 5
						if line.has("[======= soso") then res = 6
						if line.has("[skip]") then res = 7
						if line.has("[todo]") then res = 8
						if line.has("[skip exec]") then res = 9

						if res == null then
							res = 0
							if verbose > 1 then print "Unknown result: '{line}'"
						end
						buffer[offset+3] = res

						results_count += 1

						if verbose > 2 then print "tests.sh output line: {line}"

						# If result buffer is full, send to `Controller`,
						# `send_results` resets `results_count`
						if results_count*4 == buffer.length then send_results
					end

					if verbose > 1 then print "Done testing: {task}"
				end

				mpi.send_empty(controller_rank, need_work_tag, comm_world)
			else if status.tag == quit_tag then
				# Notification from the `Controller` to quit
				mpi.recv_empty(status.source, status.tag, comm_world)

				# Send remaining results
				send_results

				# Notify `Controller` that `self` is done and will quit
				mpi.send_empty(controller_rank, done_tag, comm_world)
				break
			else
				print "Unexpected tag {status.tag}"
				break
			end
		end
		status.free
	end

	# Total results listed in `buffer` and ready to send
	var results_count = 0

	# Send all results in `buffer` to the `Controller`
	#
	# Does nothing when `results_count` is 0, otherwise resets it to 0 after
	# the send.
	fun send_results
	do
		if results_count > 0 then
			if verbose > 2 then print "Sending {results_count} results"
			mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
			results_count = 0
		end
	end

	# On a handled signal: remove the temporary files and exit
	redef fun receive_signal(signal)
	do
		cleanup
		mpi.finalize
		exit 0
	end
end
565
# A single test task, pairing a `test_program` with an `engine`
#
# A task may cover more than one program to test, considering the alts and
# args of the `test_program`.
class Task
	# Engine used to execute `test_program`
	var engine: String

	# Program executed with `engine`
	var test_program: String

	# The engine followed by the test program, separated by a space
	redef fun to_s
	do
		return engine + " " + test_program
	end
end
579
# Outcome of one execution of a `Task`
#
# A single `Task` may produce more than one result.
class Result
	# `Task` which produced `self`
	var task: Task

	# Index of the argument set of the execution
	var arg: Int

	# Index of the alternative of the execution
	var alt: Int

	# Was the outcome an _ok_?
	var ok = false

	# Was the outcome an _0k_?
	var ok_empty = false

	# Was the outcome a _no sav_?
	var no_sav = false

	# Was the outcome a _fixme_?
	var fixme = false

	# Was the outcome a _fail_?
	var fail = false

	# Was the outcome a _soso_?
	var soso = false

	# Was the test skipped?
	var skip = false

	# Is the test a TODO?
	var todo = false

	# Was the execution of the test skipped?
	var skip_exec = false

	# Is the outcome unknown? Probably an error
	var unknown = false

	# The task, the arg and alt indexes, then the name of the outcome
	redef fun to_s
	do
		# Tested from highest to lowest precedence: when several flags are
		# set, the first match here is the one reported.
		var label: String
		if skip_exec then
			label = "skip_exec"
		else if todo then
			label = "todo"
		else if skip then
			label = "skip"
		else if soso then
			label = "soso"
		else if fail then
			label = "fail"
		else if fixme then
			label = "fixme"
		else if ok_empty then
			label = "0k"
		else if ok then
			label = "ok"
		else if no_sav then
			label = "no sav"
		else
			label = "Unknown"
		end

		return "{task} arg{arg} alt{alt} => {label}"
	end
end
639
# A global collection of `Result`, also sorted by kind of outcome
#
# Results can only be added: `remove` and `clear` abort.
class ResultSet
	super HashSet[Result]

	# Results flagged `no_sav`
	var no_savs = new HashSet[Result]

	# Results flagged `ok`
	var oks = new HashSet[Result]

	# Results flagged `ok_empty`
	var ok_empties = new HashSet[Result]

	# Results flagged `fixme`
	var fixmes = new HashSet[Result]

	# Results flagged `fail`
	var fails = new HashSet[Result]

	# Results flagged `soso`
	var sosos = new HashSet[Result]

	# Results flagged `skip`
	var skips = new HashSet[Result]

	# Results flagged `todo`
	var todos = new HashSet[Result]

	# Results flagged `skip_exec`
	var skip_execs = new HashSet[Result]

	# Results flagged `unknown`
	var unknowns = new HashSet[Result]

	# TODO remove
	var per_engines = new HashMap[String, Result]

	# Register `result` in `self` and in the set of each flag it has
	redef fun add(result)
	do
		super

		if result.unknown then unknowns.add result
		if result.skip_exec then skip_execs.add result
		if result.todo then todos.add result
		if result.skip then skips.add result
		if result.soso then sosos.add result
		if result.fail then fails.add result
		if result.fixme then fixmes.add result
		if result.ok_empty then ok_empties.add result
		if result.ok then oks.add result
		if result.no_sav then no_savs.add result
	end

	# Unsupported, aborts
	redef fun remove(r) do abort

	# Unsupported, aborts
	redef fun clear do abort
end
678
redef class OptionContext

	# Print usage with a possible error `message`, then exit
	#
	# The exit status is 1 when a `message` is given, 0 otherwise.
	# The usage itself is printed by the process of rank 0 only.
	private fun usage_error(message: nullable String)
	do
		var ret = 0
		if message != null then
			print "Error: {message}"
			ret = 1
		end

		# Compare against a `Rank`, as done everywhere else in this module,
		# and not against a bare `Int`.
		if comm_world.rank == 0.rank then
			print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
			usage
		end

		mpi.finalize
		exit ret
	end
end
699
# Print `msg`, prefixed with `rank/comm_world.size` on every node but rank 0
redef fun print(msg: Object)
do
	if comm_world.rank == 0.rank then
		super msg
	else
		super "{comm_world.rank}/{comm_world.size}: {msg}"
	end
end
707
# Running MPI instance, created on the first call and reused afterwards
fun mpi: MPI
do
	return once new MPI
end
710
# Launch mpi
mpi

# Rank of the local process
var local_rank = comm_world.rank

# Rank 0 is the `Controller`, every other rank is a `Worker`
var processor: Processor
if local_rank != 0.rank then
	processor = new Worker(local_rank)
else
	processor = new Controller
end

processor.run

mpi.finalize