contrib/nitester: no more free to call
[nit.git] / contrib / nitester / src / nitester.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 # Tester of Nit engines on an MPI cluster
18 module nitester
19
20 import mpi
21 import signals
22 import opts
23
# Any processor, worker or controller
#
# All data and methods within this class are shared by the controller and the
# workers.
abstract class Processor
	super SignalHandler

	# Controller rank is always 0
	var controller_rank: Rank = 0.rank

	# Rank on this processor
	fun rank: Rank is abstract

	# Where to store data for transfer between nodes
	#
	# Require: `buffer.length % 4 == 0`
	var buffer = new CIntArray(1024)

	# Run in verbose mode, display more text
	var verbose = 0

	init
	do
		# OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
		# on the first process.
		handle_signal(sigterm, true)
	end

	# Tag of a new task packet of size `tasks_per_packet`
	var task_tag: Tag = 0.tag

	# Tag to return a set of `Result` through `buffer`
	var result_tag: Tag = 1.tag

	# Tag to notify `Worker` when to quit
	var quit_tag: Tag = 2.tag

	# Tag to request more work from the `Controller` by a `Worker`
	var need_work_tag: Tag = 4.tag

	# Tag to notify `Controller` that the sender `Worker` is done
	var done_tag: Tag = 5.tag

	# Number of tasks within each task assignation with `task_tag`
	var tasks_per_packet = 1

	# Run the main logic of this node
	fun run is abstract

	# Hash or name of the branch to test
	var branch_hash: String is noinit

	# Engines targeted by this execution
	var engines: Array[String] is noinit

	# All known engines, used to detect errors in `engines`
	var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]

	# Programs to test in this execution
	var test_programs: Array[String] is noinit

	# Root of the temporary directory
	var tmp_dir = "/dev/shm/"

	# `ccache` directory
	var ccache_dir = "/dev/shm/nit_ccache"

	# Read command line options
	#
	# Sets `verbose`, `test_programs`, `branch_hash` and `engines`.
	# With `--help`, `--cleanup` or on a usage error, this method finalizes
	# MPI and exits the process instead of returning.
	fun read_cli_options
	do
		var opt_ctx = new OptionContext
		# Only the long name is registered: `-h` is reserved for `--help` below,
		# registering it for both options would make the usage text misleading.
		var opt_hash = new OptionString(
			"Branch to test",
			"--hash")
		opt_hash.mandatory = true
		var opt_engines = new OptionString(
			"Engines to test, separated with commas ({all_engines.join(", ")} or all)",
			"--engine", "-e")
		var opt_help = new OptionBool("Print this help message", "--help", "-h")
		var opt_verbose = new OptionCount(
			"Be verbose, repeat to increase verbose level (max with -vvv)",
			"--verbose", "-v")
		var opt_cleanup = new OptionBool(
			"Clean up all nitester files (and do not run tests)",
			"--cleanup", "-C")

		opt_ctx.add_option(opt_hash, opt_engines, opt_help, opt_verbose, opt_cleanup)
		opt_ctx.parse args

		# --help?
		if opt_help.value then opt_ctx.usage_error null

		# --verbose?
		verbose = opt_verbose.value

		# --cleanup?
		if opt_cleanup.value then
			assert tmp_dir.file_exists
			# Remove every entry of `tmp_dir` prefixed by "nit", except `ccache_dir`
			for file in tmp_dir.files do if file.has_prefix("nit") then
				var full_path = tmp_dir / file
				if full_path == ccache_dir then continue

				assert full_path.file_exists

				var stat = full_path.file_lstat
				if stat.is_dir then
					full_path.rmdir
				else
					full_path.file_delete
				end
			end
			mpi.finalize
			exit 0
		end

		# any files?
		var rest = opt_ctx.rest
		if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
		test_programs = rest

		# hash
		branch_hash = opt_hash.value.as(not null)

		# gather and check engines
		var engines_str = opt_engines.value
		var engines
		if engines_str == null then
			# default
			engines = ["nitg-s"]
		else
			engines = engines_str.split(',')

			if engines.has("all") then
				# all engines
				engines = all_engines
			end
		end

		# check validity of targeted engines
		var unknown_engines = new Array[String]
		for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine

		if not unknown_engines.is_empty then
			opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
		end
		self.engines = engines
	end

	# All tasks to be performed
	var tasks = new Array[Task]

	# Gather and register all tasks
	#
	# One `Task` is created per pair of engine and test program, in the same
	# order on every node so a task id designates the same task everywhere.
	# Programs whose path contains a non-empty line of `tests/turing.skip`
	# are ignored.
	fun create_tasks
	do
		# At this point we are in our local nit
		var skip_path = "tests/turing.skip"
		var skip
		if skip_path.file_exists then
			var skip_file = new FileReader.open(skip_path)
			skip = skip_file.read_lines
			skip_file.close
		else
			skip = new Array[String]
		end

		for engine in engines do for prog in test_programs do
			# Is it blacklisted?
			for s in skip do if not s.is_empty and prog.has(s) then
				# Compare ranks with ranks (not with a bare `Int`), as done in the rest of this module
				if verbose > 0 and rank == controller_rank then print "Skipping test '{prog}' because of '{s}' in turing.skip"
				continue label
			end

			tasks.add new Task(engine, prog)
		end label
	end
end
200
# Single controller to dispatch tasks, gather results and produce stats
class Controller
	super Processor

	redef fun rank do return controller_rank

	# Id as `Int` of the next task to distribute
	var next_task_id = 0

	# On a handled signal: notify the workers, print the results gathered so far and exit
	redef fun receive_signal(signal)
	do
		shutdown
		print_results

		mpi.finalize
		exit 0
	end

	redef fun run
	do
		read_cli_options
		create_tasks
		distribute_tasks
		print_results
	end

	# Cumulated results from workers
	var results = new ResultSet

	# Maintain communication with workers to distribute tasks and receive results
	#
	# Returns once every worker that received a task has reported with `done_tag`,
	# or as soon as a message with an unexpected tag is probed.
	fun distribute_tasks
	do
		var at_work = new Array[Rank]

		# send initial tasks
		for r in [1..comm_world.size[ do
			var sent = send_task_to(r.rank)
			if sent then
				at_work.add r.rank
			else
				# Nothing left for this worker, dismiss it right away
				mpi.send_empty(r.rank, quit_tag, comm_world)
			end
		end

		var status = new Status
		# await results and send new tasks
		while not at_work.is_empty do
			check_signals

			# Double probe to avoid bug with some implementation of MPI
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)
			mpi.probe(new Rank.any, new Tag.any, comm_world, status)

			if status.tag == result_tag then
				# Receive results from a worker
				var count = status.count(new DataType.int)
				mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)

				# Parse results from C array to `Result` instances
				#
				# Each result is on 4 ints: task id, arg, alt and result.
				#
				# See the comments where the data is produced in `Worker::work_on_tasks` for more information
				assert count % 4 == 0
				for t in (count/4).times do
					var tt = t*4

					var task_id = buffer[tt]
					var arg = buffer[tt+1]
					var alt = buffer[tt+2]
					var res = buffer[tt+3]

					var result = new Result(tasks[task_id], arg, alt)

					# Same integer codes as written by `Worker::work_on_tasks`
					if res == 1 then result.ok = true
					if res == 2 then result.ok_empty = true
					if res == 3 then result.no_sav = true
					if res == 4 then result.fixme = true
					if res == 5 then result.fail = true
					if res == 6 then result.soso = true
					if res == 7 then result.skip = true
					if res == 8 then result.todo = true
					if res == 9 then result.skip_exec = true
					if res == 0 then result.unknown = true

					results.add result

					if verbose > 0 and results.length % 25 == 0 then print_short_results
				end

			else if status.tag == need_work_tag then
				# A worker needs more work
				mpi.recv_empty(status.source, status.tag, comm_world)
				var sent = send_task_to(status.source)
				if not sent then
					# no more work, quit
					mpi.send_empty(status.source, quit_tag, comm_world)
				end
			else if status.tag == done_tag then
				# A worker is done and will quit
				mpi.recv_empty(status.source, status.tag, comm_world)
				at_work.remove(status.source)

				if verbose > 0 then print "Worker {status.source} is done ({at_work.length} still at work)"
			else
				print "Unexpected tag {status.tag}"
				shutdown
				break
			end
		end
		status.free
	end

	# Send a packet of tasks to worker at `rank`
	#
	# Only the id of the first task of the packet is sent.
	# Returns `false`, without sending anything, when all tasks were already distributed.
	fun send_task_to(rank: Rank): Bool
	do
		if next_task_id >= tasks.length then return false

		buffer[0] = next_task_id
		next_task_id += tasks_per_packet

		mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)

		if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
		return true
	end

	# Display the accumulated results received from workers
	fun print_results
	do
		print "# results #"
		print "* {results.length} total"
		print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
		print "* {results.fails.length} fails"
		print "* {results.no_savs.length} no savs"
		print "* {results.fixmes.length} fixmes"
		print "* {results.sosos.length} sosos"
		print "* {results.skips.length} skips"
		print "* {results.todos.length} todos"
		print "* {results.skip_execs.length} skip execs"
		print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
	end

	# Display a one-line summary of the results received so far
	fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
		"& {results.fails.length} / {results.length}"

	# Abnormally shut down the running tests
	#
	# The quit notification is sent to each worker individually: the wildcard
	# `Rank.any` is meant for the source of a probe or a receive, it does not
	# designate a valid destination for a send.
	fun shutdown
	do
		print "Shutting down"
		for r in [1..comm_world.size[ do
			mpi.send_empty(r.rank, quit_tag, comm_world)
		end
	end
end
354
# A worker node which actually executes the tests
class Worker
	super Processor

	# The `Rank` of `self`
	redef var rank: Rank

	# Compilation directory
	var comp_dir = "/dev/shm/nit_compile{rank}" is lazy

	# Directory to store the xml files produced for Jenkins
	var xml_dir = "~/jenkins_xml/"

	# Output file of the `tests.sh` script
	var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy

	# Working clone of the Nit repository of this worker, created by `setup`
	var local_nit = "/dev/shm/nit{rank}" is lazy

	# Repository cloned by `setup` (actually the local source)
	#
	# Its `bin/` directory is copied into the clone, so it must be already
	# updated and built before execution.
	var remote_nit = "~/nit/"

	# Compiled `Regex` to detect the argument of an execution
	var re_arg: Regex = "arg [0-9]+".to_re

	# Compiled `Regex` to detect the alternative of an execution
	var re_alt: Regex = "_alt[0-9]+".to_re

	redef fun run
	do
		read_cli_options
		setup
		create_tasks
		work_on_tasks
		cleanup
	end

	# Setup the testing environment
	#
	# Clone the nit repository into `local_nit`, check out `branch_hash`
	# and leave the current directory at the root of the clone.
	fun setup
	do
		if verbose > 0 then sys.system "hostname"

		if local_nit.file_exists then local_nit.rmdir

		exec_and_check "git clone {remote_nit} {local_nit}"
		local_nit.chdir
		exec_and_check "git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
		exec_and_check "git fetch origin --quiet"
		exec_and_check "git checkout {branch_hash}"
		exec_and_check "cp {remote_nit}/bin/* bin/"
		exec_and_check "src/git-gen-version.sh"
	end

	# Execute `cmd` and assert that it exits with status 0
	#
	# The command is always executed; it is only echoed in verbose mode.
	private fun exec_and_check(cmd: String)
	do
		if verbose > 0 then print "+ {cmd}"

		var res = sys.system(cmd)
		assert res == 0 else print "Command '{cmd}' failed."
	end

	# Clean up the testing environment
	#
	# Delete all temporary files, except `ccache_dir`.
	fun cleanup
	do
		if comp_dir.file_exists then comp_dir.rmdir
		if tests_sh_out.file_exists then tests_sh_out.file_delete
		# `local_nit` is the cloned repository, a directory: remove it as `setup` does
		if local_nit.file_exists then local_nit.rmdir
	end

	# Single C `int` to hold the next task id received from the `Controller`
	var task_buffer = new CIntArray(1)

	# Manage communication with the `Controller` and execute dispatched `Task`s
	#
	# Loops until a `quit_tag` (or an unexpected tag) is received from the `Controller`.
	fun work_on_tasks
	do
		var status = new Status
		loop
			check_signals

			# We double probe to prevent bug where a single probes does not receive the
			# real next read.
			mpi.probe(controller_rank, new Tag.any, comm_world, status)
			mpi.probe(controller_rank, new Tag.any, comm_world, status)

			if status.tag == task_tag then
				# Receive tasks to execute
				mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
				var first_id = task_buffer[0]
				for task_id in [first_id .. first_id + tasks_per_packet[ do

					# If id is over all known tasks, stop right here
					if task_id >= tasks.length then break
					var task = tasks[task_id]

					# Use the absolute path: this is executed once per task, and a
					# relative "tests" is only valid from the root of the clone.
					(local_nit / "tests").chdir

					# Command line to execute test
					var cmd = "XMLDIR={xml_dir} " +
						"CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
						"./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"

					# Execute test
					sys.system cmd

					# Test results were written to file, read them
					var fstream = new FileReader.open(tests_sh_out)
					var content = fstream.read_all
					fstream.close

					# Parse result and prepare them for sending
					#
					# The structure is composed of 4 ints for each result.
					# 1. task id
					# 2. arg number
					# 3. alt number
					# 4. test result as int
					var c = results_count
					for line in content.split('\n') do if not line.is_empty then
						var cc = c*4

						buffer[cc] = task_id

						# Both patterns have a 4 chars prefix ("arg " and "_alt") before the number
						var arg_match = line.search(re_arg)
						var arg = 0
						if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
						buffer[cc+1] = arg

						var alt_match = line.search(re_alt)
						var alt = 0
						if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
						buffer[cc+2] = alt

						var res = null
						if line.has("[ok]") then res = 1
						if line.has("[0k]") then res = 2
						if line.has("[=== no sav ===]") then res = 3
						if line.has("[fixme]") then res = 4
						if line.has("[======= fail") then res = 5
						if line.has("[======= soso") then res = 6
						if line.has("[skip]") then res = 7
						if line.has("[todo]") then res = 8
						if line.has("[skip exec]") then res = 9

						if res == null then
							res = 0
							if verbose > 1 then print "Unknown result: '{line}'"
						end
						buffer[cc+3] = res

						c += 1

						if verbose > 2 then print "tests.sh output line: {line}"

						# If result buffer is full, send to `Controller`
						if c*4 == buffer.length then
							# `send_results` relies on `results_count`, publish the
							# local count first or the buffered results would be dropped.
							results_count = c
							send_results
							c = 0
						end
					end

					if verbose > 1 then print "Done testing: {task}"

					self.results_count = c
				end

				mpi.send_empty(controller_rank, need_work_tag, comm_world)
			else if status.tag == quit_tag then
				# Notification from the `Controller` to quit
				mpi.recv_empty(status.source, status.tag, comm_world)

				# Send remaining results
				send_results

				# Notify `Controller` that `self` is done and will quit
				mpi.send_empty(controller_rank, done_tag, comm_world)
				break
			else
				print "Unexpected tag {status.tag}"
				break
			end
		end
		status.free
	end

	# Total results listed in `buffer` and ready to send
	var results_count = 0

	# Send all results in `buffer` to the `Controller`
	#
	# Does nothing when `results_count` is 0, resets it to 0 otherwise.
	fun send_results
	do
		if results_count > 0 then
			if verbose > 2 then print "Sending {results_count} results"
			mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
			results_count = 0
		end
	end

	# On a handled signal: remove the temporary files and exit
	redef fun receive_signal(signal)
	do
		cleanup
		mpi.finalize
		exit 0
	end
end
564
# One unit of work: a `test_program` to run with a given `engine`
#
# A single task may cover several executions of the program, one for each
# of its alts and args.
class Task
	# Name of the engine used to run `test_program`
	var engine: String

	# Path of the program to test with `engine`
	var test_program: String

	# The engine and the program, separated by a space
	redef fun to_s
	do
		return engine + " " + test_program
	end
end
578
# Outcome of one execution of a `Task`
#
# A `Task` may produce more than one result.
class Result
	# The `Task` which produced `self`
	var task: Task

	# Index of the argument set of the execution
	var arg: Int

	# Index of the alternative of the execution
	var alt: Int

	# Was the result an _ok_?
	var ok = false

	# Was the result an _0k_?
	var ok_empty = false

	# Was the result a _no sav_?
	var no_sav = false

	# Was the result a _fixme_?
	var fixme = false

	# Was the result a _fail_?
	var fail = false

	# Was the result a _soso_?
	var soso = false

	# Was the test skipped?
	var skip = false

	# Was the result a _todo_?
	var todo = false

	# Was the execution of the test skipped?
	var skip_exec = false

	# Is the result unknown? This is probably an error.
	var unknown = false

	# The task, arg and alt followed by the name of the result
	#
	# When more than one flag is set, the one tested first below is reported.
	redef fun to_s
	do
		var err
		if skip_exec then
			err = "skip_exec"
		else if todo then
			err = "todo"
		else if skip then
			err = "skip"
		else if soso then
			err = "soso"
		else if fail then
			err = "fail"
		else if fixme then
			err = "fixme"
		else if ok_empty then
			err = "0k"
		else if ok then
			err = "ok"
		else if no_sav then
			err = "no sav"
		else
			err = "Unknown"
		end

		return "{task} arg{arg} alt{alt} => {err}"
	end
end
638
# Set of all `Result`, also indexed by kind of result
#
# Results can only be added, `remove` and `clear` abort.
class ResultSet
	super HashSet[Result]

	# Results flagged `no_sav`
	var no_savs = new HashSet[Result]

	# Results flagged `ok`
	var oks = new HashSet[Result]

	# Results flagged `ok_empty`
	var ok_empties = new HashSet[Result]

	# Results flagged `fixme`
	var fixmes = new HashSet[Result]

	# Results flagged `fail`
	var fails = new HashSet[Result]

	# Results flagged `soso`
	var sosos = new HashSet[Result]

	# Results flagged `skip`
	var skips = new HashSet[Result]

	# Results flagged `todo`
	var todos = new HashSet[Result]

	# Results flagged `skip_exec`
	var skip_execs = new HashSet[Result]

	# Results flagged `unknown`
	var unknowns = new HashSet[Result]

	# TODO remove
	var per_engines = new HashMap[String, Result]

	# Register `result` in the set of each of its flags, then in `self`
	redef fun add(result)
	do
		# Successful kinds
		if result.ok then oks.add result
		if result.ok_empty then ok_empties.add result

		# Problematic kinds
		if result.fail then fails.add result
		if result.soso then sosos.add result
		if result.no_sav then no_savs.add result
		if result.fixme then fixmes.add result
		if result.unknown then unknowns.add result

		# Tests not executed
		if result.todo then todos.add result
		if result.skip then skips.add result
		if result.skip_exec then skip_execs.add result

		super
	end

	# Unsupported
	redef fun remove(r) do abort

	# Unsupported
	redef fun clear do abort
end
677
redef class OptionContext

	# Print usage with a possible error `message`, then finalize MPI and exit
	#
	# The exit status is 1 when a `message` is given, 0 otherwise.
	# The error is printed by every node calling this method, the usage
	# itself is printed by the controller only.
	private fun usage_error(message: nullable String)
	do
		var ret = 0
		if message != null then
			print "Error: {message}"
			ret = 1
		end

		# Compare ranks with ranks (not with a bare `Int`), as done in the rest of this module
		if comm_world.rank == 0.rank then
			print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
			usage
		end

		mpi.finalize
		exit ret
	end
end
698
# Print `msg`, prefixed by `rank/comm_world.size` on all nodes but the one of rank 0
redef fun print(msg: Object)
do
	if comm_world.rank == 0.rank then
		# Rank 0 prints the message untouched
		super msg
	else
		super "{comm_world.rank}/{comm_world.size}: {msg}"
	end
end
706
# The MPI instance of this process, created at the first call
fun mpi: MPI
do
	var instance = once new MPI
	return instance
end
709
# Launch MPI, the first call to `mpi` creates the instance
mpi

# Rank of this process
var rank = comm_world.rank

# The node of rank 0 is the `Controller`, all the others are `Worker`s
var processor: Processor
if rank != 0.rank then
	processor = new Worker(rank)
else
	processor = new Controller
end

processor.run

mpi.finalize