contrib/nitester/src/nitester.nit

   1 # This file is part of NIT (http://www.nitlanguage.org).
   2 #
   3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 # Tester of Nit engines on an MPI cluster
  18 module nitester
  19
  20 import mpi
  21 import signals
  22 import opts
  23
  24 # Any processor, worker or controller
  25 #
  26 # All data and methods within this class are shared by the controller and the
  27 # workers.
  28 abstract class Processor
  29         super SignalHandler
  30
  31         # Controller rank is always 0
  32         var controller_rank: Rank = 0.rank
  33
  34         # Rank on this processor
  35         fun rank: Rank is abstract
  36
  37         # Where to store data for transfer between nodes
  38         #
  39         # Require: `buffer.length % 4 == 0`
  40         var buffer = new CIntArray(1024)
  41
  42         # Verbosity level, set from the number of `-v` given on the command line
  43         var verbose = 0
  44
  45         init
  46         do
  47                 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
  48                 # on the first process.
  49                 handle_signal(sigterm, true)
  50         end
  51
  52         # Tag of a new task packet of size `tasks_per_packet`
  53         var task_tag: Tag = 0.tag
  54
  55         # Tag to return a set of `Result` through `buffer`
  56         var result_tag: Tag = 1.tag
  57
  58         # Tag to notify `Worker` when to quit
  59         var quit_tag: Tag = 2.tag
  60
  61         # Tag to request more work from the `Controller` by a `Worker`
  62         var need_work_tag: Tag = 4.tag
  63
  64         # Tag to notify `Controller` that the sender `Worker` is done
  65         var done_tag: Tag = 5.tag
  66
  67         # Number of tasks within each task assignation with `task_tag`
  68         var tasks_per_packet = 1
  69
  70         # Run the main logic of this node
  71         fun run is abstract
  72
  73         # Engines targeted by this execution
  74         var engines: Array[String] is noinit
  75
  76         # All known engines, used to detect errors in `engines`
  77         var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]
  78
  79         # Programs to test in this execution
  80         var test_programs: Array[String] is noinit
  81
  82         # Root of the temporary directory
  83         var tmp_dir = "/dev/shm/"
  84
  85         # `ccache` directory
  86         var ccache_dir = "/dev/shm/nit_ccache"
  87
  88         # Read command line options
                 #
                 # Sets `verbose`, `test_programs` and `engines` from the arguments.
                 # With `--help`, or on invalid arguments, the usage is printed and the
                 # process exits. With `--cleanup`, entries of `tmp_dir` prefixed by `nit`
                 # (except `ccache_dir`) are deleted, then the process exits.
  89         fun read_cli_options
  90         do
  91                 var opt_ctx = new OptionContext
  92                 var opt_engines = new OptionString(
  93                         "Engines to test, separated with commas ({all_engines.join(", ")} or all)",
  94                         "--engine", "-e")
  95                 var opt_help = new OptionBool("Print this help message", "--help", "-h")
  96                 var opt_verbose = new OptionCount(
  97                         "Be verbose, repeat to increase verbose level (max with -vvv)",
  98                         "--verbose", "-v")
  99                 var opt_cleanup = new OptionBool(
 100                         "Clean up all nitester files (and do not run tests)",
 101                         "--cleanup", "-C")
 102
 103                 opt_ctx.add_option(opt_engines, opt_help, opt_verbose, opt_cleanup)
 104                 opt_ctx.parse args
 105
 106                 # --help?
 107                 if opt_help.value then opt_ctx.usage_error null
 108
 109                 # --verbose?
 110                 verbose = opt_verbose.value
 111
 112                 # --cleanup?
 113                 if opt_cleanup.value then
 114                         assert tmp_dir.file_exists
 115                         for file in tmp_dir.files do if file.has_prefix("nit") then
 116                                 var full_path = tmp_dir / file
 117                                 if full_path == ccache_dir then continue
 118
                                         # Both the controller and the workers run this cleanup, so a
                                         # process on the same host may already have deleted this entry.
                                         # Skip it instead of aborting on an assertion.
 119                                 if not full_path.file_exists then continue
 120
 121                                 var stat = full_path.file_lstat
 122                                 if stat.is_dir then
 123                                         full_path.rmdir
 124                                 else
 125                                         full_path.file_delete
 126                                 end
 127                                 stat.free
 128                         end
 129                         mpi.finalize
 130                         exit 0
 131                 end
 132
 133                 # any files?
 134                 var rest = opt_ctx.rest
 135                 if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
 136                 test_programs = rest
 137
 138                 # gather and check engines
 139                 var engines_str = opt_engines.value
 140                 var engines
 141                 if engines_str == null then
 142                         # default
 143                         engines = ["nitg-s"]
 144                 else
 145                         engines = engines_str.split(',')
 146
 147                         if engines.has("all") then
 148                                 # all engines
 149                                 engines = all_engines
 150                         end
 151                 end
 152
 153                 # check validity of targeted engines
 154                 var unknown_engines = new Array[String]
 155                 for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine
 156
 157                 if not unknown_engines.is_empty then
 158                         opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or more of {all_engines.join(", ")})"
 159                 end
 160                 self.engines = engines
 161         end
 162
 163         # All tasks to be performed
 164         var tasks = new Array[Task]
 165
 166         # Gather and register all tasks
                 #
                 # Adds to `tasks` one `Task` per pair of test program and engine, except
                 # for the programs whose name contains a non-empty line of
                 # `tests/turing.skip`.
 167         fun create_tasks
 168         do
 169                 # At this point we are in our local nit
 170                 var skip_path = "tests/turing.skip"
 171                 var skip
 172                 if skip_path.file_exists then
 173                         var skip_file = new IFStream.open(skip_path)
 174                         skip = skip_file.read_lines
 175                         skip_file.close
 176                 else
 177                         skip = new Array[String]
 178                 end
 179
 180                 for prog in test_programs do for engine in engines do
 181
 182                         # Is it blacklisted?
 183                         for s in skip do if not s.is_empty and prog.has(s) then
                                         # Only the controller reports skipped tests. `rank` is a `Rank`,
                                         # so compare it to `controller_rank` and not to an `Int`.
 184                                 if verbose > 0 and rank == controller_rank then print "Skipping test '{prog}' because of '{s}' in turing.skip"
 185                                 continue label
 186                         end
 187
 188                         tasks.add new Task(engine, prog)
 189                 end label
 190         end
 191 end
 192
 193 # Single controller to dispatch tasks, gather results and produce stats
 194 class Controller
 195         super Processor
 196
 197         redef fun rank do return controller_rank
 198
 199         # Id as `Int` of the next task to distribute
 200         var next_task_id = 0
 201
                 # On a handled signal: ask the workers to quit, print the results
                 # gathered so far, then finalize MPI and exit.
 202         redef fun receive_signal(signal)
 203         do
 204                 shutdown
 205                 print_results
 206
 207                 mpi.finalize
 208                 exit 0
 209         end
 210
 211         redef fun run
 212         do
 213                 read_cli_options
 214                 create_tasks
 215                 distribute_tasks
 216                 print_results
 217         end
 218
 219         # Cumulated results from workers
 220         var results = new ResultSet
 221
 222         # Maintain communication with workers to distribute tasks and receive results
 223         fun distribute_tasks
 224         do
 225                 var at_work = new Array[Rank]
 226
 227                 # send initial tasks
 228                 for r in [1..comm_world.size[ do
 229                         var sent = send_task_to(r.rank)
 230                         if sent then
 231                                 at_work.add r.rank
 232                         else
 233                                 mpi.send_empty(r.rank, quit_tag, comm_world)
 234                         end
 235                 end
 236
 237                 var status = new Status
 238                 # await results and send new tasks
 239                 while not at_work.is_empty do
 240                         check_signals
 241
 242                         # Double probe to avoid bug with some implementation of MPI
 243                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 244                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 245
 246                         if status.tag == result_tag then
 247                                 # Receive results from a worker
 248                                 var count = status.count(new DataType.int)
 249                                 mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)
 250
 251                                 # Parse results from C array to `Result` instances
 252                                 #
 253                                 # Each result is on 4 ints: task id, arg, alt and result.
 254                                 #
 255                                 # See the comments where the data is produced in `Worker::work_on_tasks` for more information.
 256                                 assert count % 4 == 0
 257                                 for t in (count/4).times do
 258                                         var tt = t*4
 259
 260                                         var task_id = buffer[tt]
 261                                         var arg = buffer[tt+1]
 262                                         var alt = buffer[tt+2]
 263                                         var res = buffer[tt+3]
 264
 265                                         var result = new Result(tasks[task_id], arg, alt)
 266
 267                                         if res == 1 then result.ok = true
 268                                         if res == 2 then result.ok_empty = true
 269                                         if res == 3 then result.no_sav = true
 270                                         if res == 4 then result.fixme = true
 271                                         if res == 5 then result.fail = true
 272                                         if res == 6 then result.soso = true
 273                                         if res == 7 then result.skip = true
 274                                         if res == 8 then result.todo = true
 275                                         if res == 9 then result.skip_exec = true
 276                                         if res == 0 then result.unknown = true
 277
 278                                         results.add result
 279
 280                                         if verbose > 0 and results.length % 25 == 0 then print_short_results
 281                                 end
 282
 283                         else if status.tag == need_work_tag then
 284                                 # A worker needs more work
 285                                 mpi.recv_empty(status.source, status.tag, comm_world)
 286                                 var sent = send_task_to(status.source)
 287                                 if not sent then
 288                                         # no more work, quit
 289                                         mpi.send_empty(status.source, quit_tag, comm_world)
 290                                 end
 291                         else if status.tag == done_tag then
 292                                 # A worker is done and will quit
 293                                 mpi.recv_empty(status.source, status.tag, comm_world)
 294                                 at_work.remove(status.source)
 295
 296                                 if verbose > 1 then print "worker {status.source} is done ({at_work.length} still at work)"
 297                         else
 298                                 print "Unexpected tag {status.tag}"
 299                                 shutdown
 300                                 break
 301                         end
 302                 end
 303                 status.free
 304         end
 305
 306         # Send a packet of tasks to worker at `rank`
                 #
                 # Only the id of the first task of the packet is sent, the worker
                 # deduces the others from `tasks_per_packet`.
                 # Return `false`, without sending anything, when no task is left.
 307         fun send_task_to(rank: Rank): Bool
 308         do
 309                 if next_task_id >= tasks.length then return false
 310
 311                 buffer[0] = next_task_id
 312                 next_task_id += tasks_per_packet
 313
 314                 mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)
 315
 316                 if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
 317                 return true
 318         end
 319
 320         # Display the accumulated results received from workers
 321         fun print_results
 322         do
 323                 print "# results #"
 324                 print "* {results.length} total"
 325                 print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
 326                 print "* {results.fails.length} fails"
 327                 print "* {results.no_savs.length} no savs"
 328                 print "* {results.fixmes.length} fixmes"
 329                 print "* {results.sosos.length} sosos"
 330                 print "* {results.skips.length} skips"
 331                 print "* {results.todos.length} todos"
 332                 print "* {results.skip_execs.length} skip execs"
 333                 print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
 334         end
 335
                 # Display a one-line summary of the results received so far
 336         fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
 337                 "& {results.fails.length} / {results.length}"
 338
 339         # Abnormally shut down the running tests
                 #
                 # Send `quit_tag` to every worker.
 340         fun shutdown
 341         do
 342                 print "Shutting down"
                         # `Rank.any` is a wildcard for the source of a probe or receive, not a
                         # valid send destination, so each worker rank is notified explicitly.
 343                 for r in [1..comm_world.size[ do mpi.send_empty(r.rank, quit_tag, comm_world)
 344         end
 345 end
 346
 347 # A worker node which actually executes the tests
 348 class Worker
 349         super Processor
 350
 351         # The `Rank` of `self`
 352         redef var rank: Rank
 353
 354         # Compilation directory
 355         var comp_dir = "/dev/shm/nit_compile{rank}" is lazy
 356
 357         # Output file directory
 358         var out_dir = "/dev/shm/nit_out{rank}" is lazy
 359
 360         # Directory to store the xml files produced for Jenkins
 361         var xml_dir = "~/jenkins_xml/"
 362
 363         # Output file of the `tests.sh` script
 364         var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy
 365
 366         # Source Nit repository, must be already updated and `make` before execution
 367         var nit_source_dir = "~/nit"
 368
 369         # Compiled `Regex` to detect the argument of an execution
 370         var re_arg: Regex = "arg [0-9]+".to_re
 371
 372         # Compiled `Regex` to detect the alternative of an execution
 373         var re_alt: Regex = "_alt[0-9]+".to_re
 374
 375         redef fun run
 376         do
 377                 read_cli_options
 378                 setup
 379                 create_tasks
 380                 work_on_tasks
 381                 cleanup
 382         end
 383
 384         # Setup the testing environment
 385         #
 386         # Currently, this only prints the hostname when running in verbose mode.
 387         fun setup
 388         do
 389                 if verbose > 0 then sys.system "hostname"
 390         end
 391
 392         # Clean up the testing environment
 393         #
 394         # Delete `comp_dir`, `out_dir` and `tests_sh_out`; `ccache_dir` is left untouched.
 395         fun cleanup
 396         do
 397                 if comp_dir.file_exists then comp_dir.rmdir
 398                 if out_dir.file_exists then out_dir.rmdir
 399                 if tests_sh_out.file_exists then tests_sh_out.file_delete
 400         end
 401
 402         # Single C `int` to hold the next task id received from the `Controller`
 403         var task_buffer = new CIntArray(1)
 404
 405         # Manage communication with the `Controller` and execute dispatched `Task`s
 406         fun work_on_tasks
 407         do
 408                 var status = new Status
 409                 loop
 410                         check_signals
 411
 412                         # We double probe to prevent a bug where a single probe does not receive the
 413                         # real next read.
 414                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 415                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 416
 417                         if status.tag == task_tag then
 418                                 # Receive tasks to execute
 419                                 mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
 420                                 var first_id = task_buffer[0]
 421                                 for task_id in [first_id .. first_id + tasks_per_packet[ do
 422
 423                                         # If id is over all known tasks, stop right here
 424                                         if task_id >= tasks.length then break
 425                                         var task = tasks[task_id]
 426
 427                                         # Command line to execute test
 428                                         var cmd = "XMLDIR={xml_dir} ERRLIST={out_dir}/errlist TMPDIR={out_dir} " +
 429                                                 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
 430                                                 "./tests.sh --compdir {comp_dir} --outdir {out_dir} " +
 431                                                 " --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
 432
 433                                         # Execute test
 434                                         sys.system cmd
 435
 436                                         # Test results were written to file, read them
 437                                         var fstream = new IFStream.open(tests_sh_out)
 438                                         var content = fstream.read_all
 439                                         fstream.close
 440
 441                                         # Parse result and prepare them for sending
 442                                         #
 443                                         # The structure is composed of 4 ints for each result.
 444                                         # 1. task id
 445                                         # 2. arg number
 446                                         # 3. alt number
 447                                         # 4. test result as int
 448                                         var c = results_count
 449                                         for line in content.split('\n') do if not line.is_empty then
 450                                                 var cc = c*4
 451
 452                                                 buffer[cc] = task_id
 453
 454                                                 var arg_match = line.search(re_arg)
 455                                                 var arg = 0
 456                                                 if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
 457                                                 buffer[cc+1] = arg
 458
 459                                                 var alt_match = line.search(re_alt)
 460                                                 var alt = 0
 461                                                 if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
 462                                                 buffer[cc+2] = alt
 463
 464                                                 var res = null
 465                                                 if line.has("[ok]") then res = 1
 466                                                 if line.has("[0k]") then res = 2
 467                                                 if line.has("[=== no sav ===]") then res = 3
 468                                                 if line.has("[fixme]") then res = 4
 469                                                 if line.has("[======= fail") then res = 5
 470                                                 if line.has("[======= soso") then res = 6
 471                                                 if line.has("[skip]") then res = 7
 472                                                 if line.has("[todo]") then res = 8
 473                                                 if line.has("[skip exec]") then res = 9
 474
 475                                                 if res == null then
 476                                                         res = 0
 477                                                         if verbose > 1 then print "Unknown result: '{line}'"
 478                                                 end
 479                                                 buffer[cc+3] = res
 480
 481                                                 c += 1
 482
 483                                                 if verbose > 2 then print "tests.sh output line: {line}"
 484
 485                                                 # If result buffer is full, send to `Controller`
 486                                                 if c*4 == buffer.length then
                                                                 # `send_results` sends `results_count` results, so it must
                                                                 # be brought up to date with the local counter first.
                                                                 # Otherwise the results buffered during this task are lost.
                                                                 self.results_count = c
 487                                                         send_results
 488                                                         c = 0
 489                                                 end
 490                                         end
 491
 492                                         self.results_count = c
 493                                 end
 494
 495                                 mpi.send_empty(controller_rank, need_work_tag, comm_world)
 496                         else if status.tag == quit_tag then
 497                                 # Notification from the `Controller` to quit
 498                                 mpi.recv_empty(status.source, status.tag, comm_world)
 499
 500                                 # Send remaining results
 501                                 send_results
 502
 503                                 # Notify `Controller` that `self` is done and will quit
 504                                 mpi.send_empty(controller_rank, done_tag, comm_world)
 505                                 break
 506                         else
 507                                 print "Unexpected tag {status.tag}"
 508                                 break
 509                         end
 510                 end
 511                 status.free
 512         end
 513
 514         # Total results listed in `buffer` and ready to send
 515         var results_count = 0
 516
 517         # Send all results in `buffer` to the `Controller`
                 #
                 # Does nothing if `results_count` is 0, resets it to 0 after sending.
 518         fun send_results
 519         do
 520                 if results_count > 0 then
 521                         if verbose > 1 then print "sending {results_count} results"
 522                         mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
 523                         results_count = 0
 524                 end
 525         end
 526
                 # On a handled signal: delete the temporary files, finalize MPI and exit
 527         redef fun receive_signal(signal)
 528         do
 529                 cleanup
 530                 mpi.finalize
 531                 exit 0
 532         end
 533 end
 534
 535 # A unit of work: one `test_program` to run with one `engine`
 536 #
 537 # Because of the alts and args of `test_program`, a single task may
 538 # involve the test of more than one program.
 539 class Task
 540         # Name of the engine used to execute `test_program`
 541         var engine: String
 542
 543         # Test program to execute with `engine`
 544         var test_program: String
 545
 546         redef fun to_s do return engine + " " + test_program
 547 end
 548
 549 # Outcome of one execution of a `Task`
 550 #
 551 # A single `Task` may produce more than one result.
 552 class Result
 553         # The `Task` which produced `self`
 554         var task: Task
 555
 556         # Index of the argument used by the execution
 557         var arg: Int
 558
 559         # Index of the alternative used by the execution
 560         var alt: Int
 561
 562         # Was the execution reported as _ok_?
 563         var ok = false
 564
 565         # Was the execution reported as _0k_?
 566         var ok_empty = false
 567
 568         # Was the execution reported as _no sav_?
 569         var no_sav = false
 570
 571         # Was the execution reported as _fixme_?
 572         var fixme = false
 573
 574         # Was the execution reported as _fail_?
 575         var fail = false
 576
 577         # Was the execution reported as _soso_?
 578         var soso = false
 579
 580         # Was the test skipped?
 581         var skip = false
 582
 583         # Was the test reported as _todo_?
 584         var todo = false
 585
 586         # Was the execution of the test skipped?
 587         var skip_exec = false
 588
 589         # Was the result not recognized? This is probably an error
 590         var unknown = false
 591
 592         redef fun to_s
 593         do
 594                 var prefix = "{task} arg{arg} alt{alt} => "
 595                 if skip_exec then return prefix + "skip_exec"
 596                 if todo then return prefix + "todo"
 597                 if skip then return prefix + "skip"
 598                 if soso then return prefix + "soso"
 599                 if fail then return prefix + "fail"
 600                 if fixme then return prefix + "fixme"
 601                 if ok_empty then return prefix + "0k"
 602                 if ok then return prefix + "ok"
 603                 if no_sav then return prefix + "no sav"
 604
 605                 return prefix + "Unknown"
 606         end
 607 end
 608
 609 # Set of every `Result`, which also keeps one subset per kind of result
 610 class ResultSet
 611         super HashSet[Result]
 612
             # Subsets by kind of result, filled by `add`
 613         var no_savs = new HashSet[Result]
 614         var oks = new HashSet[Result]
 615         var ok_empties = new HashSet[Result]
 616         var fixmes = new HashSet[Result]
 617         var fails = new HashSet[Result]
 618         var sosos = new HashSet[Result]
 619         var skips = new HashSet[Result]
 620         var todos = new HashSet[Result]
 621         var skip_execs = new HashSet[Result]
 622         var unknowns = new HashSet[Result]
 623
 624         # TODO remove
 625         var per_engines = new HashMap[String, Result]
 626
             # Register `result` in the subset of each flag it has set, then in `self`
 627         redef fun add(result)
 628         do
 629                 if result.unknown then unknowns.add result
 630                 if result.skip_exec then skip_execs.add result
 631                 if result.todo then todos.add result
 632                 if result.skip then skips.add result
 633                 if result.soso then sosos.add result
 634                 if result.fail then fails.add result
 635                 if result.fixme then fixmes.add result
 636                 if result.ok_empty then ok_empties.add result
 637                 if result.ok then oks.add result
 638                 if result.no_sav then no_savs.add result
 639
 640                 super
 641         end
 642
             # Not supported, aborts
 643         redef fun remove(r)
             do
                     abort
             end
 644
             # Not supported, aborts
 645         redef fun clear
             do
                     abort
             end
 646 end
 647
 648 redef class OptionContext
 649
 650         # Print usage with a possible error `message`
             #
             # Finalize MPI and exit with status 1 if `message` is given, 0 otherwise.
             # The usage itself is printed by the process of rank 0 only.
 651         private fun usage_error(message: nullable String)
 652         do
 653                 var ret = 0
 654                 if message != null then
 655                         print "Error: {message}"
 656                         ret = 1
 657                 end
 658
                     # Compare to a `Rank`, as everywhere else in this module, not to an `Int`
 659                 if comm_world.rank == 0.rank then
 660                         print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
 661                         usage
 662                 end
 663
 664                 mpi.finalize
 665                 exit ret
 666         end
 667 end
 668
 669 # Prefix every print of a `Worker` node with `rank/comm_world.size`, print as usual on rank 0
 670 redef fun print(msg: Object)
 671 do
 672         if comm_world.rank == 0.rank then
 673                 super msg
 674         else super "{comm_world.rank}/{comm_world.size}: {msg}"
 675 end
 676
 677 # The running MPI instance, created on the first call and reused afterwards
 678 fun mpi: MPI
     do
             return once new MPI
     end
 679
 680 # Launch MPI by creating the shared instance
 681 mpi
 682
 683 # Rank of this process in `comm_world`
 684 var my_rank = comm_world.rank
 685
 686 # Rank 0 is the `Controller`, any other rank is a `Worker`
 687 var processor: Processor
 688 if my_rank != 0.rank then
 689         processor = new Worker(my_rank)
 690 else
 691         processor = new Controller
 692 end
 693
 694 processor.run
 695
 696 mpi.finalize