contrib/nitester/src/nitester.nit

   1 # This file is part of NIT (http://www.nitlanguage.org).
   2 #
   3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 # Tester of Nit engines on an MPI cluster
  18 module nitester
  19
  20 import mpi
  21 import signals
  22 import opts
  23
  24 # Any processor, worker or controller
  25 #
  26 # All data and methods within this class are shared by the controller and the
  27 # workers.
  28 abstract class Processor
  29         super SignalHandler
  30
  31         # Controller rank is always 0
  32         var controller_rank: Rank = 0.rank
  33
  34         # Rank on this processor
  35         fun rank: Rank is abstract
  36
  37         # Where to store data for transfer between nodes
  38         #
  39         # Require: `buffer.length % 4 == 0`
  40         var buffer = new CIntArray(1024)
  41
  42         # Run in verbose mode, display more text
  43         var verbose = 0
  44
  45         init
  46         do
  47                 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
  48                 # on the first process.
  49                 handle_signal(sigterm, true)
  50         end
  51
  52         # Tag of a new task packet of size `tasks_per_packet`
  53         var task_tag: Tag = 0.tag
  54
  55         # Tag to return a set of `Result` thought `buffer`
  56         var result_tag: Tag = 1.tag
  57
  58         # Tag to notify `Worker` when to quit
  59         var quit_tag: Tag = 2.tag
  60
  61         # Tag to request more work from the `Controller` by a `Worker`
  62         var need_work_tag: Tag = 4.tag
  63
  64         # Tag to notify `Controller` that the sender `Worker` is done
  65         var done_tag: Tag = 5.tag
  66
  67         # Number of tasks within each task assignation with `task_tag`
  68         var tasks_per_packet = 1
  69
  70         # Run the main logic of this node
  71         fun run is abstract
  72
  73         # Hash or name of the branch to test
  74         var branch_hash: String is noinit
  75
  76         # Engines targeted by this execution
  77         var engines: Array[String] is noinit
  78
  79         # All known engines, used to detect errors in `engines`
  80         var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]
  81
  82         # Programs to test in this execution
  83         var test_programs: Array[String] is noinit
  84
  85         # Root of the temporary directory
  86         var tmp_dir = "/dev/shm/"
  87
  88         # `ccache` directory
  89         var ccache_dir = "/dev/shm/nit_ccache"
  90
  91         # Read command line options
  92         fun read_cli_options
  93         do
  94                 var opt_ctx = new OptionContext
  95                 var opt_hash = new OptionString(
  96                         "Branch to test",
  97                         "--hash", "-h")
  98                 opt_hash.mandatory = true
  99                 var opt_engines = new OptionString(
 100                         "Engines to test, separated with commas ({all_engines.join(", ")} or all)",
 101                         "--engine", "-e")
 102                 var opt_help = new OptionBool("Print this help message", "--help", "-h")
 103                 var opt_verbose = new OptionCount(
 104                         "Be verbose, repeat to increase verbose level (max with -vvv)",
 105                         "--verbose", "-v")
 106                 var opt_cleanup = new OptionBool(
 107                         "Clean up all nitester files (and do not run tests)",
 108                         "--cleanup", "-C")
 109
 110                 opt_ctx.add_option(opt_hash, opt_engines, opt_help, opt_verbose, opt_cleanup)
 111                 opt_ctx.parse args
 112
 113                 # --help?
 114                 if opt_help.value then opt_ctx.usage_error null
 115
 116                 # --verbose?
 117                 verbose = opt_verbose.value
 118
 119                 # --cleanup?
 120                 if opt_cleanup.value then
 121                         assert tmp_dir.file_exists
 122                         for file in tmp_dir.files do if file.has_prefix("nit") then
 123                                 var full_path = tmp_dir / file
 124                                 if full_path == ccache_dir then continue
 125
 126                                 assert full_path.file_exists
 127
 128                                 var stat = full_path.file_lstat
 129                                 if stat.is_dir then
 130                                         full_path.rmdir
 131                                 else
 132                                         full_path.file_delete
 133                                 end
 134                                 stat.free
 135                         end
 136                         mpi.finalize
 137                         exit 0
 138                 end
 139
 140                 # any files?
 141                 var rest = opt_ctx.rest
 142                 if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
 143                 test_programs = rest
 144
 145                 # hash
 146                 branch_hash = opt_hash.value.as(not null)
 147
 148                 # gather and check engines
 149                 var engines_str = opt_engines.value
 150                 var engines
 151                 if engines_str == null then
 152                         # default
 153                         engines = ["nitg-s"]
 154                 else
 155                         engines = engines_str.split(',')
 156
 157                         if engines.has("all") then
 158                                 # all engines
 159                                 engines = all_engines
 160                         end
 161                 end
 162
 163                 # check validity of targetted engines
 164                 var unknown_engines = new Array[String]
 165                 for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine
 166
 167                 if not unknown_engines.is_empty then
 168                         opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
 169                 end
 170                 self.engines = engines
 171         end
 172
 173         # All tasks to be performed
 174         var tasks = new Array[Task]
 175
 176         # Gather and register all tasks
 177         fun create_tasks
 178         do
 179                 # At this point we are in our local nit
 180                 var skip_path = "tests/turing.skip"
 181                 var skip
 182                 if skip_path.file_exists then
 183                         var skip_file = new FileReader.open(skip_path)
 184                         skip = skip_file.read_lines
 185                         skip_file.close
 186                 else
 187                         skip = new Array[String]
 188                 end
 189
 190                 for engine in engines do for prog in test_programs do
 191                         # Is is blacklisted?
 192                         for s in skip do if not s.is_empty and prog.has(s) then
 193                                 if verbose > 0 and rank == 0 then print "Skipping test '{prog}' because of '{s}' in turing.skip"
 194                                 continue label
 195                         end
 196
 197                         tasks.add new Task(engine, prog)
 198                 end label
 199         end
 200 end
 201
 202 # Single controller to dispatch tasks, gather results and produce stats
 203 class Controller
 204         super Processor
 205
 206         redef fun rank do return controller_rank
 207
 208         # Id as `Int` of the next task to distribute
 209         var next_task_id = 0
 210
 211         redef fun receive_signal(signal)
 212         do
 213                 shutdown
 214                 print_results
 215
 216                 mpi.finalize
 217                 exit 0
 218         end
 219
 220         redef fun run
 221         do
 222                 read_cli_options
 223                 create_tasks
 224                 distribute_tasks
 225                 print_results
 226         end
 227
 228         # Cumulated results from workers
 229         var results = new ResultSet
 230
 231         # Maintain communication with workers to distribute tasks and receiver results
 232         fun distribute_tasks
 233         do
 234                 var at_work = new Array[Rank]
 235
 236                 # send initial tasks
 237                 for r in [1..comm_world.size[ do
 238                         var sent = send_task_to(r.rank)
 239                         if sent then
 240                                 at_work.add r.rank
 241                         else
 242                                 mpi.send_empty(r.rank, quit_tag, comm_world)
 243                         end
 244                 end
 245
 246                 var status = new Status
 247                 # await results and send new tasks
 248                 while not at_work.is_empty do
 249                         check_signals
 250
 251                         # Double probe to avoid bug with some implementation of MPI
 252                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 253                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 254
 255                         if status.tag == result_tag then
 256                                 # Receive results fron a worker
 257                                 var count = status.count(new DataType.int)
 258                                 mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)
 259
 260                                 # Parse results from C array to `Result` instances
 261                                 #
 262                                 # Each result is on 4 ints: task id, arg, alt and result.
 263                                 #
 264                                 # See the comments where the data is produced in `Worker::work_on_tasks` for more informationé
 265                                 assert count % 4 == 0
 266                                 for t in (count/4).times do
 267                                         var tt = t*4
 268
 269                                         var task_id = buffer[tt]
 270                                         var arg = buffer[tt+1]
 271                                         var alt = buffer[tt+2]
 272                                         var res = buffer[tt+3]
 273
 274                                         var result = new Result(tasks[task_id], arg, alt)
 275
 276                                         if res == 1 then result.ok = true
 277                                         if res == 2 then result.ok_empty = true
 278                                         if res == 3 then result.no_sav = true
 279                                         if res == 4 then result.fixme = true
 280                                         if res == 5 then result.fail = true
 281                                         if res == 6 then result.soso = true
 282                                         if res == 7 then result.skip = true
 283                                         if res == 8 then result.todo = true
 284                                         if res == 9 then result.skip_exec = true
 285                                         if res == 0 then result.unknown = true
 286
 287                                         results.add result
 288
 289                                         if verbose > 0 and results.length % 25 == 0 then print_short_results
 290                                 end
 291
 292                         else if status.tag == need_work_tag then
 293                                 # A worker needs more work
 294                                 mpi.recv_empty(status.source, status.tag, comm_world)
 295                                 var sent = send_task_to(status.source)
 296                                 if not sent then
 297                                         # no more work, quit
 298                                         mpi.send_empty(status.source, quit_tag, comm_world)
 299                                 end
 300                         else if status.tag == done_tag then
 301                                 # A worker is done and will quit
 302                                 mpi.recv_empty(status.source, status.tag, comm_world)
 303                                 at_work.remove(status.source)
 304
 305                                 if verbose > 0 then print "Worker {status.source} is done ({at_work.length} still at work)"
 306                         else
 307                                 print "Unexpected tag {status.tag}"
 308                                 shutdown
 309                                 break
 310                         end
 311                 end
 312                 status.free
 313         end
 314
 315         # Send a packet of tasks to worker at `rank`
 316         fun send_task_to(rank: Rank): Bool
 317         do
 318                 if next_task_id >= tasks.length then return false
 319
 320                 buffer[0] = next_task_id
 321                 next_task_id += tasks_per_packet
 322
 323                 mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)
 324
 325                 if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
 326                 return true
 327         end
 328
 329         # Display the accumulated results received from workers
 330         fun print_results
 331         do
 332                 print "# results #"
 333                 print "* {results.length} total"
 334                 print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
 335                 print "* {results.fails.length} fails"
 336                 print "* {results.no_savs.length} no savs"
 337                 print "* {results.fixmes.length} fixmes"
 338                 print "* {results.sosos.length} sosos"
 339                 print "* {results.skips.length} skips"
 340                 print "* {results.todos.length} todos"
 341                 print "* {results.skip_execs.length} skip execs"
 342                 print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
 343         end
 344
 345         fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
 346                 "& {results.fails.length} / {results.length}"
 347
 348         # Shutdown anormaly the running tests
 349         fun shutdown
 350         do
 351                 print "Shutting down"
 352                 mpi.send_empty(new Rank.any, quit_tag, comm_world)
 353         end
 354 end
 355
 356 # A worker node which actually execute the tests
 357 class Worker
 358         super Processor
 359
 360         # The `Rank` of `self`
 361         redef var rank: Rank
 362
 363         # Compilation directory
 364         var comp_dir = "/dev/shm/nit_compile{rank}" is lazy
 365
 366         # Directory to store the xml files produced for Jenkins
 367         var xml_dir = "~/jenkins_xml/"
 368
 369         # Output file of the `tests.sh` script
 370         var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy
 371
 372         # Source Nit repository, must be already updated and `make` before execution
 373         var local_nit = "/dev/shm/nit{rank}" is lazy
 374
 375         # Remote Nit repository (actually the local source)
 376         var remote_nit = "~/nit/"
 377
 378         # Compiled `Regex` to detect the argument of an execution
 379         var re_arg: Regex = "arg [0-9]+".to_re
 380
 381         # Compiled `Regex` to detect the alternative of an execution
 382         var re_alt: Regex = "_alt[0-9]+".to_re
 383
 384         redef fun run
 385         do
 386                 read_cli_options
 387                 setup
 388                 create_tasks
 389                 work_on_tasks
 390                 cleanup
 391         end
 392
 393         # Setup the testing environment
 394         #
 395         # Clone the nit repository.
 396         fun setup
 397         do
 398                 if verbose > 0 then sys.system "hostname"
 399
 400                 if local_nit.file_exists then local_nit.rmdir
 401
 402                 exec_and_check "git clone {remote_nit} {local_nit}"
 403                 local_nit.chdir
 404                 exec_and_check "git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
 405                 exec_and_check "git fetch origin --quiet"
 406                 exec_and_check "git checkout {branch_hash}"
 407                 exec_and_check "cp {remote_nit}/bin/*  bin/"
 408                 exec_and_check "src/git-gen-version.sh"
 409         end
 410
 411         private fun exec_and_check(cmd: String)
 412         do
 413                 if verbose > 0 then
 414                         print "+ {cmd}"
 415                         var res = sys.system(cmd)
 416                         assert res == 0 else print "Command '{cmd}' failed."
 417                 end
 418         end
 419
 420         # Clean up the testing environment
 421         #
 422         # Delete all temporary files, except `ccache_dir`.
 423         fun cleanup
 424         do
 425                 if comp_dir.file_exists then comp_dir.rmdir
 426                 if tests_sh_out.file_exists then tests_sh_out.file_delete
 427                 if local_nit.file_exists then local_nit.file_delete
 428         end
 429
 430         # Single C `int` to hold the next task id received from the `Controller`
 431         var task_buffer = new CIntArray(1)
 432
 433         # Manage communication with the `Controller` and execute dispatched `Task`s
 434         fun work_on_tasks
 435         do
 436                 var status = new Status
 437                 loop
 438                         check_signals
 439
 440                         # We double probe to prevent bug where a single probes does not receive the
 441                         # real next read.
 442                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 443                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 444
 445                         if status.tag == task_tag then
 446                                 # Receive tasks to execute
 447                                 mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
 448                                 var first_id = task_buffer[0]
 449                                 for task_id in [first_id .. first_id + tasks_per_packet[ do
 450
 451                                         # If id is over all known tasks, stop right here
 452                                         if task_id >= tasks.length then break
 453                                         var task = tasks[task_id]
 454
 455                                         "tests".chdir
 456
 457                                         # Command line to execute test
 458                                         var cmd = "XMLDIR={xml_dir} " +
 459                                                 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
 460                                                 "./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
 461
 462                                         # Execute test
 463                                         sys.system cmd
 464
 465                                         # Test results were written to file, read them
 466                                         var fstream = new FileReader.open(tests_sh_out)
 467                                         var content = fstream.read_all
 468                                         fstream.close
 469
 470                                         # Parse result and prepare them for sending
 471                                         #
 472                                         # The structure is composed of 4 ints for each result.
 473                                         # 1. task id
 474                                         # 2. arg number
 475                                         # 3. alt number
 476                                         # 4. test result as int
 477                                         var c = results_count
 478                                         for line in content.split('\n') do if not line.is_empty then
 479                                                 var cc = c*4
 480
 481                                                 buffer[cc] = task_id
 482
 483                                                 var arg_match = line.search(re_arg)
 484                                                 var arg = 0
 485                                                 if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
 486                                                 buffer[cc+1] = arg
 487
 488                                                 var alt_match = line.search(re_alt)
 489                                                 var alt = 0
 490                                                 if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
 491                                                 buffer[cc+2] = alt
 492
 493                                                 var res = null
 494                                                 if line.has("[ok]") then res = 1
 495                                                 if line.has("[0k]") then res = 2
 496                                                 if line.has("[=== no sav ===]") then res = 3
 497                                                 if line.has("[fixme]") then res = 4
 498                                                 if line.has("[======= fail") then res = 5
 499                                                 if line.has("[======= soso") then res = 6
 500                                                 if line.has("[skip]") then res = 7
 501                                                 if line.has("[todo]") then res = 8
 502                                                 if line.has("[skip exec]") then res = 9
 503
 504                                                 if res == null then
 505                                                         res = 0
 506                                                         if verbose > 1 then print "Unknown result: '{line}'"
 507                                                 end
 508                                                 buffer[cc+3] = res
 509
 510                                                 c += 1
 511
 512                                                 if verbose > 2 then print "tests.sh output line: {line}"
 513
 514                                                 # If result buffer is full, send to `Controller`
 515                                                 if c*4 == buffer.length then
 516                                                         send_results
 517                                                         c = 0
 518                                                 end
 519                                         end
 520
 521                                         if verbose > 1 then print "Done testing: {task}"
 522
 523                                         self.results_count = c
 524                                 end
 525
 526                                 mpi.send_empty(controller_rank, need_work_tag, comm_world)
 527                         else if status.tag == quit_tag then
 528                                 # Notification from the `Controller` to quit
 529                                 mpi.recv_empty(status.source, status.tag, comm_world)
 530
 531                                 # Send remaining results
 532                                 send_results
 533
 534                                 # Notify `Controller` that `self` is done and will quit
 535                                 mpi.send_empty(controller_rank, done_tag, comm_world)
 536                                 break
 537                         else
 538                                 print "Unexpected tag {status.tag}"
 539                                 break
 540                         end
 541                 end
 542                 status.free
 543         end
 544
 545         # Total results listed in `buffer` and ready to send
 546         var results_count = 0
 547
 548         # Send all results in `buffer` to the `Controller`
 549         fun send_results
 550         do
 551                 if results_count > 0 then
 552                         if verbose > 2 then print "Sending {results_count} results"
 553                         mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
 554                         results_count = 0
 555                 end
 556         end
 557
 558         redef fun receive_signal(signal)
 559         do
 560                 cleanup
 561                 mpi.finalize
 562                 exit 0
 563         end
 564 end
 565
 566 # A single test task, on a `test_program` with an `engine`
 567 #
 568 # Note that a task may involve more than one program to test considering the
 569 # alts and args for the `test_program`.
 570 class Task
 571         # Engine to test executing `test_program`
 572         var engine: String
 573
 574         # Program to execute with `engine`
 575         var test_program: String
 576
 577         redef fun to_s do return "{engine} {test_program}"
 578 end
 579
 580 # Result of a `Task`
 581 #
 582 # There may be more than one result per `Task`.
 583 class Result
 584         # `Task` associated to `self`
 585         var task: Task
 586
 587         # Argument index of the execution resulting in `self`
 588         var arg: Int
 589
 590         # Alternative index of the execution resulting in `self`
 591         var alt: Int
 592
 593         # Is `self` result an _ok_?
 594         var ok = false
 595
 596         # Is `self` result an _0k_?
 597         var ok_empty = false
 598
 599         # Is `self` result a _no sav_?
 600         var no_sav = false
 601
 602         # Is `self` result a _fixme_?
 603         var fixme = false
 604
 605         # Is `self` result a _fail_?
 606         var fail = false
 607
 608         # Is `self` result a _soso_?
 609         var soso = false
 610
 611         # Has `self` been skipped?
 612         var skip = false
 613
 614         # Is `self` TODO?
 615         var todo = false
 616
 617         # Has the execution of `self` been skipped?
 618         var skip_exec = false
 619
 620         # Is `self` an unknown result, probably an error
 621         var unknown = false
 622
 623         redef fun to_s
 624         do
 625                 var err = "Unknown"
 626                 if no_sav then err = "no sav"
 627                 if ok then err = "ok"
 628                 if ok_empty then err = "0k"
 629                 if fixme then err = "fixme"
 630                 if fail then err = "fail"
 631                 if soso then err = "soso"
 632                 if skip then err = "skip"
 633                 if todo then err = "todo"
 634                 if skip_exec then err = "skip_exec"
 635
 636                 return "{task} arg{arg} alt{alt} => {err}"
 637         end
 638 end
 639
 640 # A global and sorted collection of `Result`
 641 class ResultSet
 642         super HashSet[Result]
 643
 644         var no_savs = new HashSet[Result]
 645         var oks = new HashSet[Result]
 646         var ok_empties = new HashSet[Result]
 647         var fixmes = new HashSet[Result]
 648         var fails = new HashSet[Result]
 649         var sosos = new HashSet[Result]
 650         var skips = new HashSet[Result]
 651         var todos = new HashSet[Result]
 652         var skip_execs = new HashSet[Result]
 653         var unknowns = new HashSet[Result]
 654
 655         # TODO remove
 656         var per_engines = new HashMap[String, Result]
 657
 658         redef fun add(result)
 659         do
 660                 if result.no_sav then no_savs.add result
 661                 if result.ok then oks.add result
 662                 if result.ok_empty then ok_empties.add result
 663                 if result.fixme then fixmes.add result
 664                 if result.fail then fails.add result
 665                 if result.soso then sosos.add result
 666                 if result.skip then skips.add result
 667                 if result.todo then todos.add result
 668                 if result.skip_exec then skip_execs.add result
 669                 if result.unknown then unknowns.add result
 670
 671                 super
 672         end
 673
 674         redef fun remove(r) do abort
 675
 676         redef fun clear do abort
 677 end
 678
 679 redef class OptionContext
 680
 681         # Print usage with a possible error `message`
 682         private fun usage_error(message: nullable String)
 683         do
 684                 var ret = 0
 685                 if message != null then
 686                         print "Error: {message}"
 687                         ret = 1
 688                 end
 689
 690                 if comm_world.rank == 0 then
 691                         print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
 692                         usage
 693                 end
 694
 695                 mpi.finalize
 696                 exit ret
 697         end
 698 end
 699
 700 # On `Worker` nodes, prefix all prints with `rank/comm_world.size`
 701 redef fun print(msg: Object)
 702 do
 703         if comm_world.rank != 0.rank then
 704                 super "{comm_world.rank}/{comm_world.size}: {msg}"
 705         else super msg
 706 end
 707
 708 # Running MPI instance
 709 fun mpi: MPI do return once new MPI
 710
 711 # Launch mpi
 712 mpi
 713
 714 # Local rank
 715 var rank = comm_world.rank
 716
 717 var processor: Processor
 718 if rank == 0.rank then
 719         # If rank == 0, this is the `Controller`
 720         processor = new Controller
 721 else
 722         # This is a worker
 723         processor = new Worker(rank)
 724 end
 725 processor.run
 726
 727 mpi.finalize