contrib/nitester/src/nitester.nit

   1 # This file is part of NIT (http://www.nitlanguage.org).
   2 #
   3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 # Tester of Nit engines on an MPI cluster
  18 module nitester
  19
  20 import mpi
  21 import signals
  22 import opts
  23
  24 # Any processor, worker or controller
  25 #
  26 # All data and methods within this class are shared by the controller and the
  27 # workers.
  28 abstract class Processor
  29         super SignalHandler
  30
  31         # Controller rank is always 0
  32         var controller_rank: Rank = 0.rank
  33
  34         # Rank on this processor
  35         fun rank: Rank is abstract
  36
  37         # Where to store data for transfer between nodes
  38         #
  39         # Require: `buffer.length % 4 == 0`
  40         var buffer = new CIntArray(1024)
  41
  42         # Run in verbose mode, display more text
  43         var verbose = 0
  44
  45         init
  46         do
  47                 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
  48                 # on the first process.
  49                 handle_signal(sigterm, true)
  50         end
  51
  52         # Tag of a new task packet of size `tasks_per_packet`
  53         var task_tag: Tag = 0.tag
  54
  55         # Tag to return a set of `Result` thought `buffer`
  56         var result_tag: Tag = 1.tag
  57
  58         # Tag to notify `Worker` when to quit
  59         var quit_tag: Tag = 2.tag
  60
  61         # Tag to request more work from the `Controller` by a `Worker`
  62         var need_work_tag: Tag = 4.tag
  63
  64         # Tag to notify `Controller` that the sender `Worker` is done
  65         var done_tag: Tag = 5.tag
  66
  67         # Number of tasks within each task assignation with `task_tag`
  68         var tasks_per_packet = 1
  69
  70         # Run the main logic of this node
  71         fun run is abstract
  72
  73         # Hash or name of the branch to test
  74         var branch_hash: String is noinit
  75
  76         # Engines targeted by this execution
  77         var engines: Array[String] is noinit
  78
  79         # All known engines, used to detect errors in `engines`
  80         var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]
  81
  82         # Programs to test in this execution
  83         var test_programs: Array[String] is noinit
  84
  85         # Root of the temporary directory
  86         var tmp_dir = "/dev/shm/"
  87
  88         # `ccache` directory
  89         var ccache_dir = "/dev/shm/nit_ccache"
  90
  91         # Read command line options
  92         fun read_cli_options
  93         do
  94                 var opt_ctx = new OptionContext
  95                 var opt_hash = new OptionString(
  96                         "Branch to test",
  97                         "--hash", "-h")
  98                 opt_hash.mandatory = true
  99                 var opt_engines = new OptionString(
 100                         "Engines to test, separated with commas ({all_engines.join(", ")} or all)",
 101                         "--engine", "-e")
 102                 var opt_help = new OptionBool("Print this help message", "--help", "-h")
 103                 var opt_verbose = new OptionCount(
 104                         "Be verbose, repeat to increase verbose level (max with -vvv)",
 105                         "--verbose", "-v")
 106                 var opt_cleanup = new OptionBool(
 107                         "Clean up all nitester files (and do not run tests)",
 108                         "--cleanup", "-C")
 109
 110                 opt_ctx.add_option(opt_hash, opt_engines, opt_help, opt_verbose, opt_cleanup)
 111                 opt_ctx.parse args
 112
 113                 # --help?
 114                 if opt_help.value then opt_ctx.usage_error null
 115
 116                 # --verbose?
 117                 verbose = opt_verbose.value
 118
 119                 # --cleanup?
 120                 if opt_cleanup.value then
 121                         assert tmp_dir.file_exists
 122                         for file in tmp_dir.files do if file.has_prefix("nit") then
 123                                 var full_path = tmp_dir / file
 124                                 if full_path == ccache_dir then continue
 125
 126                                 assert full_path.file_exists
 127
 128                                 var stat = full_path.file_lstat
 129                                 if stat.is_dir then
 130                                         full_path.rmdir
 131                                 else
 132                                         full_path.file_delete
 133                                 end
 134                                 stat.free
 135                         end
 136                         mpi.finalize
 137                         exit 0
 138                 end
 139
 140                 # any files?
 141                 var rest = opt_ctx.rest
 142                 if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
 143                 test_programs = rest
 144
 145                 # hash
 146                 branch_hash = opt_hash.value.as(not null)
 147
 148                 # gather and check engines
 149                 var engines_str = opt_engines.value
 150                 var engines
 151                 if engines_str == null then
 152                         # default
 153                         engines = ["nitg-s"]
 154                 else
 155                         engines = engines_str.split(',')
 156
 157                         if engines.has("all") then
 158                                 # all engines
 159                                 engines = all_engines
 160                         end
 161                 end
 162
 163                 # check validity of targetted engines
 164                 var unknown_engines = new Array[String]
 165                 for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine
 166
 167                 if not unknown_engines.is_empty then
 168                         opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
 169                 end
 170                 self.engines = engines
 171         end
 172
 173         # All tasks to be performed
 174         var tasks = new Array[Task]
 175
 176         # Gather and register all tasks
 177         fun create_tasks
 178         do
 179                 # At this point we are in our local nit
 180                 var skip_path = "tests/turing.skip"
 181                 var skip
 182                 if skip_path.file_exists then
 183                         var skip_file = new IFStream.open(skip_path)
 184                         skip = skip_file.read_lines
 185                         skip_file.close
 186                 else
 187                         skip = new Array[String]
 188                 end
 189
 190                 for prog in test_programs do for engine in engines do
 191
 192                         # Is is blacklisted?
 193                         for s in skip do if not s.is_empty and prog.has(s) then
 194                                 if verbose > 0 and rank == 0 then print "Skipping test '{prog}' because of '{s}' in turing.skip"
 195                                 continue label
 196                         end
 197
 198                         tasks.add new Task(engine, prog)
 199                 end label
 200         end
 201 end
 202
 203 # Single controller to dispatch tasks, gather results and produce stats
 204 class Controller
 205         super Processor
 206
 207         redef fun rank do return controller_rank
 208
 209         # Id as `Int` of the next task to distribute
 210         var next_task_id = 0
 211
 212         redef fun receive_signal(signal)
 213         do
 214                 shutdown
 215                 print_results
 216
 217                 mpi.finalize
 218                 exit 0
 219         end
 220
 221         redef fun run
 222         do
 223                 read_cli_options
 224                 create_tasks
 225                 distribute_tasks
 226                 print_results
 227         end
 228
 229         # Cumulated results from workers
 230         var results = new ResultSet
 231
 232         # Maintain communication with workers to distribute tasks and receiver results
 233         fun distribute_tasks
 234         do
 235                 var at_work = new Array[Rank]
 236
 237                 # send initial tasks
 238                 for r in [1..comm_world.size[ do
 239                         var sent = send_task_to(r.rank)
 240                         if sent then
 241                                 at_work.add r.rank
 242                         else
 243                                 mpi.send_empty(r.rank, quit_tag, comm_world)
 244                         end
 245                 end
 246
 247                 var status = new Status
 248                 # await results and send new tasks
 249                 while not at_work.is_empty do
 250                         check_signals
 251
 252                         # Double probe to avoid bug with some implementation of MPI
 253                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 254                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 255
 256                         if status.tag == result_tag then
 257                                 # Receive results fron a worker
 258                                 var count = status.count(new DataType.int)
 259                                 mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)
 260
 261                                 # Parse results from C array to `Result` instances
 262                                 #
 263                                 # Each result is on 4 ints: task id, arg, alt and result.
 264                                 #
 265                                 # See the comments where the data is produced in `Worker::work_on_tasks` for more informationé
 266                                 assert count % 4 == 0
 267                                 for t in (count/4).times do
 268                                         var tt = t*4
 269
 270                                         var task_id = buffer[tt]
 271                                         var arg = buffer[tt+1]
 272                                         var alt = buffer[tt+2]
 273                                         var res = buffer[tt+3]
 274
 275                                         var result = new Result(tasks[task_id], arg, alt)
 276
 277                                         if res == 1 then result.ok = true
 278                                         if res == 2 then result.ok_empty = true
 279                                         if res == 3 then result.no_sav = true
 280                                         if res == 4 then result.fixme = true
 281                                         if res == 5 then result.fail = true
 282                                         if res == 6 then result.soso = true
 283                                         if res == 7 then result.skip = true
 284                                         if res == 8 then result.todo = true
 285                                         if res == 9 then result.skip_exec = true
 286                                         if res == 0 then result.unknown = true
 287
 288                                         results.add result
 289
 290                                         if verbose > 0 and results.length % 25 == 0 then print_short_results
 291                                 end
 292
 293                         else if status.tag == need_work_tag then
 294                                 # A worker needs more work
 295                                 mpi.recv_empty(status.source, status.tag, comm_world)
 296                                 var sent = send_task_to(status.source)
 297                                 if not sent then
 298                                         # no more work, quit
 299                                         mpi.send_empty(status.source, quit_tag, comm_world)
 300                                 end
 301                         else if status.tag == done_tag then
 302                                 # A worker is done and will quit
 303                                 mpi.recv_empty(status.source, status.tag, comm_world)
 304                                 at_work.remove(status.source)
 305
 306                                 if verbose > 1 then print "worker {status.source} is done ({at_work.length} still at work)"
 307                         else
 308                                 print "Unexpected tag {status.tag}"
 309                                 shutdown
 310                                 break
 311                         end
 312                 end
 313                 status.free
 314         end
 315
 316         # Send a packet of tasks to worker at `rank`
 317         fun send_task_to(rank: Rank): Bool
 318         do
 319                 if next_task_id >= tasks.length then return false
 320
 321                 buffer[0] = next_task_id
 322                 next_task_id += tasks_per_packet
 323
 324                 mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)
 325
 326                 if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
 327                 return true
 328         end
 329
 330         # Display the accumulated results received from workers
 331         fun print_results
 332         do
 333                 print "# results #"
 334                 print "* {results.length} total"
 335                 print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
 336                 print "* {results.fails.length} fails"
 337                 print "* {results.no_savs.length} no savs"
 338                 print "* {results.fixmes.length} fixmes"
 339                 print "* {results.sosos.length} sosos"
 340                 print "* {results.skips.length} skips"
 341                 print "* {results.todos.length} todos"
 342                 print "* {results.skip_execs.length} skip execs"
 343                 print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
 344         end
 345
 346         fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
 347                 "& {results.fails.length} / {results.length}"
 348
 349         # Shutdown anormaly the running tests
 350         fun shutdown
 351         do
 352                 print "Shutting down"
 353                 mpi.send_empty(new Rank.any, quit_tag, comm_world)
 354         end
 355 end
 356
 357 # A worker node which actually execute the tests
 358 class Worker
 359         super Processor
 360
 361         # The `Rank` of `self`
 362         redef var rank: Rank
 363
 364         # Compilation directory
 365         var comp_dir = "/dev/shm/nit_compile{rank}" is lazy
 366
 367         # Directory to store the xml files produced for Jenkins
 368         var xml_dir = "~/jenkins_xml/"
 369
 370         # Output file of the `tests.sh` script
 371         var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy
 372
 373         # Source Nit repository, must be already updated and `make` before execution
 374         var local_nit = "/dev/shm/nit{rank}" is lazy
 375
 376         # Remote Nit repository (actually the local source)
 377         var remote_nit = "~/nit/"
 378
 379         # Compiled `Regex` to detect the argument of an execution
 380         var re_arg: Regex = "arg [0-9]+".to_re
 381
 382         # Compiled `Regex` to detect the alternative of an execution
 383         var re_alt: Regex = "_alt[0-9]+".to_re
 384
 385         redef fun run
 386         do
 387                 read_cli_options
 388                 setup
 389                 create_tasks
 390                 work_on_tasks
 391                 cleanup
 392         end
 393
 394         # Setup the testing environment
 395         #
 396         # Clone the nit repository.
 397         fun setup
 398         do
 399                 if verbose > 0 then sys.system "hostname"
 400
 401                 if local_nit.file_exists then local_nit.rmdir
 402
 403                 exec_and_check "git clone {remote_nit} {local_nit}"
 404                 local_nit.chdir
 405                 exec_and_check "git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
 406                 exec_and_check "git fetch origin --quiet"
 407                 exec_and_check "git checkout {branch_hash}"
 408                 exec_and_check "cp {remote_nit}/bin/nitg bin/"
 409                 exec_and_check "src/git-gen-version.sh"
 410                 exec_and_check "bin/nitg --dir bin/ src/nit.nit src/nitvm.nit"
 411         end
 412
 413         private fun exec_and_check(cmd: String)
 414         do
 415                 if verbose > 0 then
 416                         print "+ {cmd}"
 417                         var res = sys.system(cmd)
 418                         assert res == 0 else print "Command '{cmd}' failed."
 419                 end
 420         end
 421
 422         # Clean up the testing environment
 423         #
 424         # Delete all temporary files, except `ccache_dir`.
 425         fun cleanup
 426         do
 427                 if comp_dir.file_exists then comp_dir.rmdir
 428                 if tests_sh_out.file_exists then tests_sh_out.file_delete
 429                 if local_nit.file_exists then local_nit.file_delete
 430         end
 431
 432         # Single C `int` to hold the next task id received from the `Controller`
 433         var task_buffer = new CIntArray(1)
 434
 435         # Manage communication with the `Controller` and execute dispatched `Task`s
 436         fun work_on_tasks
 437         do
 438                 var status = new Status
 439                 loop
 440                         check_signals
 441
 442                         # We double probe to prevent bug where a single probes does not receive the
 443                         # real next read.
 444                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 445                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 446
 447                         if status.tag == task_tag then
 448                                 # Receive tasks to execute
 449                                 mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
 450                                 var first_id = task_buffer[0]
 451                                 for task_id in [first_id .. first_id + tasks_per_packet[ do
 452
 453                                         # If id is over all known tasks, stop right here
 454                                         if task_id >= tasks.length then break
 455                                         var task = tasks[task_id]
 456
 457                                         "tests".chdir
 458
 459                                         # Command line to execute test
 460                                         var cmd = "XMLDIR={xml_dir} " +
 461                                                 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
 462                                                 "./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
 463
 464                                         # Execute test
 465                                         sys.system cmd
 466
 467                                         # Test results were written to file, read them
 468                                         var fstream = new IFStream.open(tests_sh_out)
 469                                         var content = fstream.read_all
 470                                         fstream.close
 471
 472                                         # Parse result and prepare them for sending
 473                                         #
 474                                         # The structure is composed of 4 ints for each result.
 475                                         # 1. task id
 476                                         # 2. arg number
 477                                         # 3. alt number
 478                                         # 4. test result as int
 479                                         var c = results_count
 480                                         for line in content.split('\n') do if not line.is_empty then
 481                                                 var cc = c*4
 482
 483                                                 buffer[cc] = task_id
 484
 485                                                 var arg_match = line.search(re_arg)
 486                                                 var arg = 0
 487                                                 if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
 488                                                 buffer[cc+1] = arg
 489
 490                                                 var alt_match = line.search(re_alt)
 491                                                 var alt = 0
 492                                                 if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
 493                                                 buffer[cc+2] = alt
 494
 495                                                 var res = null
 496                                                 if line.has("[ok]") then res = 1
 497                                                 if line.has("[0k]") then res = 2
 498                                                 if line.has("[=== no sav ===]") then res = 3
 499                                                 if line.has("[fixme]") then res = 4
 500                                                 if line.has("[======= fail") then res = 5
 501                                                 if line.has("[======= soso") then res = 6
 502                                                 if line.has("[skip]") then res = 7
 503                                                 if line.has("[todo]") then res = 8
 504                                                 if line.has("[skip exec]") then res = 9
 505
 506                                                 if res == null then
 507                                                         res = 0
 508                                                         if verbose > 1 then print "Unknown result: '{line}'"
 509                                                 end
 510                                                 buffer[cc+3] = res
 511
 512                                                 c += 1
 513
 514                                                 if verbose > 2 then print "tests.sh output line: {line}"
 515
 516                                                 # If result buffer is full, send to `Controller`
 517                                                 if c*4 == buffer.length then
 518                                                         send_results
 519                                                         c = 0
 520                                                 end
 521                                         end
 522
 523                                         self.results_count = c
 524                                 end
 525
 526                                 mpi.send_empty(controller_rank, need_work_tag, comm_world)
 527                         else if status.tag == quit_tag then
 528                                 # Notification from the `Controller` to quit
 529                                 mpi.recv_empty(status.source, status.tag, comm_world)
 530
 531                                 # Send remaining results
 532                                 send_results
 533
 534                                 # Notify `Controller` that `self` is done and will quit
 535                                 mpi.send_empty(controller_rank, done_tag, comm_world)
 536                                 break
 537                         else
 538                                 print "Unexpected tag {status.tag}"
 539                                 break
 540                         end
 541                 end
 542                 status.free
 543         end
 544
 545         # Total results listed in `buffer` and ready to send
 546         var results_count = 0
 547
 548         # Send all results in `buffer` to the `Controller`
 549         fun send_results
 550         do
 551                 if results_count > 0 then
 552                         if verbose > 1 then print "sending {results_count} results"
 553                         mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
 554                         results_count = 0
 555                 end
 556         end
 557
 558         redef fun receive_signal(signal)
 559         do
 560                 cleanup
 561                 mpi.finalize
 562                 exit 0
 563         end
 564 end
 565
 566 # A single test task, on a `test_program` with an `engine`
 567 #
 568 # Note that a task may involve more than one program to test considering the
 569 # alts and args for the `test_program`.
 570 class Task
 571         # Engine to test executing `test_program`
 572         var engine: String
 573
 574         # Program to execute with `engine`
 575         var test_program: String
 576
 577         redef fun to_s do return "{engine} {test_program}"
 578 end
 579
 580 # Result of a `Task`
 581 #
 582 # There may be more than one result per `Task`.
 583 class Result
 584         # `Task` associated to `self`
 585         var task: Task
 586
 587         # Argument index of the execution resulting in `self`
 588         var arg: Int
 589
 590         # Alternative index of the execution resulting in `self`
 591         var alt: Int
 592
 593         # Is `self` result an _ok_?
 594         var ok = false
 595
 596         # Is `self` result an _0k_?
 597         var ok_empty = false
 598
 599         # Is `self` result a _no sav_?
 600         var no_sav = false
 601
 602         # Is `self` result a _fixme_?
 603         var fixme = false
 604
 605         # Is `self` result a _fail_?
 606         var fail = false
 607
 608         # Is `self` result a _soso_?
 609         var soso = false
 610
 611         # Has `self` been skipped?
 612         var skip = false
 613
 614         # Is `self` TODO?
 615         var todo = false
 616
 617         # Has the execution of `self` been skipped?
 618         var skip_exec = false
 619
 620         # Is `self` an unknown result, probably an error
 621         var unknown = false
 622
 623         redef fun to_s
 624         do
 625                 var err = "Unknown"
 626                 if no_sav then err = "no sav"
 627                 if ok then err = "ok"
 628                 if ok_empty then err = "0k"
 629                 if fixme then err = "fixme"
 630                 if fail then err = "fail"
 631                 if soso then err = "soso"
 632                 if skip then err = "skip"
 633                 if todo then err = "todo"
 634                 if skip_exec then err = "skip_exec"
 635
 636                 return "{task} arg{arg} alt{alt} => {err}"
 637         end
 638 end
 639
 640 # A global and sorted collection of `Result`
 641 class ResultSet
 642         super HashSet[Result]
 643
 644         var no_savs = new HashSet[Result]
 645         var oks = new HashSet[Result]
 646         var ok_empties = new HashSet[Result]
 647         var fixmes = new HashSet[Result]
 648         var fails = new HashSet[Result]
 649         var sosos = new HashSet[Result]
 650         var skips = new HashSet[Result]
 651         var todos = new HashSet[Result]
 652         var skip_execs = new HashSet[Result]
 653         var unknowns = new HashSet[Result]
 654
 655         # TODO remove
 656         var per_engines = new HashMap[String, Result]
 657
 658         redef fun add(result)
 659         do
 660                 if result.no_sav then no_savs.add result
 661                 if result.ok then oks.add result
 662                 if result.ok_empty then ok_empties.add result
 663                 if result.fixme then fixmes.add result
 664                 if result.fail then fails.add result
 665                 if result.soso then sosos.add result
 666                 if result.skip then skips.add result
 667                 if result.todo then todos.add result
 668                 if result.skip_exec then skip_execs.add result
 669                 if result.unknown then unknowns.add result
 670
 671                 super
 672         end
 673
 674         redef fun remove(r) do abort
 675
 676         redef fun clear do abort
 677 end
 678
 679 redef class OptionContext
 680
 681         # Print usage with a possible error `message`
 682         private fun usage_error(message: nullable String)
 683         do
 684                 var ret = 0
 685                 if message != null then
 686                         print "Error: {message}"
 687                         ret = 1
 688                 end
 689
 690                 if comm_world.rank == 0 then
 691                         print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
 692                         usage
 693                 end
 694
 695                 mpi.finalize
 696                 exit ret
 697         end
 698 end
 699
 700 # On `Worker` nodes, prefix all prints with `rank/comm_world.size`
 701 redef fun print(msg: Object)
 702 do
 703         if comm_world.rank != 0.rank then
 704                 super "{comm_world.rank}/{comm_world.size}: {msg}"
 705         else super msg
 706 end
 707
 708 # Running MPI instance
 709 fun mpi: MPI do return once new MPI
 710
 711 # Launch mpi
 712 mpi
 713
 714 # Local rank
 715 var rank = comm_world.rank
 716
 717 var processor: Processor
 718 if rank == 0.rank then
 719         # If rank == 0, this is the `Controller`
 720         processor = new Controller
 721 else
 722         # This is a worker
 723         processor = new Worker(rank)
 724 end
 725 processor.run
 726
 727 mpi.finalize