contrib/nitester/src/nitester.nit

   1 # This file is part of NIT (http://www.nitlanguage.org).
   2 #
   3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 # Tester of Nit engines on an MPI cluster
  18 module nitester
  19
  20 import mpi
  21 import signals
  22 import opts
  23
  24 # Any processor, worker or controller
  25 #
  26 # All data and methods within this class are shared by the controller and the
  27 # workers.
  28 abstract class Processor
  29         super SignalHandler
  30
  31         # Controller rank is always 0
  32         var controller_rank: Rank = 0.rank
  33
  34         # Rank on this processor
  35         fun rank: Rank is abstract
  36
  37         # Where to store data for transfer between nodes
  38         #
  39         # Require: `buffer.length % 4 == 0`
  40         var buffer = new CIntArray(1024)
  41
  42         # Run in verbose mode, display more text
  43         var verbose = 0
  44
  45         init
  46         do
  47                 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
  48                 # on the first process.
  49                 handle_signal(sigterm, true)
  50         end
  51
  52         # Tag of a new task packet of size `tasks_per_packet`
  53         var task_tag: Tag = 0.tag
  54
  55         # Tag to return a set of `Result` thought `buffer`
  56         var result_tag: Tag = 1.tag
  57
  58         # Tag to notify `Worker` when to quit
  59         var quit_tag: Tag = 2.tag
  60
  61         # Tag to request more work from the `Controller` by a `Worker`
  62         var need_work_tag: Tag = 4.tag
  63
  64         # Tag to notify `Controller` that the sender `Worker` is done
  65         var done_tag: Tag = 5.tag
  66
  67         # Number of tasks within each task assignation with `task_tag`
  68         var tasks_per_packet = 1
  69
  70         # Run the main logic of this node
  71         fun run is abstract
  72
  73         # Hash or name of the branch to test
  74         var branch_hash: String is noinit
  75
  76         # Engines targeted by this execution
  77         var engines: Array[String] is noinit
  78
  79         # All known engines, used to detect errors in `engines`
  80         var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]
  81
  82         # Programs to test in this execution
  83         var test_programs: Array[String] is noinit
  84
  85         # Root of the temporary directory
  86         var tmp_dir = "/dev/shm/"
  87
  88         # `ccache` directory
  89         var ccache_dir = "/dev/shm/nit_ccache"
  90
  91         # Read command line options
  92         fun read_cli_options
  93         do
  94                 var opt_ctx = new OptionContext
  95                 var opt_hash = new OptionString(
  96                         "Branch to test",
  97                         "--hash", "-h")
  98                 opt_hash.mandatory = true
  99                 var opt_engines = new OptionString(
 100                         "Engines to test, separated with commas ({all_engines.join(", ")} or all)",
 101                         "--engine", "-e")
 102                 var opt_help = new OptionBool("Print this help message", "--help", "-h")
 103                 var opt_verbose = new OptionCount(
 104                         "Be verbose, repeat to increase verbose level (max with -vvv)",
 105                         "--verbose", "-v")
 106                 var opt_cleanup = new OptionBool(
 107                         "Clean up all nitester files (and do not run tests)",
 108                         "--cleanup", "-C")
 109
 110                 opt_ctx.add_option(opt_hash, opt_engines, opt_help, opt_verbose, opt_cleanup)
 111                 opt_ctx.parse args
 112
 113                 # --help?
 114                 if opt_help.value then opt_ctx.usage_error null
 115
 116                 # --verbose?
 117                 verbose = opt_verbose.value
 118
 119                 # --cleanup?
 120                 if opt_cleanup.value then
 121                         assert tmp_dir.file_exists
 122                         for file in tmp_dir.files do if file.has_prefix("nit") then
 123                                 var full_path = tmp_dir / file
 124                                 if full_path == ccache_dir then continue
 125
 126                                 assert full_path.file_exists
 127
 128                                 var stat = full_path.file_lstat
 129                                 if stat.is_dir then
 130                                         full_path.rmdir
 131                                 else
 132                                         full_path.file_delete
 133                                 end
 134                         end
 135                         mpi.finalize
 136                         exit 0
 137                 end
 138
 139                 # any files?
 140                 var rest = opt_ctx.rest
 141                 if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
 142                 test_programs = rest
 143
 144                 # hash
 145                 branch_hash = opt_hash.value.as(not null)
 146
 147                 # gather and check engines
 148                 var engines_str = opt_engines.value
 149                 var engines
 150                 if engines_str == null then
 151                         # default
 152                         engines = ["nitg-s"]
 153                 else
 154                         engines = engines_str.split(',')
 155
 156                         if engines.has("all") then
 157                                 # all engines
 158                                 engines = all_engines
 159                         end
 160                 end
 161
 162                 # check validity of targetted engines
 163                 var unknown_engines = new Array[String]
 164                 for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine
 165
 166                 if not unknown_engines.is_empty then
 167                         opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
 168                 end
 169                 self.engines = engines
 170         end
 171
 172         # All tasks to be performed
 173         var tasks = new Array[Task]
 174
 175         # Gather and register all tasks
 176         fun create_tasks
 177         do
 178                 # At this point we are in our local nit
 179                 var skip_path = "tests/turing.skip"
 180                 var skip
 181                 if skip_path.file_exists then
 182                         var skip_file = new FileReader.open(skip_path)
 183                         skip = skip_file.read_lines
 184                         skip_file.close
 185                 else
 186                         skip = new Array[String]
 187                 end
 188
 189                 for engine in engines do for prog in test_programs do
 190                         # Is is blacklisted?
 191                         for s in skip do if not s.is_empty and prog.has(s) then
 192                                 if verbose > 0 and rank == 0 then print "Skipping test '{prog}' because of '{s}' in turing.skip"
 193                                 continue label
 194                         end
 195
 196                         tasks.add new Task(engine, prog)
 197                 end label
 198         end
 199 end
 200
 201 # Single controller to dispatch tasks, gather results and produce stats
 202 class Controller
 203         super Processor
 204
 205         redef fun rank do return controller_rank
 206
 207         # Id as `Int` of the next task to distribute
 208         var next_task_id = 0
 209
 210         redef fun receive_signal(signal)
 211         do
 212                 shutdown
 213                 print_results
 214
 215                 mpi.finalize
 216                 exit 0
 217         end
 218
 219         redef fun run
 220         do
 221                 read_cli_options
 222                 create_tasks
 223                 distribute_tasks
 224                 print_results
 225         end
 226
 227         # Cumulated results from workers
 228         var results = new ResultSet
 229
 230         # Maintain communication with workers to distribute tasks and receiver results
 231         fun distribute_tasks
 232         do
 233                 var at_work = new Array[Rank]
 234
 235                 # send initial tasks
 236                 for r in [1..comm_world.size[ do
 237                         var sent = send_task_to(r.rank)
 238                         if sent then
 239                                 at_work.add r.rank
 240                         else
 241                                 mpi.send_empty(r.rank, quit_tag, comm_world)
 242                         end
 243                 end
 244
 245                 var status = new Status
 246                 # await results and send new tasks
 247                 while not at_work.is_empty do
 248                         check_signals
 249
 250                         # Double probe to avoid bug with some implementation of MPI
 251                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 252                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 253
 254                         if status.tag == result_tag then
 255                                 # Receive results fron a worker
 256                                 var count = status.count(new DataType.int)
 257                                 mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)
 258
 259                                 # Parse results from C array to `Result` instances
 260                                 #
 261                                 # Each result is on 4 ints: task id, arg, alt and result.
 262                                 #
 263                                 # See the comments where the data is produced in `Worker::work_on_tasks` for more informationé
 264                                 assert count % 4 == 0
 265                                 for t in (count/4).times do
 266                                         var tt = t*4
 267
 268                                         var task_id = buffer[tt]
 269                                         var arg = buffer[tt+1]
 270                                         var alt = buffer[tt+2]
 271                                         var res = buffer[tt+3]
 272
 273                                         var result = new Result(tasks[task_id], arg, alt)
 274
 275                                         if res == 1 then result.ok = true
 276                                         if res == 2 then result.ok_empty = true
 277                                         if res == 3 then result.no_sav = true
 278                                         if res == 4 then result.fixme = true
 279                                         if res == 5 then result.fail = true
 280                                         if res == 6 then result.soso = true
 281                                         if res == 7 then result.skip = true
 282                                         if res == 8 then result.todo = true
 283                                         if res == 9 then result.skip_exec = true
 284                                         if res == 0 then result.unknown = true
 285
 286                                         results.add result
 287
 288                                         if verbose > 0 and results.length % 25 == 0 then print_short_results
 289                                 end
 290
 291                         else if status.tag == need_work_tag then
 292                                 # A worker needs more work
 293                                 mpi.recv_empty(status.source, status.tag, comm_world)
 294                                 var sent = send_task_to(status.source)
 295                                 if not sent then
 296                                         # no more work, quit
 297                                         mpi.send_empty(status.source, quit_tag, comm_world)
 298                                 end
 299                         else if status.tag == done_tag then
 300                                 # A worker is done and will quit
 301                                 mpi.recv_empty(status.source, status.tag, comm_world)
 302                                 at_work.remove(status.source)
 303
 304                                 if verbose > 0 then print "Worker {status.source} is done ({at_work.length} still at work)"
 305                         else
 306                                 print "Unexpected tag {status.tag}"
 307                                 shutdown
 308                                 break
 309                         end
 310                 end
 311                 status.free
 312         end
 313
 314         # Send a packet of tasks to worker at `rank`
 315         fun send_task_to(rank: Rank): Bool
 316         do
 317                 if next_task_id >= tasks.length then return false
 318
 319                 buffer[0] = next_task_id
 320                 next_task_id += tasks_per_packet
 321
 322                 mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)
 323
 324                 if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
 325                 return true
 326         end
 327
 328         # Display the accumulated results received from workers
 329         fun print_results
 330         do
 331                 print "# results #"
 332                 print "* {results.length} total"
 333                 print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
 334                 print "* {results.fails.length} fails"
 335                 print "* {results.no_savs.length} no savs"
 336                 print "* {results.fixmes.length} fixmes"
 337                 print "* {results.sosos.length} sosos"
 338                 print "* {results.skips.length} skips"
 339                 print "* {results.todos.length} todos"
 340                 print "* {results.skip_execs.length} skip execs"
 341                 print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
 342         end
 343
 344         fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
 345                 "& {results.fails.length} / {results.length}"
 346
 347         # Shutdown anormaly the running tests
 348         fun shutdown
 349         do
 350                 print "Shutting down"
 351                 mpi.send_empty(new Rank.any, quit_tag, comm_world)
 352         end
 353 end
 354
 355 # A worker node which actually execute the tests
 356 class Worker
 357         super Processor
 358
 359         # The `Rank` of `self`
 360         redef var rank: Rank
 361
 362         # Compilation directory
 363         var comp_dir = "/dev/shm/nit_compile{rank}" is lazy
 364
 365         # Directory to store the xml files produced for Jenkins
 366         var xml_dir = "~/jenkins_xml/"
 367
 368         # Output file of the `tests.sh` script
 369         var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy
 370
 371         # Source Nit repository, must be already updated and `make` before execution
 372         var local_nit = "/dev/shm/nit{rank}" is lazy
 373
 374         # Remote Nit repository (actually the local source)
 375         var remote_nit = "~/nit/"
 376
 377         # Compiled `Regex` to detect the argument of an execution
 378         var re_arg: Regex = "arg [0-9]+".to_re
 379
 380         # Compiled `Regex` to detect the alternative of an execution
 381         var re_alt: Regex = "_alt[0-9]+".to_re
 382
 383         redef fun run
 384         do
 385                 read_cli_options
 386                 setup
 387                 create_tasks
 388                 work_on_tasks
 389                 cleanup
 390         end
 391
 392         # Setup the testing environment
 393         #
 394         # Clone the nit repository.
 395         fun setup
 396         do
 397                 if verbose > 0 then sys.system "hostname"
 398
 399                 if local_nit.file_exists then local_nit.rmdir
 400
 401                 exec_and_check "git clone {remote_nit} {local_nit}"
 402                 local_nit.chdir
 403                 exec_and_check "git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
 404                 exec_and_check "git fetch origin --quiet"
 405                 exec_and_check "git checkout {branch_hash}"
 406                 exec_and_check "cp {remote_nit}/bin/*  bin/"
 407                 exec_and_check "src/git-gen-version.sh"
 408         end
 409
 410         private fun exec_and_check(cmd: String)
 411         do
 412                 if verbose > 0 then
 413                         print "+ {cmd}"
 414                         var res = sys.system(cmd)
 415                         assert res == 0 else print "Command '{cmd}' failed."
 416                 end
 417         end
 418
 419         # Clean up the testing environment
 420         #
 421         # Delete all temporary files, except `ccache_dir`.
 422         fun cleanup
 423         do
 424                 if comp_dir.file_exists then comp_dir.rmdir
 425                 if tests_sh_out.file_exists then tests_sh_out.file_delete
 426                 if local_nit.file_exists then local_nit.file_delete
 427         end
 428
 429         # Single C `int` to hold the next task id received from the `Controller`
 430         var task_buffer = new CIntArray(1)
 431
 432         # Manage communication with the `Controller` and execute dispatched `Task`s
 433         fun work_on_tasks
 434         do
 435                 var status = new Status
 436                 loop
 437                         check_signals
 438
 439                         # We double probe to prevent bug where a single probes does not receive the
 440                         # real next read.
 441                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 442                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 443
 444                         if status.tag == task_tag then
 445                                 # Receive tasks to execute
 446                                 mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
 447                                 var first_id = task_buffer[0]
 448                                 for task_id in [first_id .. first_id + tasks_per_packet[ do
 449
 450                                         # If id is over all known tasks, stop right here
 451                                         if task_id >= tasks.length then break
 452                                         var task = tasks[task_id]
 453
 454                                         "tests".chdir
 455
 456                                         # Command line to execute test
 457                                         var cmd = "XMLDIR={xml_dir} " +
 458                                                 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
 459                                                 "./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
 460
 461                                         # Execute test
 462                                         sys.system cmd
 463
 464                                         # Test results were written to file, read them
 465                                         var fstream = new FileReader.open(tests_sh_out)
 466                                         var content = fstream.read_all
 467                                         fstream.close
 468
 469                                         # Parse result and prepare them for sending
 470                                         #
 471                                         # The structure is composed of 4 ints for each result.
 472                                         # 1. task id
 473                                         # 2. arg number
 474                                         # 3. alt number
 475                                         # 4. test result as int
 476                                         var c = results_count
 477                                         for line in content.split('\n') do if not line.is_empty then
 478                                                 var cc = c*4
 479
 480                                                 buffer[cc] = task_id
 481
 482                                                 var arg_match = line.search(re_arg)
 483                                                 var arg = 0
 484                                                 if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
 485                                                 buffer[cc+1] = arg
 486
 487                                                 var alt_match = line.search(re_alt)
 488                                                 var alt = 0
 489                                                 if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
 490                                                 buffer[cc+2] = alt
 491
 492                                                 var res = null
 493                                                 if line.has("[ok]") then res = 1
 494                                                 if line.has("[0k]") then res = 2
 495                                                 if line.has("[=== no sav ===]") then res = 3
 496                                                 if line.has("[fixme]") then res = 4
 497                                                 if line.has("[======= fail") then res = 5
 498                                                 if line.has("[======= soso") then res = 6
 499                                                 if line.has("[skip]") then res = 7
 500                                                 if line.has("[todo]") then res = 8
 501                                                 if line.has("[skip exec]") then res = 9
 502
 503                                                 if res == null then
 504                                                         res = 0
 505                                                         if verbose > 1 then print "Unknown result: '{line}'"
 506                                                 end
 507                                                 buffer[cc+3] = res
 508
 509                                                 c += 1
 510
 511                                                 if verbose > 2 then print "tests.sh output line: {line}"
 512
 513                                                 # If result buffer is full, send to `Controller`
 514                                                 if c*4 == buffer.length then
 515                                                         send_results
 516                                                         c = 0
 517                                                 end
 518                                         end
 519
 520                                         if verbose > 1 then print "Done testing: {task}"
 521
 522                                         self.results_count = c
 523                                 end
 524
 525                                 mpi.send_empty(controller_rank, need_work_tag, comm_world)
 526                         else if status.tag == quit_tag then
 527                                 # Notification from the `Controller` to quit
 528                                 mpi.recv_empty(status.source, status.tag, comm_world)
 529
 530                                 # Send remaining results
 531                                 send_results
 532
 533                                 # Notify `Controller` that `self` is done and will quit
 534                                 mpi.send_empty(controller_rank, done_tag, comm_world)
 535                                 break
 536                         else
 537                                 print "Unexpected tag {status.tag}"
 538                                 break
 539                         end
 540                 end
 541                 status.free
 542         end
 543
 544         # Total results listed in `buffer` and ready to send
 545         var results_count = 0
 546
 547         # Send all results in `buffer` to the `Controller`
 548         fun send_results
 549         do
 550                 if results_count > 0 then
 551                         if verbose > 2 then print "Sending {results_count} results"
 552                         mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
 553                         results_count = 0
 554                 end
 555         end
 556
 557         redef fun receive_signal(signal)
 558         do
 559                 cleanup
 560                 mpi.finalize
 561                 exit 0
 562         end
 563 end
 564
 565 # A single test task, on a `test_program` with an `engine`
 566 #
 567 # Note that a task may involve more than one program to test considering the
 568 # alts and args for the `test_program`.
 569 class Task
 570         # Engine to test executing `test_program`
 571         var engine: String
 572
 573         # Program to execute with `engine`
 574         var test_program: String
 575
 576         redef fun to_s do return "{engine} {test_program}"
 577 end
 578
 579 # Result of a `Task`
 580 #
 581 # There may be more than one result per `Task`.
 582 class Result
 583         # `Task` associated to `self`
 584         var task: Task
 585
 586         # Argument index of the execution resulting in `self`
 587         var arg: Int
 588
 589         # Alternative index of the execution resulting in `self`
 590         var alt: Int
 591
 592         # Is `self` result an _ok_?
 593         var ok = false
 594
 595         # Is `self` result an _0k_?
 596         var ok_empty = false
 597
 598         # Is `self` result a _no sav_?
 599         var no_sav = false
 600
 601         # Is `self` result a _fixme_?
 602         var fixme = false
 603
 604         # Is `self` result a _fail_?
 605         var fail = false
 606
 607         # Is `self` result a _soso_?
 608         var soso = false
 609
 610         # Has `self` been skipped?
 611         var skip = false
 612
 613         # Is `self` TODO?
 614         var todo = false
 615
 616         # Has the execution of `self` been skipped?
 617         var skip_exec = false
 618
 619         # Is `self` an unknown result, probably an error
 620         var unknown = false
 621
 622         redef fun to_s
 623         do
 624                 var err = "Unknown"
 625                 if no_sav then err = "no sav"
 626                 if ok then err = "ok"
 627                 if ok_empty then err = "0k"
 628                 if fixme then err = "fixme"
 629                 if fail then err = "fail"
 630                 if soso then err = "soso"
 631                 if skip then err = "skip"
 632                 if todo then err = "todo"
 633                 if skip_exec then err = "skip_exec"
 634
 635                 return "{task} arg{arg} alt{alt} => {err}"
 636         end
 637 end
 638
 639 # A global and sorted collection of `Result`
 640 class ResultSet
 641         super HashSet[Result]
 642
 643         var no_savs = new HashSet[Result]
 644         var oks = new HashSet[Result]
 645         var ok_empties = new HashSet[Result]
 646         var fixmes = new HashSet[Result]
 647         var fails = new HashSet[Result]
 648         var sosos = new HashSet[Result]
 649         var skips = new HashSet[Result]
 650         var todos = new HashSet[Result]
 651         var skip_execs = new HashSet[Result]
 652         var unknowns = new HashSet[Result]
 653
 654         # TODO remove
 655         var per_engines = new HashMap[String, Result]
 656
 657         redef fun add(result)
 658         do
 659                 if result.no_sav then no_savs.add result
 660                 if result.ok then oks.add result
 661                 if result.ok_empty then ok_empties.add result
 662                 if result.fixme then fixmes.add result
 663                 if result.fail then fails.add result
 664                 if result.soso then sosos.add result
 665                 if result.skip then skips.add result
 666                 if result.todo then todos.add result
 667                 if result.skip_exec then skip_execs.add result
 668                 if result.unknown then unknowns.add result
 669
 670                 super
 671         end
 672
 673         redef fun remove(r) do abort
 674
 675         redef fun clear do abort
 676 end
 677
 678 redef class OptionContext
 679
 680         # Print usage with a possible error `message`
 681         private fun usage_error(message: nullable String)
 682         do
 683                 var ret = 0
 684                 if message != null then
 685                         print "Error: {message}"
 686                         ret = 1
 687                 end
 688
 689                 if comm_world.rank == 0 then
 690                         print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
 691                         usage
 692                 end
 693
 694                 mpi.finalize
 695                 exit ret
 696         end
 697 end
 698
 699 # On `Worker` nodes, prefix all prints with `rank/comm_world.size`
 700 redef fun print(msg: Object)
 701 do
 702         if comm_world.rank != 0.rank then
 703                 super "{comm_world.rank}/{comm_world.size}: {msg}"
 704         else super msg
 705 end
 706
 707 # Running MPI instance
 708 fun mpi: MPI do return once new MPI
 709
 710 # Launch mpi
 711 mpi
 712
 713 # Local rank
 714 var rank = comm_world.rank
 715
 716 var processor: Processor
 717 if rank == 0.rank then
 718         # If rank == 0, this is the `Controller`
 719         processor = new Controller
 720 else
 721         # This is a worker
 722         processor = new Worker(rank)
 723 end
 724 processor.run
 725
 726 mpi.finalize