contrib/nitester/src/nitester.nit

   1 # This file is part of NIT (http://www.nitlanguage.org).
   2 #
   3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 # Tester of Nit engines on an MPI cluster
  18 module nitester
  19
  20 import mpi
  21 import signals
  22 import opts
  23
  24 # Any processor, worker or controller
  25 #
  26 # All data and methods within this class are shared by the controller and the
  27 # workers.
  28 abstract class Processor
  29         super SignalHandler
  30
  31         # Controller rank is always 0
  32         var controller_rank: Rank = 0.rank
  33
  34         # Rank on this processor
  35         fun rank: Rank is abstract
  36
  37         # Where to store data for transfer between nodes
  38         #
  39         # Require: `buffer.length % 4 == 0`
  40         var buffer = new CIntArray(1024)
  41
  42         # Verbosity level: 0 is quiet, higher values display more text
  43         var verbose = 0
  44
  45         init
  46         do
  47                 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
  48                 # on the first process.
  49                 handle_signal(sigterm, true)
  50         end
  51
  52         # Tag of a new task packet of size `tasks_per_packet`
  53         var task_tag: Tag = 0.tag
  54
  55         # Tag to return a set of `Result` through `buffer`
  56         var result_tag: Tag = 1.tag
  57
  58         # Tag to notify `Worker` when to quit
  59         var quit_tag: Tag = 2.tag
  60
  61         # Tag to request more work from the `Controller` by a `Worker`
  62         var need_work_tag: Tag = 4.tag
  63
  64         # Tag to notify `Controller` that the sender `Worker` is done
  65         var done_tag: Tag = 5.tag
  66
  67         # Number of tasks within each task assignation with `task_tag`
  68         var tasks_per_packet = 1
  69
  70         # Run the main logic of this node
  71         fun run is abstract
  72
  73         # Hash or name of the branch to test
  74         var branch_hash: String is noinit
  75
  76         # Engines targeted by this execution
  77         var engines: Array[String] is noinit
  78
  79         # All known engines, used to detect errors in `engines`
  80         var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]
  81
  82         # Programs to test in this execution
  83         var test_programs: Array[String] is noinit
  84
  85         # Root of the temporary directory
  86         var tmp_dir = "/dev/shm/"
  87
  88         # `ccache` directory
  89         var ccache_dir = "/dev/shm/nit_ccache"
  90
  91         # Read command line options
  92         #
  93         # Either sets the option attributes, or finalizes MPI and exits the process.
  94         fun read_cli_options
  95         do
  96                 var opt_ctx = new OptionContext
  97                 # NOTE(review): `-h` is declared for both `--hash` and `--help`, so only one
  98                 # of them can own the short form; confirm which one `opts` keeps.
  99                 var opt_hash = new OptionString("Branch to test", "--hash", "-h")
 100                 opt_hash.mandatory = true
 101                 var opt_engines = new OptionString(
 102                         "Engines to test, separated with commas ({all_engines.join(", ")} or all)", "--engine", "-e")
 103                 var opt_help = new OptionBool("Print this help message", "--help", "-h")
 104                 var opt_verbose = new OptionCount(
 105                         "Be verbose, repeat to increase verbose level (max with -vvv)", "--verbose", "-v")
 106                 var opt_cleanup = new OptionBool(
 107                         "Clean up all nitester files (and do not run tests)", "--cleanup", "-C")
 108
 109                 opt_ctx.add_option(opt_hash, opt_engines, opt_help, opt_verbose, opt_cleanup)
 110                 opt_ctx.parse args
 111
 112                 # --help?
 113                 if opt_help.value then opt_ctx.usage_error null
 114
 115                 # --verbose?
 116                 verbose = opt_verbose.value
 117
 118                 # --cleanup? Delete the `nit*` entries of `tmp_dir` but `ccache_dir`, then exit
 119                 if opt_cleanup.value then
 120                         assert tmp_dir.file_exists
 121                         for file in tmp_dir.files do if file.has_prefix("nit") then
 122                                 var full_path = tmp_dir / file
 123                                 if full_path == ccache_dir then continue
 124                                 assert full_path.file_exists
 125
 126                                 var stat = full_path.file_lstat
 127                                 if stat.is_dir then
 128                                         full_path.rmdir
 129                                 else
 130                                         full_path.file_delete
 131                                 end
 132                                 stat.free
 133                         end
 134                         mpi.finalize
 135                         exit 0
 136                 end
 137
 138                 # any files?
 139                 var rest = opt_ctx.rest
 140                 if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
 141                 test_programs = rest
 142
 143                 # hash: a missing `--hash` is a usage error; the `return` only narrows the type
 144                 var hash = opt_hash.value
 145                 if hash == null then
 146                         opt_ctx.usage_error "Missing mandatory option --hash"
 147                         return
 148                 end
 149                 branch_hash = hash
 150
 151                 # gather and check engines
 152                 var engines_str = opt_engines.value
 153                 var engines
 154                 if engines_str == null then
 155                         # default
 156                         engines = ["nitg-s"]
 157                 else
 158                         engines = engines_str.split(',')
 159                         # `all` selects every known engine
 160                         if engines.has("all") then engines = all_engines
 161                 end
 162
 163                 # check validity of targeted engines
 164                 var unknown_engines = new Array[String]
 165                 for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine
 166
 167                 if not unknown_engines.is_empty then
 168                         opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
 169                 end
 170                 self.engines = engines
 171         end
 172
 173         # All tasks to be performed
 174         var tasks = new Array[Task]
 175
 176         # Gather and register all tasks
 177         fun create_tasks
 178         do
 179                 # At this point we are in our local nit
 180                 var skip_path = "tests/turing.skip"
 181                 var skip
 182                 if skip_path.file_exists then
 183                         var skip_file = new IFStream.open(skip_path)
 184                         skip = skip_file.read_lines
 185                         skip_file.close
 186                 else
 187                         skip = new Array[String]
 188                 end
 189
 190                 for prog in test_programs do for engine in engines do
 191                         # Is it blacklisted? The report is limited to the controller, whose
 192                         # rank is compared as a `Rank` like in the rest of this module.
 193                         for s in skip do if not s.is_empty and prog.has(s) then
 194                                 if verbose > 0 and rank == controller_rank then print "Skipping test '{prog}' because of '{s}' in turing.skip"
 195                                 continue label
 196                         end
 197
 198                         tasks.add new Task(engine, prog)
 199                 end label
 200         end
 201 end
 202
 203 # Single controller to dispatch tasks, gather results and produce stats
 204 class Controller
 205         super Processor
 206
 207         redef fun rank do return controller_rank
 208
 209         # Id as `Int` of the next task to distribute
 210         var next_task_id = 0
 211
 212         # React to a received signal: notify the workers, print the results and exit
 213         redef fun receive_signal(signal)
 214         do
 215                 shutdown
 216                 print_results
 217                 mpi.finalize
 218                 exit 0
 219         end
 220
 221         redef fun run
 222         do
 223                 read_cli_options
 224                 create_tasks
 225                 distribute_tasks
 226                 print_results
 227         end
 228
 229         # Cumulated results from workers
 230         var results = new ResultSet
 231
 232         # Maintain communication with workers to distribute tasks and receive results
 233         fun distribute_tasks
 234         do
 235                 var at_work = new Array[Rank]
 236
 237                 # send initial tasks
 238                 for r in [1..comm_world.size[ do
 239                         var sent = send_task_to(r.rank)
 240                         if sent then
 241                                 at_work.add r.rank
 242                         else
 243                                 mpi.send_empty(r.rank, quit_tag, comm_world)
 244                         end
 245                 end
 246
 247                 var status = new Status
 248                 # await results and send new tasks
 249                 while not at_work.is_empty do
 250                         check_signals
 251
 252                         # Double probe to avoid bug with some implementation of MPI
 253                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 254                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 255
 256                         if status.tag == result_tag then
 257                                 # Receive results from a worker
 258                                 var count = status.count(new DataType.int)
 259                                 mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)
 260
 261                                 # Parse results from C array to `Result` instances
 262                                 #
 263                                 # Each result is on 4 ints: task id, arg, alt and result.
 264                                 #
 265                                 # See the comments where the data is produced in `Worker::work_on_tasks` for more information
 266                                 assert count % 4 == 0
 267                                 for t in (count/4).times do
 268                                         var tt = t*4
 269
 270                                         var task_id = buffer[tt]
 271                                         var arg = buffer[tt+1]
 272                                         var alt = buffer[tt+2]
 273                                         var res = buffer[tt+3]
 274
 275                                         var result = new Result(tasks[task_id], arg, alt)
 276
 277                                         if res == 1 then result.ok = true
 278                                         if res == 2 then result.ok_empty = true
 279                                         if res == 3 then result.no_sav = true
 280                                         if res == 4 then result.fixme = true
 281                                         if res == 5 then result.fail = true
 282                                         if res == 6 then result.soso = true
 283                                         if res == 7 then result.skip = true
 284                                         if res == 8 then result.todo = true
 285                                         if res == 9 then result.skip_exec = true
 286                                         if res == 0 then result.unknown = true
 287
 288                                         results.add result
 289
 290                                         if verbose > 0 and results.length % 25 == 0 then print_short_results
 291                                 end
 292
 293                         else if status.tag == need_work_tag then
 294                                 # A worker needs more work
 295                                 mpi.recv_empty(status.source, status.tag, comm_world)
 296                                 var sent = send_task_to(status.source)
 297                                 if not sent then
 298                                         # no more work, quit
 299                                         mpi.send_empty(status.source, quit_tag, comm_world)
 300                                 end
 301                         else if status.tag == done_tag then
 302                                 # A worker is done and will quit
 303                                 mpi.recv_empty(status.source, status.tag, comm_world)
 304                                 at_work.remove(status.source)
 305
 306                                 if verbose > 1 then print "worker {status.source} is done ({at_work.length} still at work)"
 307                         else
 308                                 print "Unexpected tag {status.tag}"
 309                                 shutdown
 310                                 break
 311                         end
 312                 end
 313                 status.free
 314         end
 315
 316         # Send a packet of tasks to worker at `rank`, return `false` when no task is left
 317         fun send_task_to(rank: Rank): Bool
 318         do
 319                 if next_task_id >= tasks.length then return false
 320                 buffer[0] = next_task_id
 321                 next_task_id += tasks_per_packet
 322                 mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)
 323                 if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
 324                 return true
 325         end
 326
 327         # Display the accumulated results received from workers
 328         fun print_results
 329         do
 330                 print "# results #"
 331                 print "* {results.length} total"
 332                 print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
 333                 print "* {results.fails.length} fails"
 334                 print "* {results.no_savs.length} no savs"
 335                 print "* {results.fixmes.length} fixmes"
 336                 print "* {results.sosos.length} sosos"
 337                 print "* {results.skips.length} skips"
 338                 print "* {results.todos.length} todos"
 339                 print "* {results.skip_execs.length} skip execs"
 340                 print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
 341         end
 342
 343         # Display a one-line summary of the results received so far
 344         fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
 345                 "& {results.fails.length} / {results.length}"
 346
 347         # Abnormally shut down the running tests by notifying every worker to quit
 348         fun shutdown
 349         do
 350                 print "Shutting down"
 351                 # A send needs an actual destination: the `Rank.any` wildcard is only
 352                 # meaningful to receive or probe, so each worker rank is notified in turn.
 353                 for r in [1..comm_world.size[ do mpi.send_empty(r.rank, quit_tag, comm_world)
 354         end
 355 end
 356
 357 # A worker node which actually executes the tests
 358 class Worker
 359         super Processor
 360
 361         # The `Rank` of `self`
 362         redef var rank: Rank
 363
 364         # Compilation directory
 365         var comp_dir = "/dev/shm/nit_compile{rank}" is lazy
 366
 367         # Directory to store the xml files produced for Jenkins
 368         var xml_dir = "~/jenkins_xml/"
 369
 370         # Output file of the `tests.sh` script
 371         var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy
 372
 373         # Source Nit repository, must be already updated and `make` before execution
 374         var local_nit = "/dev/shm/nit{rank}" is lazy
 375
 376         # Remote Nit repository (actually the local source)
 377         var remote_nit = "~/nit/"
 378
 379         # Compiled `Regex` to detect the argument of an execution
 380         var re_arg: Regex = "arg [0-9]+".to_re
 381
 382         # Compiled `Regex` to detect the alternative of an execution
 383         var re_alt: Regex = "_alt[0-9]+".to_re
 384
 385         redef fun run
 386         do
 387                 read_cli_options
 388                 setup
 389                 create_tasks
 390                 work_on_tasks
 391                 cleanup
 392         end
 393
 394         # Setup the testing environment
 395         #
 396         # Clone the nit repository.
 397         fun setup
 398         do
 399                 if verbose > 0 then sys.system "hostname"
 400
 401                 if local_nit.file_exists then local_nit.rmdir
 402
 403                 exec_and_check "git clone {remote_nit} {local_nit}"
 404                 local_nit.chdir
 405                 exec_and_check "git config remote.origin.fetch +refs/remotes/origin/pr/*:refs/remotes/origin/pr/*"
 406                 exec_and_check "git fetch origin --quiet"
 407                 exec_and_check "git checkout {branch_hash}"
 408                 exec_and_check "cp {remote_nit}/bin/*  bin/"
 409                 exec_and_check "src/git-gen-version.sh"
 410         end
 411
 412         # Execute `cmd` with `sys.system` and abort if its result is not 0
 413         private fun exec_and_check(cmd: String)
 414         do
 415                 # The command always runs, `verbose` only controls the echo of `cmd`
 416                 if verbose > 0 then print "+ {cmd}"
 417                 var res = sys.system(cmd)
 418                 assert res == 0 else print "Command '{cmd}' failed."
 419         end
 420
 421         # Clean up the testing environment
 422         #
 423         # Delete all temporary files and directories, except `ccache_dir`.
 424         fun cleanup
 425         do
 426                 if comp_dir.file_exists then comp_dir.rmdir
 427                 if tests_sh_out.file_exists then tests_sh_out.file_delete
 428                 if local_nit.file_exists then local_nit.rmdir
 429         end
 430
 431         # Single C `int` to hold the next task id received from the `Controller`
 432         var task_buffer = new CIntArray(1)
 433
 434         # Manage communication with the `Controller` and execute dispatched `Task`s
 435         fun work_on_tasks
 436         do
 437                 var status = new Status
 438                 loop
 439                         check_signals
 440
 441                         # We double probe to prevent bug where a single probes does not receive the
 442                         # real next read.
 443                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 444                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 445
 446                         if status.tag == task_tag then
 447                                 # Receive tasks to execute
 448                                 mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
 449                                 var first_id = task_buffer[0]
 450                                 for task_id in [first_id .. first_id + tasks_per_packet[ do
 451
 452                                         # If id is over all known tasks, stop right here
 453                                         if task_id >= tasks.length then break
 454                                         var task = tasks[task_id]
 455
 456                                         "tests".chdir
 457
 458                                         # Command line to execute test
 459                                         var cmd = "XMLDIR={xml_dir} " +
 460                                                 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
 461                                                 "./tests.sh --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
 462
 463                                         # Execute test
 464                                         sys.system cmd
 465
 466                                         # Test results were written to file, read them
 467                                         var fstream = new IFStream.open(tests_sh_out)
 468                                         var content = fstream.read_all
 469                                         fstream.close
 470
 471                                         # Parse result and prepare them for sending
 472                                         #
 473                                         # The structure is composed of 4 ints for each result.
 474                                         # 1. task id
 475                                         # 2. arg number
 476                                         # 3. alt number
 477                                         # 4. test result as int
 478                                         var c = results_count
 479                                         for line in content.split('\n') do if not line.is_empty then
 480                                                 var cc = c*4
 481
 482                                                 buffer[cc] = task_id
 483
 484                                                 var arg_match = line.search(re_arg)
 485                                                 var arg = 0
 486                                                 if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
 487                                                 buffer[cc+1] = arg
 488
 489                                                 var alt_match = line.search(re_alt)
 490                                                 var alt = 0
 491                                                 if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
 492                                                 buffer[cc+2] = alt
 493
 494                                                 var res = null
 495                                                 if line.has("[ok]") then res = 1
 496                                                 if line.has("[0k]") then res = 2
 497                                                 if line.has("[=== no sav ===]") then res = 3
 498                                                 if line.has("[fixme]") then res = 4
 499                                                 if line.has("[======= fail") then res = 5
 500                                                 if line.has("[======= soso") then res = 6
 501                                                 if line.has("[skip]") then res = 7
 502                                                 if line.has("[todo]") then res = 8
 503                                                 if line.has("[skip exec]") then res = 9
 504
 505                                                 if res == null then
 506                                                         res = 0
 507                                                         if verbose > 1 then print "Unknown result: '{line}'"
 508                                                 end
 509                                                 buffer[cc+3] = res
 510                                                 c += 1
 511                                                 if verbose > 2 then print "tests.sh output line: {line}"
 512
 513                                                 # If result buffer is full, send to `Controller`. `send_results` reads
 514                                                 # `results_count`, so publish the local count before calling it.
 515                                                 if c*4 == buffer.length then
 516                                                         self.results_count = c
 517                                                         send_results
 518                                                         c = 0
 519                                                 end
 520                                         end
 521
 522                                         self.results_count = c
 523                                 end
 524
 525                                 mpi.send_empty(controller_rank, need_work_tag, comm_world)
 526                         else if status.tag == quit_tag then
 527                                 # Notification from the `Controller` to quit
 528                                 mpi.recv_empty(status.source, status.tag, comm_world)
 529
 530                                 # Send remaining results
 531                                 send_results
 532
 533                                 # Notify `Controller` that `self` is done and will quit
 534                                 mpi.send_empty(controller_rank, done_tag, comm_world)
 535                                 break
 536                         else
 537                                 print "Unexpected tag {status.tag}"
 538                                 break
 539                         end
 540                 end
 541                 status.free
 542         end
 543
 544         # Total results listed in `buffer` and ready to send
 545         var results_count = 0
 546
 547         # Send all results in `buffer` to the `Controller`
 548         fun send_results
 549         do
 550                 if results_count > 0 then
 551                         if verbose > 1 then print "sending {results_count} results"
 552                         mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
 553                         results_count = 0
 554                 end
 555         end
 556
 557         redef fun receive_signal(signal)
 558         do
 559                 cleanup
 560                 mpi.finalize
 561                 exit 0
 562         end
 563 end
 564
 565 # A single test task, pairing a `test_program` with an `engine`
 566 #
 567 # A task may cover more than one program execution, considering the alts
 568 # and args of its `test_program`.
 569 class Task
 570         # Engine used to execute `test_program`
 571         var engine: String
 572
 573         # Program executed by `engine`
 574         var test_program: String
 575
 576         redef fun to_s do return engine + " " + test_program
 577 end
 578
 579 # Outcome of one execution of a `Task`
 580 #
 581 # A single `Task` may produce more than one result.
 582 class Result
 583         # `Task` which produced `self`
 584         var task: Task
 585
 586         # Index of the argument set of the execution resulting in `self`
 587         var arg: Int
 588
 589         # Index of the alternative of the execution resulting in `self`
 590         var alt: Int
 591
 592         # Was the execution reported as _ok_?
 593         var ok = false
 594
 595         # Was the execution reported as _0k_?
 596         var ok_empty = false
 597
 598         # Was the execution reported as _no sav_?
 599         var no_sav = false
 600
 601         # Was the execution reported as _fixme_?
 602         var fixme = false
 603
 604         # Was the execution reported as _fail_?
 605         var fail = false
 606
 607         # Was the execution reported as _soso_?
 608         var soso = false
 609
 610         # Was the test skipped?
 611         var skip = false
 612
 613         # Was the test reported as _todo_?
 614         var todo = false
 615
 616         # Was the execution of the test skipped?
 617         var skip_exec = false
 618
 619         # Is the result unknown, probably an error?
 620         var unknown = false
 621
 622         redef fun to_s
 623         do
 624                 # Flags are tested from the highest to the lowest precedence
 625                 var prefix = "{task} arg{arg} alt{alt} => "
 626                 if skip_exec then return prefix + "skip_exec"
 627                 if todo then return prefix + "todo"
 628                 if skip then return prefix + "skip"
 629                 if soso then return prefix + "soso"
 630                 if fail then return prefix + "fail"
 631                 if fixme then return prefix + "fixme"
 632                 if ok_empty then return prefix + "0k"
 633                 if ok then return prefix + "ok"
 634                 if no_sav then return prefix + "no sav"
 635                 return prefix + "Unknown"
 636         end
 637 end
 638
 639 # A global collection of `Result`, also classified by kind of result
 640 class ResultSet
 641         super HashSet[Result]
 642
 643         # Subsets of `self`, one per flag of `Result`, filled by `add`
 644         var oks = new HashSet[Result]
 645         var ok_empties = new HashSet[Result]
 646         var no_savs = new HashSet[Result]
 647         var fixmes = new HashSet[Result]
 648         var fails = new HashSet[Result]
 649         var sosos = new HashSet[Result]
 650         var skips = new HashSet[Result]
 651         var todos = new HashSet[Result]
 652         var skip_execs = new HashSet[Result]
 653         var unknowns = new HashSet[Result]
 654
 655         # TODO remove
 656         var per_engines = new HashMap[String, Result]
 657
 658         redef fun add(result)
 659         do
 660                 if result.ok then oks.add(result)
 661                 if result.ok_empty then ok_empties.add(result)
 662                 if result.no_sav then no_savs.add(result)
 663                 if result.fixme then fixmes.add(result)
 664                 if result.fail then fails.add(result)
 665                 if result.soso then sosos.add(result)
 666                 if result.skip then skips.add(result)
 667                 if result.todo then todos.add(result)
 668                 if result.skip_exec then skip_execs.add(result)
 669                 if result.unknown then unknowns.add(result)
 670                 super
 671         end
 672
 673         redef fun remove(r) do abort
 674
 675         redef fun clear do abort
 676 end
 677
 678 redef class OptionContext
 679
 680         # Print usage with a possible error `message`, then finalize MPI and exit
 681         private fun usage_error(message: nullable String)
 682         do
 683                 var ret = 0
 684                 if message != null then
 685                         print "Error: {message}"
 686                         ret = 1
 687                 end
 688
 689                 # Only rank 0 prints the usage; compared as `Rank`s like the rest of the module
 690                 if comm_world.rank == 0.rank then
 691                         print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
 692                         usage
 693                 end
 694                 mpi.finalize
 695                 exit ret
 696         end
 697 end
 698
 699 # Prefix all prints with `rank/comm_world.size`, except on the node of rank 0
 700 redef fun print(msg: Object)
 701 do
 702         var text: Object = msg
 703         if comm_world.rank != 0.rank then text = "{comm_world.rank}/{comm_world.size}: {msg}"
 704         super text
 705 end
 706
 707 # Running MPI instance, created by the first call and returned by each later one
 708 fun mpi: MPI do return once new MPI
 709
 710 # Launch mpi
 711 mpi
 712
 713 # Rank of the local process
 714 var local_rank = comm_world.rank
 715
 716 # The process at rank 0 is the `Controller`, any other is a `Worker`
 717 var processor: Processor
 718 if local_rank != 0.rank then
 719         processor = new Worker(local_rank)
 720 else
 721         processor = new Controller
 722 end
 723
 724 # Run the logic of this node, then close MPI
 725 processor.run
 726 mpi.finalize