contrib/nitester/src/nitester.nit

   1 # This file is part of NIT (http://www.nitlanguage.org).
   2 #
   3 # Copyright 2014 Alexis Laferrière <alexis.laf@xymus.net>
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 # Tester of Nit engines on an MPI cluster
  18 module nitester
  19
  20 import mpi
  21 import signals
  22 import opts
  23
  24 # Any processor, worker or controller
  25 #
  26 # All data and methods within this class are shared by the controller and the
  27 # workers.
  28 abstract class Processor
  29         super SignalHandler
  30
  31         # Controller rank is always 0
  32         var controller_rank: Rank = 0.rank
  33
  34         # Where to store data for transfer between nodes
  35         #
  36         # Require: `buffer.length % 4 == 0`
  37         var buffer = new CIntArray(1024)
  38
  39         # Run in verbose mode, display more text
  40         var verbose = 0
  41
  42         init
  43         do
  44                 # OpenMPI sends a SIGTERM to all nodes upon receiving a SIGTERM or SIGINT
  45                 # on the first process.
  46                 handle_signal(sigterm, true)
  47         end
  48
  49         # Tag of a new task packet of size `tasks_per_packet`
  50         var task_tag: Tag = 0.tag
  51
  52         # Tag to return a set of `Result` throught `buffer`
  53         var result_tag: Tag = 1.tag
  54
  55         # Tag to notify `Worker` when to quit
  56         var quit_tag: Tag = 2.tag
  57
  58         # Tag to request more work from the `Controller` by a `Worker`
  59         var need_work_tag: Tag = 4.tag
  60
  61         # Tag to notify `Controller` that the sender `Worker` is done
  62         var done_tag: Tag = 5.tag
  63
  64         # Number of tasks within each task assignation with `task_tag`
  65         var tasks_per_packet = 4
  66
  67         # Run the main logic of this node
  68         fun run is abstract
  69
  70         # Engines targetted by this execution
  71         var engines: Array[String] is noinit
  72
  73         # All known engines, used to detect errors in `engines`
  74         var all_engines: Array[String] = ["nitg-s", "nitg-sg", "nitg-g", "nitg-e", "niti", "emscripten"]
  75
  76         # Programs to test in this execution
  77         var test_programs: Array[String] is noinit
  78
  79         # Root of the temporary directory
  80         var tmp_dir = "/dev/shm/"
  81
  82         # `ccache` directory
  83         var ccache_dir = "/dev/shm/nit_ccache"
  84
  85         # Read command line options
  86         fun read_cli_options
  87         do
  88                 var opt_ctx = new OptionContext
  89                 var opt_engines = new OptionString(
  90                         "Engines to test, separated with commas ({all_engines.join(", ")} or all)",
  91                         "--engine", "-e")
  92                 var opt_help = new OptionBool("Print this help message", "--help", "-h")
  93                 var opt_verbose = new OptionCount(
  94                         "Be verbose, repeat to increase verbose level (max with -vvv)",
  95                         "--verbose", "-v")
  96                 var opt_cleanup = new OptionBool(
  97                         "Clean up all nitester files (and do not run tests)",
  98                         "--cleanup", "-C")
  99
 100                 opt_ctx.add_option(opt_engines, opt_help, opt_verbose, opt_cleanup)
 101                 opt_ctx.parse args
 102
 103                 # --help?
 104                 if opt_help.value then opt_ctx.usage_error null
 105
 106                 # --verbose?
 107                 verbose = opt_verbose.value
 108
 109                 # --cleanup?
 110                 if opt_cleanup.value then
 111                         assert tmp_dir.file_exists
 112                         for file in tmp_dir.files do if file.has_prefix("nit") then
 113                                 var full_path = tmp_dir / file
 114                                 if full_path == ccache_dir then continue
 115
 116                                 assert full_path.file_exists
 117
 118                                 var stat = full_path.file_lstat
 119                                 if stat.is_dir then
 120                                         full_path.rmdir
 121                                 else
 122                                         full_path.file_delete
 123                                 end
 124                                 stat.free
 125                         end
 126                         mpi.finalize
 127                         exit 0
 128                 end
 129
 130                 # any files?
 131                 var rest = opt_ctx.rest
 132                 if rest.is_empty then opt_ctx.usage_error "This tool needs at least one test_program.nit"
 133                 test_programs = rest
 134
 135                 # gather and check engines
 136                 var engines_str = opt_engines.value
 137                 var engines
 138                 if engines_str == null then
 139                         # default
 140                         engines = ["nitg-s"]
 141                 else
 142                         engines = engines_str.split(',')
 143
 144                         if engines.has("all") then
 145                                 # all engines
 146                                 engines = all_engines
 147                         end
 148                 end
 149
 150                 # check validity of targetted engines
 151                 var unknown_engines = new Array[String]
 152                 for engine in engines do if not all_engines.has(engine) then unknown_engines.add engine
 153
 154                 if not unknown_engines.is_empty then
 155                         opt_ctx.usage_error "Unknown engines: {unknown_engines.join(", ")} (expected one or most of {all_engines.join(", ")})"
 156                 end
 157                 self.engines = engines
 158         end
 159
 160         # All tasks to be performed
 161         var tasks = new Array[Task]
 162
 163         # Gather and registar all tasks
 164         fun create_tasks
 165         do
 166                 var c = 0
 167                 for engine in engines do for prog in test_programs do
 168                         tasks.add new Task(engine, prog)
 169                         c += 1
 170                 end
 171         end
 172 end
 173
 174 # Single controller to dispatch tasks, gather results and produce stats
 175 class Controller
 176         super Processor
 177
 178         # Id as `Int` of the next task to distribute
 179         var next_task_id = 0
 180
 181         redef fun receive_signal(signal)
 182         do
 183                 shutdown
 184                 print_results
 185
 186                 mpi.finalize
 187                 exit 0
 188         end
 189
 190         redef fun run
 191         do
 192                 read_cli_options
 193                 create_tasks
 194                 distribute_tasks
 195                 print_results
 196         end
 197
 198         # Cumulated results from workers
 199         var results = new ResultSet
 200
 201         # Maintain communication with workers to distribute tasks and receiver results
 202         fun distribute_tasks
 203         do
 204                 var at_work = new Array[Rank]
 205
 206                 # send initial tasks
 207                 for r in [1..comm_world.size[ do
 208                         var sent = send_task_to(r.rank)
 209                         if sent then
 210                                 at_work.add r.rank
 211                         else
 212                                 mpi.send_empty(r.rank, quit_tag, comm_world)
 213                         end
 214                 end
 215
 216                 var status = new Status
 217                 # await results and send new tasks
 218                 while not at_work.is_empty do
 219                         check_signals
 220
 221                         # Double probe to avoid bug with some implementation of MPI
 222                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 223                         mpi.probe(new Rank.any, new Tag.any, comm_world, status)
 224
 225                         if status.tag == result_tag then
 226                                 # Receive results fron a worker
 227                                 var count = status.count(new DataType.int)
 228                                 mpi.recv_into(buffer, 0, count, status.source, status.tag, comm_world)
 229
 230                                 # Parse results from C array to `Result` instances
 231                                 #
 232                                 # Each result is on 4 ints: task id, arg, alt and result.
 233                                 #
 234                                 # See the comments where the data is produced in `Worker::work_on_tasks` for more informationé
 235                                 assert count % 4 == 0
 236                                 for t in (count/4).times do
 237                                         var tt = t*4
 238
 239                                         var task_id = buffer[tt]
 240                                         var arg = buffer[tt+1]
 241                                         var alt = buffer[tt+2]
 242                                         var res = buffer[tt+3]
 243
 244                                         var result = new Result(tasks[task_id], arg, alt)
 245
 246                                         if res == 1 then result.ok = true
 247                                         if res == 2 then result.ok_empty = true
 248                                         if res == 3 then result.no_sav = true
 249                                         if res == 4 then result.fixme = true
 250                                         if res == 5 then result.fail = true
 251                                         if res == 6 then result.soso = true
 252                                         if res == 7 then result.skip = true
 253                                         if res == 0 then result.unknown = true
 254
 255                                         results.add result
 256
 257                                         if verbose > 0 and results.length % 25 == 0 then print_short_results
 258                                 end
 259
 260                         else if status.tag == need_work_tag then
 261                                 # A worker needs more work
 262                                 mpi.recv_empty(status.source, status.tag, comm_world)
 263                                 var sent = send_task_to(status.source)
 264                                 if not sent then
 265                                         # no more work, quit
 266                                         mpi.send_empty(status.source, quit_tag, comm_world)
 267                                 end
 268                         else if status.tag == done_tag then
 269                                 # A worker is done and will quit
 270                                 mpi.recv_empty(status.source, status.tag, comm_world)
 271                                 at_work.remove(status.source)
 272
 273                                 if verbose > 1 then print "worker {status.source} is done ({at_work.length} still at work)"
 274                         else
 275                                 print "Unexpected tag {status.tag}"
 276                                 shutdown
 277                                 break
 278                         end
 279                 end
 280                 status.free
 281         end
 282
 283         # Send a packet of tasks to worker at `rank`
 284         fun send_task_to(rank: Rank): Bool
 285         do
 286                 if next_task_id >= tasks.length then return false
 287
 288                 buffer[0] = next_task_id
 289                 next_task_id += tasks_per_packet
 290
 291                 mpi.send_from(buffer, 0, 1, rank, task_tag, comm_world)
 292
 293                 if verbose > 1 then print "sent tasks [{buffer[0]}..{next_task_id}[ to worker {rank}"
 294                 return true
 295         end
 296
 297         # Display the accumulated results received from workers
 298         fun print_results
 299         do
 300                 print "# results #"
 301                 print "* {results.length} total"
 302                 print "* {results.oks.length + results.ok_empties.length} oks & 0ks"
 303                 print "* {results.fails.length} fails"
 304                 print "* {results.no_savs.length} no savs"
 305                 print "* {results.fixmes.length} fixmes"
 306                 print "* {results.sosos.length} sosos"
 307                 print "* {results.skips.length} skips"
 308                 print "* {results.unknowns.length} unknowns (bug in tests.sh or nitester)"
 309         end
 310
 311         fun print_short_results do print "oks & fails / total: {results.oks.length + results.ok_empties.length} " +
 312                 "& {results.fails.length} / {results.length}"
 313
 314         # Shutdown anormaly the running tests
 315         fun shutdown
 316         do
 317                 print "Shutting down"
 318                 mpi.send_empty(new Rank.any, quit_tag, comm_world)
 319         end
 320 end
 321
 322 # A worker node which actually execute the tests
 323 class Worker
 324         super Processor
 325
 326         # The `Rank` of `self`
 327         var rank: Rank
 328
 329         # Compilation directory
 330         var comp_dir = "/dev/shm/nit_compile{rank}" is lazy
 331
 332         # Output file directory
 333         var out_dir = "/dev/shm/nit_out{rank}" is lazy
 334
 335         # Directory to store the xml files produced for Jenkins
 336         var xml_dir = "~/jenkins_xml/"
 337
 338         # Output file of the `tests.sh` script
 339         var tests_sh_out = "/dev/shm/nit_local_out{rank}" is lazy
 340
 341         # Source Nit repository, must be already updated and `make` before execution
 342         var nit_source_dir = "~/nit"
 343
 344         # Compiled `Regex` to detect the argument of an execution
 345         var re_arg: Regex = "arg [0-9]+".to_re
 346
 347         # Compiled `Regex` to detect the alternative of an execution
 348         var re_alt: Regex = "_alt[0-9]+".to_re
 349
 350         redef fun run
 351         do
 352                 read_cli_options
 353                 setup
 354                 create_tasks
 355                 work_on_tasks
 356                 cleanup
 357         end
 358
 359         # Setup the testing environment
 360         #
 361         # Clone the nit repository.
 362         fun setup
 363         do
 364                 if verbose > 0 then sys.system "hostname"
 365         end
 366
 367         # Clean up the testing environment
 368         #
 369         # Delete all temporary files, except `ccache_dir`.
 370         fun cleanup
 371         do
 372                 if comp_dir.file_exists then comp_dir.rmdir
 373                 if out_dir.file_exists then out_dir.rmdir
 374                 if tests_sh_out.file_exists then tests_sh_out.file_delete
 375         end
 376
 377         # Single C `int` to hold the next task id received from the `Controller`
 378         var task_buffer = new CIntArray(1)
 379
 380         # Manage communication with the `Controller` and execute dispatched `Task`s
 381         fun work_on_tasks
 382         do
 383                 var status = new Status
 384                 loop
 385                         check_signals
 386
 387                         # We double probe to prevent bug where a single probes does not receive the
 388                         # real next read.
 389                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 390                         mpi.probe(controller_rank, new Tag.any, comm_world, status)
 391
 392                         if status.tag == task_tag then
 393                                 # Receive tasks to execute
 394                                 mpi.recv_into(task_buffer, 0, 1, status.source, status.tag, comm_world)
 395                                 var first_id = task_buffer[0]
 396                                 for task_id in [first_id .. first_id + tasks_per_packet] do
 397
 398                                         # If id is over all known tasks, stop right here
 399                                         if task_id >= tasks.length then break
 400                                         var task = tasks[task_id]
 401
 402                                         # Command line to execute test
 403                                         var cmd = "XMLDIR={xml_dir} ERRLIST={out_dir}/errlist TMPDIR={out_dir} " +
 404                                                 "CCACHE_DIR={ccache_dir} CCACHE_TEMPDIR={ccache_dir} CCACHE_BASEDIR={comp_dir} " +
 405                                                 "./tests.sh --compdir {comp_dir} --outdir {out_dir} " +
 406                                                 " --node --engine {task.engine} {task.test_program} > {tests_sh_out}"
 407
 408                                         # Execute test
 409                                         sys.system cmd
 410
 411                                         # Test results were written to file, read them
 412                                         var fstream = new IFStream.open(tests_sh_out)
 413                                         var content = fstream.read_all
 414                                         fstream.close
 415
 416                                         # Parse result and prepare them for sending
 417                                         #
 418                                         # The structure is composed of 4 ints for each result.
 419                                         # 1. task id
 420                                         # 2. arg number
 421                                         # 3. alt number
 422                                         # 4. test result as int
 423                                         var c = results_count
 424                                         for line in content.split('\n') do if not line.is_empty then
 425                                                 var cc = c*4
 426
 427                                                 buffer[cc] = task_id
 428
 429                                                 var arg_match = line.search(re_arg)
 430                                                 var arg = 0
 431                                                 if arg_match != null then arg = arg_match.to_s.substring_from(4).to_i
 432                                                 buffer[cc+1] = arg
 433
 434                                                 var alt_match = line.search(re_alt)
 435                                                 var alt = 0
 436                                                 if alt_match != null then alt = alt_match.to_s.substring_from(4).to_i
 437                                                 buffer[cc+2] = alt
 438
 439                                                 var res = null
 440                                                 if line.has("[ok]") then res = 1
 441                                                 if line.has("[0k]") then res = 2
 442                                                 if line.has("[=== no sav ===]") then res = 3
 443                                                 if line.has("[fixme]") then res = 4
 444                                                 if line.has("[======= fail") then res = 5
 445                                                 if line.has("[======= soso") then res = 6
 446                                                 if line.has("[skip]") then res = 7
 447
 448                                                 if res == null then
 449                                                         res = 0
 450                                                         if verbose > 1 then print "Unknown result: '{line}'"
 451                                                 end
 452                                                 buffer[cc+3] = res
 453
 454                                                 c += 1
 455
 456                                                 if verbose > 2 then print "tests.sh output line: {line}"
 457
 458                                                 # If result buffer is full, send to `Controller`
 459                                                 if c*4 == buffer.length then
 460                                                         send_results
 461                                                         c = 0
 462                                                 end
 463                                         end
 464
 465                                         self.results_count = c
 466                                 end
 467
 468                                 mpi.send_empty(controller_rank, need_work_tag, comm_world)
 469                         else if status.tag == quit_tag then
 470                                 # Notification from the `Controller` to quit
 471                                 mpi.recv_empty(status.source, status.tag, comm_world)
 472
 473                                 # Send remaining results
 474                                 send_results
 475
 476                                 # Notify `Controller` that `self` is done and will quit
 477                                 mpi.send_empty(controller_rank, done_tag, comm_world)
 478                                 break
 479                         else
 480                                 print "Unexpected tag {status.tag}"
 481                                 break
 482                         end
 483                 end
 484                 status.free
 485         end
 486
 487         # Total results listed in `buffer` and ready to send
 488         var results_count = 0
 489
 490         # Send all results in `buffer` to the `Controller`
 491         fun send_results
 492         do
 493                 if results_count > 0 then
 494                         if verbose > 1 then print "sending {results_count} results"
 495                         mpi.send_from(buffer, 0, results_count*4, controller_rank, result_tag, comm_world)
 496                         results_count = 0
 497                 end
 498         end
 499
 500         redef fun receive_signal(signal)
 501         do
 502                 cleanup
 503                 mpi.finalize
 504                 exit 0
 505         end
 506 end
 507
 508 # A single test task, on a `test_program` with an `engine`
 509 #
 510 # Note that a task may involve more than one program to test considering the
 511 # alts and args for the `test_program`.
 512 class Task
 513         # Engine to test executing `test_program`
 514         var engine: String
 515
 516         # Program to execute with `engine`
 517         var test_program: String
 518
 519         redef fun to_s do return "{engine} {test_program}"
 520 end
 521
 522 # Result of a `Task`
 523 #
 524 # There may be more than one result per `Task`.
 525 class Result
 526         # `Task` associated to `self`
 527         var task: Task
 528
 529         # Argument index of the execution resulting in `self`
 530         var arg: Int
 531
 532         # Alternative index of the execution resulting in `self`
 533         var alt: Int
 534
 535         # Is `self` result an _ok_?
 536         var ok = false
 537
 538         # Is `self` result an _0k_?
 539         var ok_empty = false
 540
 541         # Is `self` result a _no sav_?
 542         var no_sav = false
 543
 544         # Is `self` result a _fixme_?
 545         var fixme = false
 546
 547         # Is `self` result a _fail_?
 548         var fail = false
 549
 550         # Is `self` result a _soso_?
 551         var soso = false
 552
 553         # Is `self` skipped test?
 554         var skip = false
 555
 556         # Is `self` an unknown result, probably an error
 557         var unknown = false
 558
 559         redef fun to_s
 560         do
 561                 var err = "Unknown"
 562                 if no_sav then err = "no sav"
 563                 if ok then err = "ok"
 564                 if ok_empty then err = "0k"
 565                 if fixme then err = "fixme"
 566                 if fail then err = "fail"
 567
 568                 return "{task} arg{arg} alt{alt} => {err}"
 569         end
 570 end
 571
 572 # A global and sorted collection of `Result`
 573 class ResultSet
 574         super HashSet[Result]
 575
 576         var no_savs = new HashSet[Result]
 577         var oks = new HashSet[Result]
 578         var ok_empties = new HashSet[Result]
 579         var fixmes = new HashSet[Result]
 580         var fails = new HashSet[Result]
 581         var sosos = new HashSet[Result]
 582         var skips = new HashSet[Result]
 583         var unknowns = new HashSet[Result]
 584
 585         # TODO remove
 586         var per_engines = new HashMap[String, Result]
 587
 588         redef fun add(result)
 589         do
 590                 if result.no_sav then no_savs.add result
 591                 if result.ok then oks.add result
 592                 if result.ok_empty then ok_empties.add result
 593                 if result.fixme then fixmes.add result
 594                 if result.fail then fails.add result
 595                 if result.soso then sosos.add result
 596                 if result.skip then skips.add result
 597                 if result.unknown then unknowns.add result
 598
 599                 super
 600         end
 601
 602         redef fun remove(r) do abort
 603
 604         redef fun clear do abort
 605 end
 606
 607 redef class OptionContext
 608
 609         # Print usage with a possible error `message`
 610         private fun usage_error(message: nullable String)
 611         do
 612                 var ret = 0
 613                 if message != null then
 614                         print "Error: {message}"
 615                         ret = 1
 616                 end
 617
 618                 if comm_world.rank == 0 then
 619                         print "Usage: mpirun nitester [Options] test_program.nit [other_test.nit [...]]"
 620                         usage
 621                 end
 622
 623                 mpi.finalize
 624                 exit ret
 625         end
 626 end
 627
 628 # On `Worker` nodes, prefix all prints with `rank/comm_world.size`
 629 redef fun print(msg: Object)
 630 do
 631         if comm_world.rank != 0.rank then
 632                 super "{comm_world.rank}/{comm_world.size}: {msg}"
 633         else super msg
 634 end
 635
 636 # Running MPI instance
 637 fun mpi: MPI do return once new MPI
 638
 639 # Launch mpi
 640 mpi
 641
 642 # Local rank
 643 var rank = comm_world.rank
 644
 645 var processor: Processor
 646 if rank == 0.rank then
 647         # If rank == 0, this is the `Controller`
 648         processor = new Controller
 649 else
 650         # This is a worker
 651         processor = new Worker(rank)
 652 end
 653 processor.run
 654
 655 mpi.finalize