benchmarks: Added CSV benchmark to bench suite
author Lucas Bajolet <r4pass@hotmail.com>
Thu, 12 May 2016 19:22:20 +0000 (15:22 -0400)
committer Lucas Bajolet <r4pass@hotmail.com>
Fri, 13 May 2016 19:21:23 +0000 (15:21 -0400)
Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>

benchmarks/csv/csv_bench.sh [new file with mode: 0755]
benchmarks/csv/scripts/JavaCSV.java [new file with mode: 0644]
benchmarks/csv/scripts/csv_gen.nit [new file with mode: 0644]
benchmarks/csv/scripts/go_csv.go [new file with mode: 0644]
benchmarks/csv/scripts/nit_csv.nit [new file with mode: 0644]
benchmarks/csv/scripts/python_csv.py [new file with mode: 0644]
benchmarks/csv/scripts/python_stdcsv.py [new file with mode: 0644]
benchmarks/csv/scripts/ruby_csv.rb [new file with mode: 0644]

diff --git a/benchmarks/csv/csv_bench.sh b/benchmarks/csv/csv_bench.sh
new file mode 100755 (executable)
index 0000000..badd1e6
--- /dev/null
@@ -0,0 +1,103 @@
+#!/bin/bash
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Shell script to bench CSV parsers over different documents
+
+source ../bench_common.sh
+source ../bench_plot.sh
+
+## CONFIGURATION OPTIONS ##
+
+# Default number of times a command must be run with bench_command
+# Can be overridden with the option '-n'
+count=5
+
+## HANDLE OPTIONS ##
+
+function init_repo()
+{
+       mkdir -p inputs
+       nitc --semi-global scripts/csv_gen.nit -o scripts/csv_gen
+       echo "Generating 1000 lines documents"
+       ./scripts/csv_gen 10 1000 inputs/1000_l.csv
+       ./scripts/csv_gen 10 1000 inputs/1000_uni_l.csv --unicode
+       echo "Generating 10000 lines documents"
+       ./scripts/csv_gen 10 10000 inputs/10000_l.csv
+       ./scripts/csv_gen 10 10000 inputs/10000_uni_l.csv --unicode
+       echo "Generating 100000 lines documents"
+       ./scripts/csv_gen 10 100000 inputs/100000_l.csv
+       ./scripts/csv_gen 10 100000 inputs/100000_uni_l.csv --unicode
+       echo "Generating 1000000 lines documents"
+       ./scripts/csv_gen 10 1000000 inputs/1000000_l.csv
+       ./scripts/csv_gen 10 1000000 inputs/1000000_uni_l.csv --unicode
+}
+
+function usage()
+{
+       echo "run_bench: ./csv_bench.sh [options]"
+       echo "  -v: verbose mode"
+       echo "  -n count: number of execution for each bar (default: $count)"
+       echo "  -h: this help"
+}
+
+stop=false
+fast=false
+while [ "$stop" = false ]; do
+       case "$1" in
+               -v) verbose=true; shift;;
+               --fast) fast=true; shift;;
+               -h) usage; exit;;
+               -n) count="$2"; shift; shift;;
+               *) stop=true
+       esac
+done
+
+if [ -z "$fast" ]; then
+       init_repo
+fi
+
+mkdir -p out
+
+echo "Compiling engines"
+
+echo "Java Parser"
+
+javac -cp './scripts/commons-csv-1.3.jar' scripts/JavaCSV.java
+
+echo "Go parser"
+
+go build -o scripts/go_csv scripts/go_csv.go
+
+echo "Nit/Ad-Hoc Parser"
+
+nitc --semi-global scripts/nit_csv.nit -o scripts/nit_csv
+
+declare -a script_names=('Python 3 - Pandas' 'Python 2 - Pandas' 'Go' 'Nit' 'Python 3 - Standard' 'Python 2 - Standard' 'Java - Apache commons' 'Ruby')
+declare -a script_cmds=('python3 scripts/python_csv.py' 'python2 scripts/python_csv.py' './scripts/go_csv' './scripts/nit_csv' 'python3 scripts/python_stdcsv.py' 'python2 scripts/python_stdcsv.py' "java -cp /usr/share/java/commons-csv.jar:. scripts.JavaCSV" 'ruby scripts/ruby_csv.rb')
+
+for script in `seq 1 ${#script_cmds[@]}`; do
+       echo "Preparing res for ${script_names[$script - 1]}"
+       prepare_res "./out/${script_names[$script - 1]}.dat" "${script_names[$script - 1]}" "${script_names[$script - 1]}"
+       for file in inputs/*.csv; do
+               fname=`basename $file .csv`
+               bench_command $file "Benching file $file using ${script_cmds[$script - 1]} parser" ${script_cmds[$script - 1]} $file
+       done;
+done;
+
+rm scripts/nit_csv
+rm scripts/JavaCSV.class
+rm scripts/go_csv
+
+plot out/bench_csv.gnu
diff --git a/benchmarks/csv/scripts/JavaCSV.java b/benchmarks/csv/scripts/JavaCSV.java
new file mode 100644 (file)
index 0000000..f8264ca
--- /dev/null
@@ -0,0 +1,18 @@
+package scripts;
+
+import java.io.File;
+import java.util.List;
+import java.nio.charset.Charset;
+import org.apache.commons.csv.*;
+
+class JavaCSV {
+       public static void main(String[] args) {
+               try {
+                       File csvData = new File(args[0]);
+                       CSVParser parser = CSVParser.parse(csvData, Charset.forName("UTF-8"), CSVFormat.RFC4180);
+                       List<CSVRecord> r = parser.getRecords();
+               } catch(Exception e) {
+                       System.err.println("Major fail");
+               }
+       }
+}
diff --git a/benchmarks/csv/scripts/csv_gen.nit b/benchmarks/csv/scripts/csv_gen.nit
new file mode 100644 (file)
index 0000000..123dbb6
--- /dev/null
@@ -0,0 +1,61 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+
+# Generates a random CSV document and writes it to a file.
+#
+# Arguments: record_length (fields per record), record_nb (number of records),
+# out_filepath (destination file) and an optional `--unicode` flag selecting
+# a non-ASCII filler character.
+var usage = "Usage ./csv_gen record_length record_nb out_filepath [--unicode]"
+
+# Reject both missing and unexpected extra arguments
+if args.length < 3 or args.length > 4 then
+       print usage
+       exit 1
+end
+
+var record_length = args[0].to_i
+var record_nb = args[1].to_i
+var outpath = args[2]
+var unicode = false
+
+if args.length == 4 then
+       if args[3] != "--unicode" then
+               print usage
+               exit 1
+       end
+       unicode = true
+end
+
+var ocsv = new CsvDocument
+ocsv.eol = "\r\n"
+
+# Special sequences of the format, injected at random in the fields so that
+# the generated document exercises the escaping rules of the parsers
+var sep = ocsv.separator.to_s
+var eol = ocsv.eol
+var del = ocsv.delimiter.to_s
+
+# Header: one column name per field
+for i in [0 .. record_length[ do ocsv.header.add "Col{i}"
+
+# Filler character repeated to build the content of each field
+var c = if unicode then "á" else "a"
+for i in [0 .. record_nb[ do
+       var line = new Array[String].with_capacity(record_length)
+       for j in [0 .. record_length[ do
+               # Each special sequence is added independently of the others
+               var add_sep = 100.rand > 70
+               var add_del = 100.rand > 70
+               var add_eol = 100.rand > 70
+               var ln = 10.rand
+               var s = c * ln
+               if add_sep then s = sep + s
+               if add_del then s += del
+               if add_eol then s += eol
+               line.add s
+       end
+       ocsv.records.add line
+end
+
+ocsv.write_to_file(outpath)
diff --git a/benchmarks/csv/scripts/go_csv.go b/benchmarks/csv/scripts/go_csv.go
new file mode 100644 (file)
index 0000000..5fff932
--- /dev/null
@@ -0,0 +1,18 @@
+package main
+
+import "encoding/csv"
+import "os"
+import "fmt"
+
+// main parses the whole CSV file given as first command-line argument.
+// The records are discarded: only the parsing itself is of interest.
+// It panics when the file cannot be opened or parsed.
+func main() {
+       if len(os.Args) == 1 {
+               fmt.Fprintln(os.Stderr, "Usage ./go_csv file")
+               os.Exit(-1)
+       }
+       file, err := os.Open(os.Args[1])
+       if err != nil { panic(err) }
+       defer file.Close()
+
+       var read = csv.NewReader(file)
+       // Report the error returned by ReadAll itself; the error of os.Open
+       // is known to be nil at this point.
+       _, err = read.ReadAll()
+       if err != nil { panic(err) }
+}
diff --git a/benchmarks/csv/scripts/nit_csv.nit b/benchmarks/csv/scripts/nit_csv.nit
new file mode 100644 (file)
index 0000000..c8422d1
--- /dev/null
@@ -0,0 +1,25 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+
+# Parses the whole CSV file given as first argument, using "\r\n" as
+# end-of-line sequence.
+if args.length == 0 then
+       print "Usage: ./nit_csv in.csv"
+       exit 1
+end
+
+var input = new FileReader.open(args[0])
+var reader = new CsvReader(input)
+reader.eol = "\r\n"
+
+# Read every record; the result is not used
+reader.read_all
diff --git a/benchmarks/csv/scripts/python_csv.py b/benchmarks/csv/scripts/python_csv.py
new file mode 100644 (file)
index 0000000..d8addda
--- /dev/null
@@ -0,0 +1,4 @@
+import sys
+from pandas import read_csv
+
+csv = read_csv(sys.argv[1])
diff --git a/benchmarks/csv/scripts/python_stdcsv.py b/benchmarks/csv/scripts/python_stdcsv.py
new file mode 100644 (file)
index 0000000..b78cb15
--- /dev/null
@@ -0,0 +1,8 @@
+import sys
+import csv
+
+lst = list();
+with open(sys.argv[1], 'r') as f:
+    reader = csv.reader(f, delimiter=':', quoting=csv.QUOTE_NONE)
+    for row in reader:
+        list.append(lst, row)
diff --git a/benchmarks/csv/scripts/ruby_csv.rb b/benchmarks/csv/scripts/ruby_csv.rb
new file mode 100644 (file)
index 0000000..6b1fe02
--- /dev/null
@@ -0,0 +1,3 @@
+require 'csv'
+
+# Parse the whole CSV file named by the first command-line argument.
+input_path = ARGV[0]
+CSV.read(input_path)