@@ -32,6 +32,7 @@ import org.apache.spark.sql.SparkSession
 * Default: spark.default.parallelism
 * --size: specify the size of the dataset that should be loaded into Spark.
 * Default: sf_001
+ * Available options: sf_01, sf_1, sf_3, sf_5, sf_10
 * --filesystem-url: optional arguments to specify filesystem master node URL.
 * Default: file://
 * --log-operators: boolean whether or not to log individual physical operators.
@@ -87,9 +88,10 @@ object Benchmark {
     println("""
 Available flags:
 --num-partitions: specify the number of partitions the data should be split into.
-  Default: 2 * number of executors if exists, 4 otherwise
+  Default: spark.default.parallelism
 --size: specify the size of the dataset that should be loaded into Spark.
   Default: sf_001
+  Available options: sf_01, sf_1, sf_3, sf_5, sf_10
 --filesystem-url: optional arguments to specify filesystem master node URL.
   Default: file://
 --log-operators: boolean whether or not to log individual physical operators.
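The help text now points at `spark.default.parallelism` rather than the old executor-based heuristic. A minimal sketch of what such a fallback could look like, assuming the parsed flag is held in an `Option[Int]`; the `PartitionDefaults.resolve` helper below is illustrative only and not part of this patch:

```scala
import org.apache.spark.sql.SparkSession

object PartitionDefaults {
  // Illustrative only: fall back to spark.default.parallelism
  // (exposed as SparkContext.defaultParallelism) when --num-partitions is absent.
  def resolve(flag: Option[Int], spark: SparkSession): Int =
    flag.getOrElse(spark.sparkContext.defaultParallelism)
}
```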
@@ -111,7 +113,9 @@ object Benchmark {
         this.numPartitions = numPartitions.toInt
       }
       case Array("--size", size: String) => {
-        if (size == "sf_001" || size == "sf_01" || size == "sf_1") {
+        if (
+          size == "sf_001" || size == "sf_01" || size == "sf_1" || size == "sf_3" || size == "sf_5" || size == "sf_10"
+        ) {
           this.size = size
         } else {
           println(s"Given size is not supported: $size")
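With three more scale factors, the `||` chain is getting long. A set-membership check is one way to keep the valid sizes in a single list; this is a hypothetical refactor sketch, not part of the diff, and `SizeFlag`/`supportedSizes` are invented names:

```scala
object SizeFlag {
  // Hypothetical: one list to extend when new scale factors are added.
  private val supportedSizes = Set("sf_001", "sf_01", "sf_1", "sf_3", "sf_5", "sf_10")

  def parse(args: Array[String]): Option[String] = args match {
    case Array("--size", size) if supportedSizes.contains(size) => Some(size)
    case Array("--size", size) =>
      println(s"Given size is not supported: $size")
      None
    case _ => None
  }
}
```

For example, `SizeFlag.parse(Array("--size", "sf_10"))` yields `Some("sf_10")`, while an unknown value prints the same warning the patch uses and yields `None`.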