4
4
5
5
# Requirements:
6
6
# - Run as the root user
7
- # - Required env variables: SPARK_HOME, HADOOP_VERSION, SPARK_DOWNLOAD_URL
8
- # - Optional env variables: SPARK_VERSION, SCALA_VERSION
7
+ # - Required env variable: SPARK_HOME
9
8
9
+ import argparse
10
10
import logging
11
11
import os
12
12
import subprocess
@@ -27,13 +27,10 @@ def get_all_refs(url: str) -> list[str]:
27
27
return [a ["href" ] for a in soup .find_all ("a" , href = True )]
28
28
29
29
30
- def get_spark_version () -> str :
30
+ def get_latest_spark_version () -> str :
31
31
"""
32
- If ${SPARK_VERSION} env variable is non-empty, simply returns it
33
- Otherwise, returns the last stable version of Spark using spark archive
32
+ Returns the last stable version of Spark using spark archive
34
33
"""
35
- if (version := os .environ ["SPARK_VERSION" ]) != "" :
36
- return version
37
34
LOGGER .info ("Downloading Spark versions information" )
38
35
all_refs = get_all_refs ("https://archive.apache.org/dist/spark/" )
39
36
stable_versions = [
@@ -106,12 +103,20 @@ def configure_spark(spark_dir_name: str, spark_home: Path) -> None:
106
103
if __name__ == "__main__" :
107
104
logging .basicConfig (level = logging .INFO )
108
105
109
- spark_version = get_spark_version ()
106
+ arg_parser = argparse .ArgumentParser ()
107
+ arg_parser .add_argument ("--spark-version" , required = True )
108
+ arg_parser .add_argument ("--hadoop-version" , required = True )
109
+ arg_parser .add_argument ("--scala-version" , required = True )
110
+ arg_parser .add_argument ("--spark-download-url" , type = Path , required = True )
111
+ args = arg_parser .parse_args ()
112
+
113
+ args .spark_version = args .spark_version or get_latest_spark_version ()
114
+
110
115
spark_dir_name = download_spark (
111
- spark_version = spark_version ,
112
- hadoop_version = os . environ [ "HADOOP_VERSION" ] ,
113
- scala_version = os . environ [ "SCALA_VERSION" ] ,
114
- spark_download_url = Path ( os . environ [ "SPARK_DOWNLOAD_URL" ]) ,
116
+ spark_version = args . spark_version ,
117
+ hadoop_version = args . hadoop_version ,
118
+ scala_version = args . scala_version ,
119
+ spark_download_url = args . spark_download_url ,
115
120
)
116
121
configure_spark (
117
122
spark_dir_name = spark_dir_name , spark_home = Path (os .environ ["SPARK_HOME" ])