diff --git a/README.md b/README.md
index 4532c4c..d28b4d6 100644
--- a/README.md
+++ b/README.md
@@ -349,6 +349,9 @@ Even more queries can be found [here](https://colab.research.google.com/github/R
 
 # Latest updates
 
+## Version 0.2.0 alpha 5
+- If the initialization of the Spark session fails, we now check if SPARK_HOME is set and if it may be invalid or pointing to a different Spark version than 4.0, and output a more informative error message.
+
 ## Version 0.2.0 alpha 4
 - Added parameters to the jsoniq magic to select the desired output to print: -j, -df, -pdf
 - Added informative error message with a hint on how to fix when trying to get a DataFrame and there is no schema.
diff --git a/pyproject.toml b/pyproject.toml
index 4ceb6e0..167e745 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "jsoniq"
-version = "0.2.0a4"
+version = "0.2.0a5"
 description = "Python edition of RumbleDB, a JSONiq engine"
 requires-python = ">=3.11"
 dependencies = [
diff --git a/src/jsoniq/jars/rumbledb-1.24.0.jar b/src/jsoniq/jars/rumbledb-1.24.0.jar
index 15d02f5..d74418e 100644
Binary files a/src/jsoniq/jars/rumbledb-1.24.0.jar and b/src/jsoniq/jars/rumbledb-1.24.0.jar differ
diff --git a/src/jsoniq/session.py b/src/jsoniq/session.py
index 0d838f2..f523994 100644
--- a/src/jsoniq/session.py
+++ b/src/jsoniq/session.py
@@ -10,6 +10,15 @@ with pkg_resources.path("jsoniq.jars", "rumbledb-1.24.0.jar") as jar_path:
     jar_path_str = "file://" + str(jar_path)
 
 
+def get_spark_version():
+    # Probe `spark-submit` only when SPARK_HOME is set; returns e.g. "4.0.0" or None.
+    if os.environ.get('SPARK_HOME') is not None:
+        spark_version = os.popen("spark-submit --version 2>&1").read()
+        if "version" in spark_version:
+            # Fix: escape every dot so "version 4.0x0" cannot match.
+            match = re.search(r'version (\d+\.\d+\.\d+)', spark_version)
+            if match:
+                return match.group(1)
+    return None
+
 class MetaRumbleSession(type):
     def __getattr__(cls, item):
         if item == "builder":
@@ -64,7 +73,26 @@ def __init__(self):
 
     def getOrCreate(self):
         if RumbleSession._rumbleSession is None:
-            RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
+            try:
+                RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
+            except FileNotFoundError as e:
+                if os.environ.get('SPARK_HOME') is not None:
+                    sys.stderr.write("[Error] SPARK_HOME environment variable may not be set properly. Please check that it points to a valid path to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
+                    sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
+                    sys.exit(43)
+                else:
+                    raise e
+            except TypeError as e:
+                spark_version = get_spark_version()
+                if os.environ.get('SPARK_HOME') is not None and spark_version is None:
+                    sys.stderr.write("[Error] Could not determine Spark version. The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid path to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
+                    sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
+                    sys.exit(43)
+                elif spark_version is not None and not spark_version.startswith("4.0"):
+                    sys.stderr.write(f"[Error] RumbleDB requires Spark 4.0, but found version {spark_version}. Please either set SPARK_HOME to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
+                    sys.exit(43)
+                else:
+                    # Fix: when spark_version is None (SPARK_HOME unset), the original
+                    # crashed with AttributeError on None.startswith; re-raise instead.
+                    raise e
         return RumbleSession._rumbleSession
 
     def create(self):