README.md (3 additions, 0 deletions)
@@ -349,6 +349,9 @@ Even more queries can be found [here](https://colab.research.google.com/github/R

 # Latest updates
 
+## Version 0.2.0 alpha 5
+- If initialization of the Spark session fails, we now check whether SPARK_HOME is set and whether it is invalid or points to a Spark version other than 4.0, and print a more informative error message.
+
 ## Version 0.2.0 alpha 4
 - Added parameters to the jsoniq magic to select the desired output to print: -j, -df, -pdf
 - Added an informative error message with a hint on how to fix the problem when trying to get a DataFrame and there is no schema.
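For context on the alpha 4 flags, a hypothetical notebook cell might look like the sketch below. Only the flag names (-j, -df, -pdf) come from the changelog entry above; the %%jsoniq cell-magic name, the query, and the flag meanings are assumptions.

%%jsoniq -df
(: -df is assumed to print the result as a Spark DataFrame;
   -j and -pdf presumably select JSON output and a pandas DataFrame. :)
for $i in 1 to 3
return { "value": $i, "square": $i * $i }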
pyproject.toml (1 addition, 1 deletion)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "jsoniq"
-version = "0.2.0a4"
+version = "0.2.0a5"
 description = "Python edition of RumbleDB, a JSONiq engine"
 requires-python = ">=3.11"
 dependencies = [
Binary file modified src/jsoniq/jars/rumbledb-1.24.0.jar
src/jsoniq/session.py (29 additions, 1 deletion)
@@ -10,6 +10,15 @@
 with pkg_resources.path("jsoniq.jars", "rumbledb-1.24.0.jar") as jar_path:
     jar_path_str = "file://" + str(jar_path)
 
+def get_spark_version():
+    if os.environ.get('SPARK_HOME') is not None:
+        spark_version = os.popen("spark-submit --version 2>&1").read()
+        if "version" in spark_version:
+            match = re.search(r'version (\d+\.\d+\.\d+)', spark_version)
+            if match:
+                return match.group(1)
+    return None
+
 class MetaRumbleSession(type):
     def __getattr__(cls, item):
         if item == "builder":
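As a quick sanity check of the parsing logic in get_spark_version(), a minimal sketch; the sample banner text is an assumption, since the exact output of spark-submit --version varies by build:

import re

# Illustrative (assumed) excerpt of "spark-submit --version 2>&1" output.
sample = "Welcome to Spark version 4.0.0\nUsing Scala version 2.13.16"
match = re.search(r'version (\d+\.\d+\.\d+)', sample)
print(match.group(1) if match else None)  # prints: 4.0.0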
@@ -64,7 +73,26 @@ def __init__(self):

     def getOrCreate(self):
         if RumbleSession._rumbleSession is None:
-            RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
+            try:
+                RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
+            except FileNotFoundError as e:
+                if os.environ.get('SPARK_HOME') is not None:
+                    sys.stderr.write("[Error] The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid Spark 4.0 directory, or simply unset SPARK_HOME to fall back to the Spark 4.0 installation packaged with pyspark.\n")
+                    sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
+                    sys.exit(43)
+                else:
+                    raise e
+            except TypeError as e:
+                spark_version = get_spark_version()
+                if os.environ.get('SPARK_HOME') is not None and spark_version is None:
+                    sys.stderr.write("[Error] Could not determine the Spark version. The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid Spark 4.0 directory, or simply unset SPARK_HOME to fall back to the Spark 4.0 installation packaged with pyspark.\n")
+                    sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
+                    sys.exit(43)
+                elif spark_version is not None and not spark_version.startswith("4.0"):
+                    sys.stderr.write(f"[Error] RumbleDB requires Spark 4.0, but found version {spark_version}. Please either set SPARK_HOME to a Spark 4.0 directory, or simply unset SPARK_HOME to fall back to the Spark 4.0 installation packaged with pyspark.\n")
+                    sys.exit(43)
+                else:
+                    raise e
         return RumbleSession._rumbleSession
 
     def create(self):
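From a user's perspective, a minimal sketch of the new failure mode. Assumptions: RumbleSession.builder is the public entry point, as the metaclass above suggests; the import path follows this diff's module layout; the SPARK_HOME path is hypothetical.

import os

from jsoniq.session import RumbleSession

# Point SPARK_HOME at a (hypothetical) Spark 3.x install to trigger the check.
os.environ["SPARK_HOME"] = "/opt/spark-3.5.1"

# Instead of a raw FileNotFoundError or TypeError, this now prints the
# "[Error] ..." hint to stderr and exits with status 43.
rumble = RumbleSession.builder.getOrCreate()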