diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7c32d5f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.jar filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 2ceff31..7ba5d22 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -19,6 +19,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + lfs: true - name: Set up Python 3.11 uses: actions/setup-python@v3 with: diff --git a/README.md b/README.md index 62e5ffe..e5b0d89 100644 --- a/README.md +++ b/README.md @@ -338,6 +338,10 @@ Even more queries can be found [here](https://colab.research.google.com/github/R # Latest updates +## Version 2.0.6 +- Decoupled the internal materialization cap (when a parallel sequence of items is materialized, e.g., into an array) from the outer result size cap (for printing to screen); these are now two distinct configuration parameters. The default materialization cap is set to 100'000 items, while the default outer result size cap is set to 10. Both can be changed by the user through the Rumble configuration. +- Fixed an issue in the implementation when a FLWOR expression gets executed locally and its return clause has an underlying RDD or DataFrame. + ## Version 2.0.5 - Support for @ (primary keys) within arrays of objects and ? for allowing null in JSound compact schemas. It corresponds to unique, and a union with js:null, in the JSound verbose syntax. 
diff --git a/pyproject.toml b/pyproject.toml index 6938dee..2cb1808 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "jsoniq" -version = "2.0.5" +version = "2.0.6" description = "Python edition of RumbleDB, a JSONiq engine" requires-python = ">=3.11" dependencies = [ diff --git a/src/jsoniq/jars/rumbledb-2.0.0.jar b/src/jsoniq/jars/rumbledb-2.0.0.jar index 13bec37..31c0544 100644 Binary files a/src/jsoniq/jars/rumbledb-2.0.0.jar and b/src/jsoniq/jars/rumbledb-2.0.0.jar differ diff --git a/src/jsoniq/sequence.py b/src/jsoniq/sequence.py index 2bb729d..b4844ad 100644 --- a/src/jsoniq/sequence.py +++ b/src/jsoniq/sequence.py @@ -54,10 +54,16 @@ def rdd(self): return self._rumblesession.lastResult def df(self): + if "DataFrame" not in self._jsequence.availableOutputs(): + sys.stderr.write(self.schema_str) + return None self._rumblesession.lastResult = DataFrame(self._jsequence.getAsDataFrame(), self._sparksession) return self._rumblesession.lastResult def pdf(self): + if "DataFrame" not in self._jsequence.availableOutputs(): + sys.stderr.write(self.schema_str) + return None self._rumblesession.lastResult = self.df().toPandas() return self._rumblesession.lastResult diff --git a/tests/test_sample.py b/tests/test_sample.py index 83b90bf..214eaf6 100644 --- a/tests/test_sample.py +++ b/tests/test_sample.py @@ -11,6 +11,7 @@ def test1(self): # All attributes and methods of SparkSession are also available on RumbleSession. rumble = RumbleSession.builder.getOrCreate(); + rumble.getRumbleConf().setResultSizeCap(100); # Just to improve readability when invoking Spark methods # (such as spark.sql() or spark.createDataFrame()).