Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/gandiva/function_holder_maker_registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ FunctionHolderMakerRegistry::MakerMap FunctionHolderMakerRegistry::DefaultHolder
{"to_date", HolderMaker<ToDateHolder>},
{"random", HolderMaker<RandomGeneratorHolder>},
{"rand", HolderMaker<RandomGeneratorHolder>},
{"rand_integer", HolderMaker<RandomIntegerGeneratorHolder>},
{"regexp_replace", HolderMaker<ReplaceHolder>},
{"regexp_extract", HolderMaker<ExtractHolder>},
{"castintervalday", HolderMaker<IntervalDaysHolder>},
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/gandiva/function_registry_math_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ std::vector<NativeFunction> GetMathOpsFunctionRegistry() {
"gdv_fn_random", NativeFunction::kNeedsFunctionHolder),
NativeFunction("random", {"rand"}, DataTypeVector{int32()}, float64(),
kResultNullNever, "gdv_fn_random_with_seed",
NativeFunction::kNeedsFunctionHolder),
NativeFunction("rand_integer", {}, DataTypeVector{}, int32(), kResultNullNever,
"gdv_fn_rand_integer", NativeFunction::kNeedsFunctionHolder),
NativeFunction("rand_integer", {}, DataTypeVector{int32()}, int32(),
kResultNullNever, "gdv_fn_rand_integer_with_range",
NativeFunction::kNeedsFunctionHolder),
NativeFunction("rand_integer", {}, DataTypeVector{int32(), int32()}, int32(),
kResultNullNever, "gdv_fn_rand_integer_with_min_max",
NativeFunction::kNeedsFunctionHolder)};

return math_fn_registry_;
Expand Down
39 changes: 38 additions & 1 deletion cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,33 @@ double gdv_fn_random(int64_t ptr) {
return (*holder)();
}

double gdv_fn_random_with_seed(int64_t ptr, int32_t seed, bool seed_validity) {
// Stub registered for 'random'/'rand' with a seed argument. The seed and its validity
// flag are not used here; the value comes from invoking the RandomGeneratorHolder whose
// address is passed in `ptr`.
double gdv_fn_random_with_seed(int64_t ptr, int32_t /*seed*/, bool /*seed_validity*/) {
  auto* rng = reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
  return (*rng)();
}

// Stub for the zero-argument 'rand_integer': invokes the RandomIntegerGeneratorHolder
// whose address is passed in `ptr` and returns the int32 it produces.
int32_t gdv_fn_rand_integer(int64_t ptr) {
  auto* rng = reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
  return (*rng)();
}

// Stub for the one-argument 'rand_integer'. The range value and its validity flag are
// not used here; the result comes from invoking the RandomIntegerGeneratorHolder whose
// address is passed in `ptr`.
int32_t gdv_fn_rand_integer_with_range(int64_t ptr, int32_t /*range*/,
                                       bool /*range_validity*/) {
  auto* rng = reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
  return (*rng)();
}

// Stub for the two-argument 'rand_integer'. The min/max values and their validity flags
// are not used here; the result comes from invoking the RandomIntegerGeneratorHolder
// whose address is passed in `ptr`.
int32_t gdv_fn_rand_integer_with_min_max(int64_t ptr, int32_t /*min*/,
                                         bool /*min_validity*/, int32_t /*max*/,
                                         bool /*max_validity*/) {
  auto* rng = reinterpret_cast<gandiva::RandomIntegerGeneratorHolder*>(ptr);
  return (*rng)();
}

bool gdv_fn_in_expr_lookup_int32(int64_t ptr, int32_t value, bool in_validity) {
if (!in_validity) {
return false;
Expand Down Expand Up @@ -864,6 +885,22 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const {
engine->AddGlobalMappingForFunc("gdv_fn_random_with_seed", types->double_type(), args,
reinterpret_cast<void*>(gdv_fn_random_with_seed));

// gdv_fn_rand_integer
// Signature: (i64 holder pointer) -> i32
args = {types->i64_type()};
engine->AddGlobalMappingForFunc("gdv_fn_rand_integer", types->i32_type(), args,
reinterpret_cast<void*>(gdv_fn_rand_integer));

// gdv_fn_rand_integer_with_range
// Signature: (i64 holder pointer, i32 range, i1 range validity) -> i32
args = {types->i64_type(), types->i32_type(), types->i1_type()};
engine->AddGlobalMappingForFunc(
"gdv_fn_rand_integer_with_range", types->i32_type(), args,
reinterpret_cast<void*>(gdv_fn_rand_integer_with_range));

// gdv_fn_rand_integer_with_min_max
// Signature: (i64 holder pointer, i32 min, i1 min validity, i32 max, i1 max validity)
// -> i32
args = {types->i64_type(), types->i32_type(), types->i1_type(), types->i32_type(),
types->i1_type()};
engine->AddGlobalMappingForFunc(
"gdv_fn_rand_integer_with_min_max", types->i32_type(), args,
reinterpret_cast<void*>(gdv_fn_rand_integer_with_min_max));

// gdv_fn_dec_from_string
args = {
types->i64_type(), // context
Expand Down
61 changes: 61 additions & 0 deletions cpp/src/gandiva/random_generator_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
// under the License.

#include "gandiva/random_generator_holder.h"

#include <limits>

#include "gandiva/node.h"

namespace gandiva {
Expand All @@ -40,4 +43,62 @@ Result<std::shared_ptr<RandomGeneratorHolder>> RandomGeneratorHolder::Make(
return std::shared_ptr<RandomGeneratorHolder>(new RandomGeneratorHolder(
literal->is_null() ? 0 : std::get<int32_t>(literal->holder())));
}

/// Build the holder for a 'rand_integer' call from the call's argument nodes.
///
/// - no arguments: the default-constructed holder is returned
/// - one int32 literal `range`: holder built from `range`; a NULL literal is treated as
///   INT32_MAX and a non-positive range is rejected
/// - two int32 literals `min`, `max`: holder built from (min, max); a NULL min is treated
///   as 0, a NULL max as INT32_MAX, and min > max is rejected
///
/// Returns Status::Invalid when there are more than two arguments, when an argument is
/// not a literal, or when a literal is not of type int32.
Result<std::shared_ptr<RandomIntegerGeneratorHolder>> RandomIntegerGeneratorHolder::Make(
    const FunctionNode& node) {
  using HolderPtr = std::shared_ptr<RandomIntegerGeneratorHolder>;
  constexpr int32_t kInt32Max = std::numeric_limits<int32_t>::max();

  const auto& args = node.children();
  const auto num_args = args.size();
  ARROW_RETURN_IF(
      num_args > 2,
      Status::Invalid("'rand_integer' function requires at most two parameters"));

  // Reads an int32 literal, substituting `fallback` when the literal is NULL.
  auto value_or = [](LiteralNode* lit, int32_t fallback) -> int32_t {
    return lit->is_null() ? fallback : std::get<int32_t>(lit->holder());
  };

  // No arguments.
  if (num_args == 0) {
    return HolderPtr(new RandomIntegerGeneratorHolder());
  }

  // Single argument: the range.
  if (num_args == 1) {
    auto* range_lit = dynamic_cast<LiteralNode*>(args[0].get());
    ARROW_RETURN_IF(
        range_lit == nullptr,
        Status::Invalid("'rand_integer' function requires a literal as parameter"));
    ARROW_RETURN_IF(
        range_lit->return_type()->id() != arrow::Type::INT32,
        Status::Invalid(
            "'rand_integer' function requires an int32 literal as parameter"));

    const int32_t range = value_or(range_lit, kInt32Max);
    ARROW_RETURN_IF(range <= 0,
                    Status::Invalid("'rand_integer' function range must be positive"));

    return HolderPtr(new RandomIntegerGeneratorHolder(range));
  }

  // Two arguments: min and max.
  auto* min_lit = dynamic_cast<LiteralNode*>(args[0].get());
  auto* max_lit = dynamic_cast<LiteralNode*>(args[1].get());
  ARROW_RETURN_IF(
      min_lit == nullptr || max_lit == nullptr,
      Status::Invalid("'rand_integer' function requires literals as parameters"));
  ARROW_RETURN_IF(
      min_lit->return_type()->id() != arrow::Type::INT32 ||
          max_lit->return_type()->id() != arrow::Type::INT32,
      Status::Invalid("'rand_integer' function requires int32 literals as parameters"));

  const int32_t min_val = value_or(min_lit, 0);
  const int32_t max_val = value_or(max_lit, kInt32Max);
  ARROW_RETURN_IF(min_val > max_val,
                  Status::Invalid("'rand_integer' function min must be <= max"));

  return HolderPtr(new RandomIntegerGeneratorHolder(min_val, max_val));
}

} // namespace gandiva
33 changes: 33 additions & 0 deletions cpp/src/gandiva/random_generator_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <limits>
#include <memory>
#include <random>

Expand Down Expand Up @@ -53,4 +54,36 @@ class GANDIVA_EXPORT RandomGeneratorHolder : public FunctionHolder {
std::uniform_real_distribution<> distribution_;
};

/// Function Holder for 'rand_integer'
///
/// Owns a 64-bit Mersenne Twister engine seeded from ::arrow::internal::GetRandomSeed()
/// and a uniform int32 distribution; operator() draws from that distribution.
class GANDIVA_EXPORT RandomIntegerGeneratorHolder : public FunctionHolder {
 public:
  ~RandomIntegerGeneratorHolder() override = default;

  /// Create a holder from the arguments of `node`.
  static Result<std::shared_ptr<RandomIntegerGeneratorHolder>> Make(
      const FunctionNode& node);

  /// Draw a value from the configured distribution using the holder's engine.
  int32_t operator()() { return distribution_(generator_); }

 private:
  using Limits = std::numeric_limits<int32_t>;

  // Full range: [INT32_MIN, INT32_MAX]
  RandomIntegerGeneratorHolder()
      : RandomIntegerGeneratorHolder(Limits::min(), Limits::max()) {}

  // Range: [0, range - 1]
  explicit RandomIntegerGeneratorHolder(int32_t range)
      : RandomIntegerGeneratorHolder(0, range - 1) {}

  // Min/Max: [min, max] inclusive. The constructors above delegate here, so the
  // distribution set-up and the engine seeding live in one place.
  RandomIntegerGeneratorHolder(int32_t min, int32_t max) : distribution_(min, max) {
    generator_.seed(::arrow::internal::GetRandomSeed());
  }

  std::mt19937_64 generator_;
  std::uniform_int_distribution<int32_t> distribution_;
};

} // namespace gandiva
159 changes: 159 additions & 0 deletions cpp/src/gandiva/random_generator_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@

#include "gandiva/random_generator_holder.h"

#include <limits>
#include <memory>

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "arrow/testing/gtest_util.h"
Expand Down Expand Up @@ -87,4 +89,161 @@ TEST_F(TestRandGenHolder, WithInValidSeed) {
EXPECT_EQ(random_1(), random_2());
}

// A field reference passed as the seed of 'rand' must be rejected by Make().
TEST_F(TestRandGenHolder, NonLiteralSeedRejected) {
  const auto seed_field =
      std::make_shared<FieldNode>(arrow::field("seed", arrow::int32()));
  const FunctionNode rand_func{"rand", {seed_field}, arrow::float64()};

  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("requires a literal as parameter"),
                                  RandomGeneratorHolder::Make(rand_func).status());
}

class TestRandIntGenHolder : public ::testing::Test {
public:
FunctionNode BuildRandIntFunc() { return {"rand_integer", {}, arrow::int32()}; }

FunctionNode BuildRandIntWithRangeFunc(int32_t range, bool range_is_null) {
auto range_node = std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(range),
range_is_null);
return {"rand_integer", {range_node}, arrow::int32()};
}

FunctionNode BuildRandIntWithMinMaxFunc(int32_t min, bool min_is_null, int32_t max,
bool max_is_null) {
auto min_node =
std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(min), min_is_null);
auto max_node =
std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(max), max_is_null);
return {"rand_integer", {min_node, max_node}, arrow::int32()};
}
};

// The zero-argument form builds successfully and can be invoked repeatedly.
TEST_F(TestRandIntGenHolder, NoParams) {
  const FunctionNode rand_func = BuildRandIntFunc();
  EXPECT_OK_AND_ASSIGN(auto holder, RandomIntegerGeneratorHolder::Make(rand_func));

  // Each draw is compared against the int32 limits.
  for (int draw = 0; draw < 10; ++draw) {
    const int32_t val = (*holder)();
    EXPECT_GE(val, std::numeric_limits<int32_t>::min());
    EXPECT_LE(val, std::numeric_limits<int32_t>::max());
  }
}

// rand_integer(100) must only produce values in [0, 99].
TEST_F(TestRandIntGenHolder, WithRange) {
  const FunctionNode rand_func = BuildRandIntWithRangeFunc(100, false);
  EXPECT_OK_AND_ASSIGN(auto holder, RandomIntegerGeneratorHolder::Make(rand_func));

  for (int draw = 0; draw < 100; ++draw) {
    const int32_t val = (*holder)();
    EXPECT_GE(val, 0);
    EXPECT_LT(val, 100);
  }
}

// rand_integer(10, 20) must only produce values in [10, 20], both ends included.
TEST_F(TestRandIntGenHolder, WithMinMax) {
  const FunctionNode rand_func = BuildRandIntWithMinMaxFunc(10, false, 20, false);
  EXPECT_OK_AND_ASSIGN(auto holder, RandomIntegerGeneratorHolder::Make(rand_func));

  for (int draw = 0; draw < 100; ++draw) {
    const int32_t val = (*holder)();
    EXPECT_GE(val, 10);
    EXPECT_LE(val, 20);
  }
}

// Negative bounds are accepted: rand_integer(-50, -10) stays within [-50, -10].
TEST_F(TestRandIntGenHolder, WithNegativeMinMax) {
  const FunctionNode rand_func = BuildRandIntWithMinMaxFunc(-50, false, -10, false);
  EXPECT_OK_AND_ASSIGN(auto holder, RandomIntegerGeneratorHolder::Make(rand_func));

  for (int draw = 0; draw < 100; ++draw) {
    const int32_t val = (*holder)();
    EXPECT_GE(val, -50);
    EXPECT_LE(val, -10);
  }
}

// A range of zero is rejected with an Invalid status.
TEST_F(TestRandIntGenHolder, InvalidRangeZero) {
  const FunctionNode rand_func = BuildRandIntWithRangeFunc(0, false);
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("range must be positive"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}

// A negative range is rejected with an Invalid status.
TEST_F(TestRandIntGenHolder, InvalidRangeNegative) {
  const FunctionNode rand_func = BuildRandIntWithRangeFunc(-5, false);
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("range must be positive"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}

// min > max is rejected with an Invalid status.
TEST_F(TestRandIntGenHolder, InvalidMinGreaterThanMax) {
  const FunctionNode rand_func = BuildRandIntWithMinMaxFunc(20, false, 10, false);
  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("min must be <= max"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}

// A NULL range literal is accepted; the draws are expected to fall in
// [0, INT32_MAX - 1].
TEST_F(TestRandIntGenHolder, NullRangeDefaultsToMaxInt) {
  // The literal's value (0) is irrelevant because the literal is flagged as NULL.
  const FunctionNode rand_func = BuildRandIntWithRangeFunc(0, true);
  EXPECT_OK_AND_ASSIGN(auto holder, RandomIntegerGeneratorHolder::Make(rand_func));

  for (int draw = 0; draw < 100; ++draw) {
    const int32_t val = (*holder)();
    EXPECT_GE(val, 0);
    EXPECT_LT(val, std::numeric_limits<int32_t>::max());
  }
}

// A field reference passed as the range of 'rand_integer' must be rejected by Make().
TEST_F(TestRandIntGenHolder, NonLiteralRangeRejected) {
  // The range argument is a FieldNode, not a LiteralNode.
  const auto range_field =
      std::make_shared<FieldNode>(arrow::field("range", arrow::int32()));
  const FunctionNode rand_func{"rand_integer", {range_field}, arrow::int32()};

  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("requires a literal as parameter"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}

// In the two-argument form, a single non-literal argument is enough for Make() to fail.
TEST_F(TestRandIntGenHolder, NonLiteralMinMaxRejected) {
  // min is a field reference; max is a valid int32 literal.
  const auto min_field = std::make_shared<FieldNode>(arrow::field("min", arrow::int32()));
  const auto max_literal =
      std::make_shared<LiteralNode>(arrow::int32(), LiteralHolder(100), false);
  const FunctionNode rand_func{"rand_integer", {min_field, max_literal}, arrow::int32()};

  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                  ::testing::HasSubstr("requires literals as parameters"),
                                  RandomIntegerGeneratorHolder::Make(rand_func).status());
}

// Two NULL literals are accepted; the draws are expected to fall in [0, INT32_MAX].
TEST_F(TestRandIntGenHolder, NullMinMaxDefaults) {
  // Both literals are flagged as NULL, so their values (0, 0) are irrelevant.
  const FunctionNode rand_func = BuildRandIntWithMinMaxFunc(0, true, 0, true);
  EXPECT_OK_AND_ASSIGN(auto holder, RandomIntegerGeneratorHolder::Make(rand_func));

  for (int draw = 0; draw < 100; ++draw) {
    const int32_t val = (*holder)();
    EXPECT_GE(val, 0);
    EXPECT_LE(val, std::numeric_limits<int32_t>::max());
  }
}

} // namespace gandiva
Loading
Loading