Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions encodings/runend/src/arbitrary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ pub struct ArbitraryRunEndArray(pub RunEndArray);

impl<'a> Arbitrary<'a> for ArbitraryRunEndArray {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
// RunEnd supports Bool or Primitive types for values
// Pick a random primitive type for values
// Pick a random primitive type for values.
let ptype: PType = u.arbitrary()?;
let nullability: Nullability = u.arbitrary()?;
let dtype = DType::Primitive(ptype, nullability);
Expand Down
83 changes: 50 additions & 33 deletions encodings/runend/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ use vortex_array::IntoArray;
use vortex_array::Precision;
use vortex_array::ProstMetadata;
use vortex_array::SerializeMetadata;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::PrimitiveVTable;
use vortex_array::arrays::TakeExecute;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::arrays::VarBinViewVTable;
use vortex_array::buffer::BufferHandle;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability;
Expand Down Expand Up @@ -239,12 +243,6 @@ impl RunEndArray {
"run ends must be unsigned integers, was {}",
ends.dtype(),
);
vortex_ensure!(
values.dtype().is_primitive() || values.dtype().is_boolean(),
"RunEnd array can only have Bool or Primitive values, {} given",
values.dtype()
);

vortex_ensure!(
ends.len() == values.len(),
"run ends len != run values len, {} != {}",
Expand Down Expand Up @@ -342,32 +340,7 @@ impl RunEndArray {
///
/// # Validation
///
/// The `ends` must be non-nullable unsigned integers. The values may be `Bool` or `Primitive`
/// types.
///
/// # Examples
///
/// ```
/// # use vortex_array::arrays::{BoolArray, VarBinViewArray};
/// # use vortex_array::IntoArray;
/// # use vortex_buffer::buffer;
/// # use vortex_runend::RunEndArray;
///
/// // Error to provide incorrectly-typed values!
/// let result = RunEndArray::try_new(
/// buffer![1u8, 2u8].into_array(),
/// VarBinViewArray::from_iter_str(["bad", "dtype"]).into_array(),
/// );
/// assert!(result.is_err());
///
/// // This array is happy
/// let result = RunEndArray::try_new(
/// buffer![1u8, 2u8].into_array(),
/// BoolArray::from_iter([false, true]).into_array(),
/// );
///
/// assert!(result.is_ok());
/// ```
/// The `ends` must be non-nullable unsigned integers.
pub fn try_new(ends: ArrayRef, values: ArrayRef) -> VortexResult<Self> {
let length: usize = if ends.is_empty() {
0
Expand Down Expand Up @@ -510,6 +483,7 @@ pub(super) fn run_end_canonicalize(
ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
let pends = array.ends().clone().execute_as("ends", ctx)?;

Ok(match array.dtype() {
DType::Bool(_) => {
let bools = array.values().clone().execute_as("values", ctx)?;
Expand All @@ -519,13 +493,27 @@ pub(super) fn run_end_canonicalize(
let pvalues = array.values().clone().execute_as("values", ctx)?;
runend_decode_primitive(pends, pvalues, array.offset(), array.len())?.into_array()
}
_ => vortex_panic!("Only Primitive and Bool values are supported"),
DType::Utf8(_) | DType::Binary(_) => {
let num_runs = array.values().len();
let indices_values = PrimitiveArray::from_iter(0u64..num_runs as u64);
let flat_indices =
runend_decode_primitive(pends, indices_values, array.offset(), array.len())?;
let values = array
.values()
.clone()
.execute_as::<VarBinViewArray>("values", ctx)?;
VarBinViewVTable::take(&values, &flat_indices.into_array(), ctx)?
.vortex_expect("take varbinview should not return None")
Comment on lines +505 to +506
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would think you want a kernel like `runend_decode_primitive` instead of a take (which will be much slower). Since you have a VarBinViewArray, you need to do exactly a primitive decode on a u128/i128 (the view size).

Copy link
Contributor

@joseph-isaacs joseph-isaacs Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you might get away with `runend_decode_typed_primitive`: apply it to `VarBinViewArray::views`.

}
_ => vortex_bail!("Unsupported RunEnd value type: {}", array.dtype()),
})
}

#[cfg(test)]
mod tests {
use vortex_array::IntoArray;
use vortex_array::arrays::DictArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::assert_arrays_eq;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability;
Expand All @@ -552,4 +540,33 @@ mod tests {
let expected = buffer![1, 1, 2, 2, 2, 3, 3, 3, 3, 3].into_array();
assert_arrays_eq!(arr.to_array(), expected);
}

#[test]
fn test_runend_utf8() {
    // Run ends [2, 5, 10] over ["a", "b", "c"]: the decoded array is expected
    // to hold 2 x "a", 3 x "b" and 5 x "c".
    let run_ends = buffer![2u32, 5, 10].into_array();
    let run_values = VarBinViewArray::from_iter_str(["a", "b", "c"]).into_array();
    let encoded = RunEndArray::new(run_ends, run_values);

    // The logical length is the last run end; the dtype is taken from the values.
    assert_eq!(encoded.len(), 10);
    assert_eq!(encoded.dtype(), &DType::Utf8(Nullability::NonNullable));

    let decoded =
        VarBinViewArray::from_iter_str(["a", "a", "b", "b", "b", "c", "c", "c", "c", "c"])
            .into_array();
    assert_arrays_eq!(encoded.to_array(), decoded);
}

#[test]
fn test_runend_dict() {
    // Run values are themselves dictionary-encoded strings: codes [0, 1, 2]
    // pointing into ["x", "y", "z"].
    let codes = buffer![0u32, 1, 2].into_array();
    let dictionary = VarBinViewArray::from_iter_str(["x", "y", "z"]).into_array();
    let run_values = DictArray::try_new(codes, dictionary)
        .unwrap()
        .into_array();

    let run_ends = buffer![2u32, 5, 10].into_array();
    let encoded = RunEndArray::try_new(run_ends, run_values).unwrap();
    assert_eq!(encoded.len(), 10);

    // Expected decoded output: 2 x "x", 3 x "y", 5 x "z".
    let decoded =
        VarBinViewArray::from_iter_str(["x", "x", "y", "y", "y", "z", "z", "z", "z", "z"])
            .into_array();
    assert_arrays_eq!(encoded.to_array(), decoded);
}
}
Loading