diff --git a/encodings/runend/src/arbitrary.rs b/encodings/runend/src/arbitrary.rs index cb561e7a20e..f4a3a83f150 100644 --- a/encodings/runend/src/arbitrary.rs +++ b/encodings/runend/src/arbitrary.rs @@ -22,8 +22,7 @@ pub struct ArbitraryRunEndArray(pub RunEndArray); impl<'a> Arbitrary<'a> for ArbitraryRunEndArray { fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> { - // RunEnd supports Bool or Primitive types for values - // Pick a random primitive type for values + // Pick a random primitive type for values. let ptype: PType = u.arbitrary()?; let nullability: Nullability = u.arbitrary()?; let dtype = DType::Primitive(ptype, nullability); diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 0f4b2949300..a0c16375798 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -14,7 +14,11 @@ use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; +use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::PrimitiveVTable; +use vortex_array::arrays::TakeExecute; +use vortex_array::arrays::VarBinViewArray; +use vortex_array::arrays::VarBinViewVTable; use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; @@ -239,12 +243,6 @@ impl RunEndArray { "run ends must be unsigned integers, was {}", ends.dtype(), ); - vortex_ensure!( - values.dtype().is_primitive() || values.dtype().is_boolean(), - "RunEnd array can only have Bool or Primitive values, {} given", - values.dtype() - ); - vortex_ensure!( ends.len() == values.len(), "run ends len != run values len, {} != {}", @@ -342,32 +340,7 @@ impl RunEndArray { /// /// # Validation /// - /// The `ends` must be non-nullable unsigned integers. The values may be `Bool` or `Primitive` - /// types. 
- /// - /// # Examples - /// - /// ``` - /// # use vortex_array::arrays::{BoolArray, VarBinViewArray}; - /// # use vortex_array::IntoArray; - /// # use vortex_buffer::buffer; - /// # use vortex_runend::RunEndArray; - /// - /// // Error to provide incorrectly-typed values! - /// let result = RunEndArray::try_new( - /// buffer![1u8, 2u8].into_array(), - /// VarBinViewArray::from_iter_str(["bad", "dtype"]).into_array(), - /// ); - /// assert!(result.is_err()); - /// - /// // This array is happy - /// let result = RunEndArray::try_new( - /// buffer![1u8, 2u8].into_array(), - /// BoolArray::from_iter([false, true]).into_array(), - /// ); - /// - /// assert!(result.is_ok()); - /// ``` + /// The `ends` must be non-nullable unsigned integers. pub fn try_new(ends: ArrayRef, values: ArrayRef) -> VortexResult<Self> { let length: usize = if ends.is_empty() { 0 @@ -510,6 +483,7 @@ pub(super) fn run_end_canonicalize( ctx: &mut ExecutionCtx, ) -> VortexResult<ArrayRef> { let pends = array.ends().clone().execute_as("ends", ctx)?; + Ok(match array.dtype() { DType::Bool(_) => { let bools = array.values().clone().execute_as("values", ctx)?; @@ -519,13 +493,27 @@ pub(super) fn run_end_canonicalize( let pvalues = array.values().clone().execute_as("values", ctx)?; runend_decode_primitive(pends, pvalues, array.offset(), array.len())?.into_array() } - _ => vortex_panic!("Only Primitive and Bool values are supported"), + DType::Utf8(_) | DType::Binary(_) => { + let num_runs = array.values().len(); + let indices_values = PrimitiveArray::from_iter(0u64..num_runs as u64); + let flat_indices = + runend_decode_primitive(pends, indices_values, array.offset(), array.len())?; + let values = array + .values() + .clone() + .execute_as::<VarBinViewArray>("values", ctx)?; + VarBinViewVTable::take(&values, &flat_indices.into_array(), ctx)? 
+ .vortex_expect("take varbinview should not return None") + } + _ => vortex_bail!("Unsupported RunEnd value type: {}", array.dtype()), }) } #[cfg(test)] mod tests { use vortex_array::IntoArray; + use vortex_array::arrays::DictArray; + use vortex_array::arrays::VarBinViewArray; use vortex_array::assert_arrays_eq; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; @@ -552,4 +540,33 @@ mod tests { let expected = buffer![1, 1, 2, 2, 2, 3, 3, 3, 3, 3].into_array(); assert_arrays_eq!(arr.to_array(), expected); } + + #[test] + fn test_runend_utf8() { + let values = VarBinViewArray::from_iter_str(["a", "b", "c"]).into_array(); + let arr = RunEndArray::new(buffer![2u32, 5, 10].into_array(), values); + assert_eq!(arr.len(), 10); + assert_eq!(arr.dtype(), &DType::Utf8(Nullability::NonNullable)); + + let expected = + VarBinViewArray::from_iter_str(["a", "a", "b", "b", "b", "c", "c", "c", "c", "c"]) + .into_array(); + assert_arrays_eq!(arr.to_array(), expected); + } + + #[test] + fn test_runend_dict() { + let dict_values = VarBinViewArray::from_iter_str(["x", "y", "z"]).into_array(); + let dict_codes = buffer![0u32, 1, 2].into_array(); + let dict = DictArray::try_new(dict_codes, dict_values).unwrap(); + + let arr = + RunEndArray::try_new(buffer![2u32, 5, 10].into_array(), dict.into_array()).unwrap(); + assert_eq!(arr.len(), 10); + + let expected = + VarBinViewArray::from_iter_str(["x", "x", "y", "y", "y", "z", "z", "z", "z", "z"]) + .into_array(); + assert_arrays_eq!(arr.to_array(), expected); + } }