pydata · dhruvak001 · Dec 1, 2025 · Dec 3, 2025 · Dec 3, 2025 · Dec 27, 2025
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -23,6 +23,13 @@ New Features
   (:pull:`10849`).
   By `Stephan Hoyer <https://github.com/shoyer>`_.
 
+- Added ``create_index`` parameter to :py:meth:`Dataset.to_dataframe`, :py:meth:`DataArray.to_dataframe`,
+  :py:meth:`Dataset.to_dask_dataframe`, and :py:meth:`DataArray.to_dask_dataframe` methods.
+  When ``create_index=False``, the dimension coordinates are left out of the resulting
+  DataFrame, which uses a plain :py:class:`pandas.RangeIndex` instead. This can significantly
+  improve performance when the dimension coordinates are not needed.
+  By `Sanjay Kumar <https://github.com/sanjay>`_.
+
 Breaking Changes
 ~~~~~~~~~~~~~~~~
 

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -3954,7 +3954,10 @@ def to_pandas(self) -> Self | pd.Series | pd.DataFrame:
         return pandas_object
 
     def to_dataframe(
-        self, name: Hashable | None = None, dim_order: Sequence[Hashable] | None = None
+        self,
+        name: Hashable | None = None,
+        dim_order: Sequence[Hashable] | None = None,
+        create_index: bool = True,
     ) -> pd.DataFrame:
         """Convert this array and its coordinates into a tidy pandas.DataFrame.
 
@@ -3979,6 +3982,11 @@ def to_dataframe(
 
             If provided, must include all dimensions of this DataArray. By default,
             dimensions are sorted according to the DataArray dimensions order.
+        create_index : bool, default: True
+            If True (default), index the result by the Cartesian product of this DataArray's
+            indices (a :py:class:`pandas.MultiIndex` for more than one dimension). If False,
+            use a :py:class:`pandas.RangeIndex` instead, which avoids the potentially expensive
+            index creation; the dimension coordinates are then not part of the result.
 
         Returns
         -------
@@ -4013,7 +4021,7 @@ def to_dataframe(
         else:
             ordered_dims = ds._normalize_dim_order(dim_order=dim_order)
 
-        df = ds._to_dataframe(ordered_dims)
+        df = ds._to_dataframe(ordered_dims, create_index=create_index)
         df.columns = [name if c == unique_name else c for c in df.columns]
         return df
 
@@ -7579,6 +7587,7 @@ def to_dask_dataframe(
         self,
         dim_order: Sequence[Hashable] | None = None,
         set_index: bool = False,
+        create_index: bool = True,
     ) -> DaskDataFrame:
         """Convert this array into a dask.dataframe.DataFrame.
 
@@ -7594,6 +7603,13 @@ def to_dask_dataframe(
             If set_index=True, the dask DataFrame is indexed by this dataset's
             coordinate. Since dask DataFrames do not support multi-indexes,
             set_index only works if the dataset only contains one dimension.
+        create_index : bool, default: True
+            If False, the dimension coordinates are omitted from the columns and the
+            result keeps a plain :py:class:`pandas.RangeIndex`. This can improve
+            performance when the dimension coordinates are not needed.
+            ``create_index=False`` is incompatible with ``set_index=True``.
+
+            .. versionadded:: 2025.01.1
 
         Returns
         -------
@@ -7638,7 +7654,7 @@ def to_dask_dataframe(
             )
         name = self.name
         ds = self._to_dataset_whole(name, shallow_copy=False)
-        return ds.to_dask_dataframe(dim_order, set_index)
+        return ds.to_dask_dataframe(dim_order, set_index, create_index)
 
     # this needs to be at the end, or mypy will confuse with `str`
     # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -7200,7 +7200,7 @@ def to_pandas(self) -> pd.Series | pd.DataFrame:
             "Please use Dataset.to_dataframe() instead."
         )
 
-    def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
+    def _to_dataframe(self, ordered_dims: Mapping[Any, int], create_index: bool = True):
         from xarray.core.extension_array import PandasExtensionArray
 
         # All and only non-index arrays (whether data or coordinates) should
@@ -7231,7 +7231,15 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
             self._variables[k].set_dims(ordered_dims).values.reshape(-1)
             for k in non_extension_array_columns
         ]
-        index = self.coords.to_index([*ordered_dims])
+        if create_index:
+            index = self.coords.to_index([*ordered_dims])
+        else:
+            # No index is built from the coordinates; rows are simply numbered.
+            # The row count is the product of the dimension sizes. With no
+            # dimensions the empty product is 1, which matches the single row
+            # that ``reshape(-1)`` produces for 0-d variables.
+            total_size = int(np.prod(list(ordered_dims.values()), dtype=np.int64))
+            index = pd.RangeIndex(total_size)
         broadcasted_df = pd.DataFrame(
             {
                 **dict(zip(non_extension_array_columns, data, strict=True)),
@@ -7259,7 +7267,11 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
             broadcasted_df = broadcasted_df.join(extension_array_df)
         return broadcasted_df[columns_in_order]
 
-    def to_dataframe(self, dim_order: Sequence[Hashable] | None = None) -> pd.DataFrame:
+    def to_dataframe(
+        self,
+        dim_order: Sequence[Hashable] | None = None,
+        create_index: bool = True,
+    ) -> pd.DataFrame:
         """Convert this dataset into a pandas.DataFrame.
 
         Non-index variables in this dataset form the columns of the
@@ -7278,6 +7290,11 @@ def to_dataframe(self, dim_order: Sequence[Hashable] | None = None) -> pd.DataFr
 
             If provided, must include all dimensions of this dataset. By
             default, dimensions are in the same order as in `Dataset.sizes`.
+        create_index : bool, default: True
+            If True (default), index the result by the Cartesian product of this dataset's
+            indices (a :py:class:`pandas.MultiIndex` for more than one dimension). If False,
+            use a :py:class:`pandas.RangeIndex` instead, which avoids the potentially expensive
+            index creation; the dimension coordinates are then not part of the result.
 
         Returns
         -------
@@ -7288,7 +7305,7 @@ def to_dataframe(self, dim_order: Sequence[Hashable] | None = None) -> pd.DataFr
 
         ordered_dims = self._normalize_dim_order(dim_order=dim_order)
 
-        return self._to_dataframe(ordered_dims=ordered_dims)
+        return self._to_dataframe(ordered_dims=ordered_dims, create_index=create_index)
 
     def _set_sparse_data_from_dataframe(
         self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple
@@ -7446,7 +7463,10 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self:
         return obj[dataframe.columns] if len(dataframe.columns) else obj
 
     def to_dask_dataframe(
-        self, dim_order: Sequence[Hashable] | None = None, set_index: bool = False
+        self,
+        dim_order: Sequence[Hashable] | None = None,
+        set_index: bool = False,
+        create_index: bool = True,
     ) -> DaskDataFrame:
         """
         Convert this dataset into a dask.dataframe.DataFrame.
@@ -7470,6 +7490,13 @@ def to_dask_dataframe(
             If set_index=True, the dask DataFrame is indexed by this dataset's
             coordinate. Since dask DataFrames do not support multi-indexes,
             set_index only works if the dataset only contains one dimension.
+        create_index : bool, default: True
+            If False, the dimension coordinates are omitted from the columns and the
+            result keeps a plain :py:class:`pandas.RangeIndex`. This can improve
+            performance when the dimension coordinates are not needed.
+            ``create_index=False`` is incompatible with ``set_index=True``.
+
+            .. versionadded:: 2025.01.1
 
         Returns
         -------
@@ -7479,11 +7506,20 @@ def to_dask_dataframe(
         import dask.array as da
         import dask.dataframe as dd
 
+        if not create_index and set_index:
+            raise ValueError("create_index=False is incompatible with set_index=True")
+
         ordered_dims = self._normalize_dim_order(dim_order=dim_order)
 
-        columns = list(ordered_dims)
-        columns.extend(k for k in self.coords if k not in self.dims)
-        columns.extend(self.data_vars)
+        if create_index:
+            columns = list(ordered_dims)
+            columns.extend(k for k in self.coords if k not in self.dims)
+            columns.extend(self.data_vars)
+        else:
+            # When create_index=False, exclude dimensions from columns
+            columns = []
+            columns.extend(k for k in self.coords if k not in self.dims)
+            columns.extend(self.data_vars)
 
         ds_chunks = self.chunks
 

diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
@@ -941,6 +941,96 @@ def test_to_dask_dataframe_dim_order(self):
         with pytest.raises(ValueError, match=r"does not match the set of dimensions"):
             ds.to_dask_dataframe(dim_order=["x"])
 
+    def test_to_dask_dataframe_create_index_false(self):
+        # Test that create_index=False uses RangeIndex instead of dimension columns
+        x = np.random.randn(10)
+        y = np.arange(10, dtype="uint8")
+        t = list("abcdefghij")
+
+        ds = Dataset(
+            {"a": ("t", da.from_array(x, chunks=4)), "b": ("t", y), "t": ("t", t)}
+        )
+
+        # With create_index=False, we should get a RangeIndex and no dimension columns
+        actual = ds.to_dask_dataframe(create_index=False)
+        assert isinstance(actual, dd.DataFrame)
+        actual_computed = actual.compute()
+
+        # Check that index is RangeIndex
+        assert isinstance(actual_computed.index, pd.RangeIndex)
+
+        # Check that dimension columns are not present
+        assert "t" not in actual_computed.columns
+
+        # Check that data columns are present
+        assert "a" in actual_computed.columns
+        assert "b" in actual_computed.columns
+
+        # Verify values are correct
+        assert_array_equal(actual_computed["a"].values, x)
+        assert_array_equal(actual_computed["b"].values, y)
+
+    def test_to_dask_dataframe_create_index_incompatible_with_set_index(self):
+        # Test that create_index=False and set_index=True raises an error
+        ds = Dataset({"a": ("t", da.from_array([1, 2, 3], chunks=2))})
+
+        with pytest.raises(
+            ValueError,
+            match="create_index=False is incompatible with set_index=True",
+        ):
+            ds.to_dask_dataframe(create_index=False, set_index=True)
+
+    def test_to_dask_dataframe_create_index_2D(self):
+        # Test create_index=False with 2D dataset
+        w = np.random.randn(2, 3)
+        ds = Dataset({"w": (("x", "y"), da.from_array(w, chunks=(1, 2)))})
+        ds["x"] = ("x", np.array([0, 1], np.int64))
+        ds["y"] = ("y", list("abc"))
+
+        actual = ds.to_dask_dataframe(create_index=False)
+        assert isinstance(actual, dd.DataFrame)
+        actual_computed = actual.compute()
+
+        # Check that index is RangeIndex
+        assert isinstance(actual_computed.index, pd.RangeIndex)
+
+        # Check that dimension columns are not present
+        assert "x" not in actual_computed.columns
+        assert "y" not in actual_computed.columns
+
+        # Check that data column is present
+        assert "w" in actual_computed.columns
+
+        # Verify values are correct (flattened)
+        assert_array_equal(actual_computed["w"].values, w.reshape(-1))
+
+    def test_to_dask_dataframe_create_index_dataarray(self):
+        # Test create_index parameter for DataArray.to_dask_dataframe
+        arr_np = np.arange(3 * 4).reshape(3, 4)
+        arr = DataArray(
+            da.from_array(arr_np, chunks=(2, 2)),
+            [("B", [1, 2, 3]), ("A", list("cdef"))],
+            name="foo",
+        )
+
+        # With create_index=False, should use RangeIndex
+        actual = arr.to_dask_dataframe(create_index=False)
+        assert isinstance(actual, dd.DataFrame)
+        actual_computed = actual.compute()
+
+        assert isinstance(actual_computed.index, pd.RangeIndex)
+        assert "B" not in actual_computed.columns
+        assert "A" not in actual_computed.columns
+        assert "foo" in actual_computed.columns
+        assert_array_equal(actual_computed["foo"].values, arr_np.reshape(-1))
+
+        # Test incompatibility with set_index=True
+        with pytest.raises(
+            ValueError,
+            match="create_index=False is incompatible with set_index=True",
+        ):
+            arr.to_dask_dataframe(create_index=False, set_index=True)
+
 
 @pytest.mark.parametrize("method", ["load", "compute"])
 def test_dask_kwargs_variable(method):

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -3655,6 +3655,32 @@ def test_to_dataframe_0length(self) -> None:
         assert len(actual) == 0
         assert_array_equal(actual.index.names, list("ABC"))
 
+    def test_to_dataframe_create_index(self) -> None:
+        # Test create_index parameter
+        arr_np = np.arange(12).reshape(3, 4)
+        arr = DataArray(arr_np, [("x", [1, 2, 3]), ("y", list("abcd"))], name="foo")
+
+        # Default behavior: create MultiIndex
+        df_with_index = arr.to_dataframe()
+        assert isinstance(df_with_index.index, pd.MultiIndex)
+        assert df_with_index.index.names == ["x", "y"]
+        assert len(df_with_index) == 12
+
+        # With create_index=False: use RangeIndex
+        df_without_index = arr.to_dataframe(create_index=False)
+        assert isinstance(df_without_index.index, pd.RangeIndex)
+        assert len(df_without_index) == 12
+
+        # Data should be the same regardless
+        assert_array_equal(df_with_index["foo"].values, df_without_index["foo"].values)
+
+        # A non-index coordinate on only one dimension still becomes a full-length column
+        arr.coords["z"] = ("x", [-1, -2, -3])
+        df_with_coords = arr.to_dataframe(create_index=False)
+        assert isinstance(df_with_coords.index, pd.RangeIndex)
+        assert "z" in df_with_coords.columns
+        assert len(df_with_coords) == 12
+
     @pytest.mark.parametrize(
         "x_dtype,y_dtype,v_dtype",
         [

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -2282,6 +2282,38 @@ def test_to_pandas(self) -> None:
         with pytest.raises(ValueError, match=r"cannot convert Datasets"):
             Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas()
 
+    def test_to_dataframe_create_index(self) -> None:
+        # Test create_index parameter for Dataset
+        x = np.random.randn(3, 4)
+        y = np.random.randn(3, 4)
+        ds = Dataset(
+            {"a": (("x", "y"), x), "b": (("x", "y"), y)},
+            coords={"x": [1, 2, 3], "y": list("abcd")},
+        )
+
+        # Default behavior: create MultiIndex
+        df_with_index = ds.to_dataframe()
+        assert isinstance(df_with_index.index, pd.MultiIndex)
+        assert df_with_index.index.names == ["x", "y"]
+        assert len(df_with_index) == 12
+
+        # With create_index=False: use RangeIndex
+        df_without_index = ds.to_dataframe(create_index=False)
+        assert isinstance(df_without_index.index, pd.RangeIndex)
+        assert len(df_without_index) == 12
+
+        # Data should be the same regardless
+        assert_array_equal(df_with_index["a"].values, df_without_index["a"].values)
+        assert_array_equal(df_with_index["b"].values, df_without_index["b"].values)
+
+        # Test with dim_order and create_index=False
+        df_reordered = ds.to_dataframe(dim_order=["y", "x"], create_index=False)
+        assert isinstance(df_reordered.index, pd.RangeIndex)
+        assert len(df_reordered) == 12
+        # Row order must match the indexed frame built with the same dim_order
+        df_reordered_with_idx = ds.to_dataframe(dim_order=["y", "x"])
+        assert_array_equal(df_reordered["a"].values, df_reordered_with_idx["a"].values)
+
     def test_reindex_like(self) -> None:
         data = create_test_data()
         data["letters"] = ("dim3", 10 * ["a"])