Skip to content

Commit 9b8598a

Browse files
API: Copy inputs in Index subclass constructors by default (GH#63388) (pandas-dev#63398)
1 parent b95f65b commit 9b8598a

File tree

12 files changed

+177
-20
lines changed

‎doc/source/whatsnew/v3.0.0.rst‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,7 @@ Other API changes
820820
:meth:`~DataFrame.ffill`, :meth:`~DataFrame.bfill`, :meth:`~DataFrame.interpolate`,
821821
:meth:`~DataFrame.where`, :meth:`~DataFrame.mask`, :meth:`~DataFrame.clip`) now return
822822
the modified DataFrame or Series (``self``) instead of ``None`` when ``inplace=True`` (:issue:`63207`)
823+
- All Index constructors now copy ``numpy.ndarray`` and ``ExtensionArray`` inputs by default when ``copy=None``, consistent with :class:`Series` behavior (:issue:`63388`)
823824

824825
.. ---------------------------------------------------------------------------
825826
.. _whatsnew_300.deprecations:

‎pandas/core/indexes/base.py‎

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -505,12 +505,8 @@ def __new__(
505505
        if not copy and isinstance(data, (ABCSeries, Index)):
506506
            refs = data._references
507507

508-
        if isinstance(data, (ExtensionArray, np.ndarray)):
509-
            # GH 63306
510-
            if copy is not False:
511-
                if dtype is None or astype_is_view(data.dtype, dtype):
512-
                    data = data.copy()
513-
                    copy = False
508+
        # GH 63306, GH 63388
509+
        data, copy = cls._maybe_copy_array_input(data, copy, dtype)
514510

515511
        # range
516512
        if isinstance(data, (range, RangeIndex)):
@@ -5197,6 +5193,21 @@ def _raise_scalar_data_error(cls, data):
51975193
            "was passed"
51985194
        )
51995195

5196+
    @classmethod
5197+
    def _maybe_copy_array_input(
5198+
        cls, data, copy: bool | None, dtype
5199+
    ) -> tuple[Any, bool]:
5200+
        """
5201+
        Ensure that the input data is copied if necessary.
5202+
        GH#63388
5203+
        """
5204+
        if isinstance(data, (ExtensionArray, np.ndarray)):
5205+
            if copy is not False:
5206+
                if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
5207+
                    data = data.copy()
5208+
                    copy = False
5209+
        return data, bool(copy)
5210+
52005211
    def _validate_fill_value(self, value):
52015212
        """
52025213
        Check if the value can be inserted into our array without casting,

‎pandas/core/indexes/datetimes.py‎

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,13 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
181181
If True parse dates in `data` with the year first order.
182182
dtype : numpy.dtype or DatetimeTZDtype or str, default None
183183
Note that the only NumPy dtype allowed is `datetime64[ns]`.
184-
copy : bool, default False
185-
Make a copy of input ndarray.
184+
copy : bool, default None
185+
Whether to copy input data, only relevant for array, Series, and Index
186+
inputs (for other input, e.g. a list, a new array is created anyway).
187+
Defaults to True for array input and False for Index/Series.
188+
Set to False to avoid copying array input at your own risk (if you
189+
know the input data won't be modified elsewhere).
190+
Set to True to force copying Series/Index input up front.
186191
name : label, default None
187192
Name to be stored in the index.
188193
@@ -669,7 +674,7 @@ def __new__(
669674
        dayfirst: bool = False,
670675
        yearfirst: bool = False,
671676
        dtype: Dtype | None = None,
672-
        copy: bool = False,
677+
        copy: bool | None = None,
673678
        name: Hashable | None = None,
674679
    ) -> Self:
675680
        if is_scalar(data):
@@ -679,6 +684,9 @@ def __new__(
679684

680685
        name = maybe_extract_name(name, data, cls)
681686

687+
        # GH#63388
688+
        data, copy = cls._maybe_copy_array_input(data, copy, dtype)
689+
682690
        if (
683691
            isinstance(data, DatetimeArray)
684692
            and freq is lib.no_default

‎pandas/core/indexes/interval.py‎

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,13 @@ class IntervalIndex(ExtensionIndex):
169169
neither.
170170
dtype : dtype or None, default None
171171
If None, dtype will be inferred.
172-
copy : bool, default False
173-
Copy the input data.
172+
copy : bool, default None
173+
Whether to copy input data, only relevant for array, Series, and Index
174+
inputs (for other input, e.g. a list, a new array is created anyway).
175+
Defaults to True for array input and False for Index/Series.
176+
Set to False to avoid copying array input at your own risk (if you
177+
know the input data won't be modified elsewhere).
178+
Set to True to force copying Series/Index input up front.
174179
name : object, optional
175180
Name to be stored in the index.
176181
verify_integrity : bool, default True
@@ -252,12 +257,15 @@ def __new__(
252257
        data,
253258
        closed: IntervalClosedType | None = None,
254259
        dtype: Dtype | None = None,
255-
        copy: bool = False,
260+
        copy: bool | None = None,
256261
        name: Hashable | None = None,
257262
        verify_integrity: bool = True,
258263
    ) -> Self:
259264
        name = maybe_extract_name(name, data, cls)
260265

266+
        # GH#63388
267+
        data, copy = cls._maybe_copy_array_input(data, copy, dtype)
268+
261269
        with rewrite_exception("IntervalArray", cls.__name__):
262270
            array = IntervalArray(
263271
                data,

‎pandas/core/indexes/period.py‎

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,13 @@ class PeriodIndex(DatetimeIndexOpsMixin):
101101
One of pandas period strings or corresponding objects.
102102
dtype : str or PeriodDtype, default None
103103
A dtype from which to extract a freq.
104-
copy : bool
105-
Make a copy of input ndarray.
104+
copy : bool, default None
105+
Whether to copy input data, only relevant for array, Series, and Index
106+
inputs (for other input, e.g. a list, a new array is created anyway).
107+
Defaults to True for array input and False for Index/Series.
108+
Set to False to avoid copying array input at your own risk (if you
109+
know the input data won't be modified elsewhere).
110+
Set to True to force copying Series/Index input up front.
106111
name : str, default None
107112
Name of the resulting PeriodIndex.
108113
@@ -220,7 +225,7 @@ def __new__(
220225
        data=None,
221226
        freq=None,
222227
        dtype: Dtype | None = None,
223-
        copy: bool = False,
228+
        copy: bool | None = None,
224229
        name: Hashable | None = None,
225230
    ) -> Self:
226231
        refs = None
@@ -231,6 +236,9 @@ def __new__(
231236

232237
        freq = validate_dtype_freq(dtype, freq)
233238

239+
        # GH#63388
240+
        data, copy = cls._maybe_copy_array_input(data, copy, dtype)
241+
234242
        # PeriodIndex allow PeriodIndex(period_index, freq=different)
235243
        # Let's not encourage that kind of behavior in PeriodArray.
236244

‎pandas/core/indexes/timedeltas.py‎

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,13 @@ class TimedeltaIndex(DatetimeTimedeltaMixin):
8181
dtype : numpy.dtype or str, default None
8282
Valid ``numpy`` dtypes are ``timedelta64[ns]``, ``timedelta64[us]``,
8383
``timedelta64[ms]``, and ``timedelta64[s]``.
84-
copy : bool
85-
Make a copy of input array.
84+
copy : bool, default None
85+
Whether to copy input data, only relevant for array, Series, and Index
86+
inputs (for other input, e.g. a list, a new array is created anyway).
87+
Defaults to True for array input and False for Index/Series.
88+
Set to False to avoid copying array input at your own risk (if you
89+
know the input data won't be modified elsewhere).
90+
Set to True to force copying Series/Index input up front.
8691
name : object
8792
Name to be stored in the index.
8893
@@ -158,11 +163,14 @@ def __new__(
158163
        data=None,
159164
        freq=lib.no_default,
160165
        dtype=None,
161-
        copy: bool = False,
166+
        copy: bool | None = None,
162167
        name=None,
163168
    ):
164169
        name = maybe_extract_name(name, data, cls)
165170

171+
        # GH#63388
172+
        data, copy = cls._maybe_copy_array_input(data, copy, dtype)
173+
166174
        if is_scalar(data):
167175
            cls._raise_scalar_data_error(data)
168176

‎pandas/tests/arrays/test_datetimelike.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ def test_array_object_dtype(self, arr1d):
707707
    def test_array_tz(self, arr1d):
708708
        # GH#23524
709709
        arr = arr1d
710-
        dti = self.index_cls(arr1d)
710+
        dti = self.index_cls(arr1d, copy=False)
711711
        copy_false = None if np_version_gt2 else False
712712

713713
        expected = dti.asi8.view("M8[ns]")

‎pandas/tests/copy_view/index/test_datetimeindex.py‎

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1+
import numpy as np
12
import pytest
23

34
from pandas import (
45
    DatetimeIndex,
56
    Series,
67
    Timestamp,
8+
    array,
79
    date_range,
810
)
911
import pandas._testing as tm
12+
from pandas.tests.copy_view.util import get_array
1013

1114
pytestmark = pytest.mark.filterwarnings(
1215
    "ignore:Setting a value on a view:FutureWarning"
@@ -54,3 +57,30 @@ def test_index_values():
5457
    idx = date_range("2019-12-31", periods=3, freq="D")
5558
    result = idx.values
5659
    assert result.flags.writeable is False
60+
61+
62+
def test_constructor_copy_input_datetime_ndarray_default():
63+
    # GH 63388
64+
    arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
65+
    idx = DatetimeIndex(arr)
66+
    assert not np.shares_memory(arr, get_array(idx))
67+
68+
69+
def test_constructor_copy_input_datetime_ea_default():
70+
    # GH 63388
71+
    arr = array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
72+
    idx = DatetimeIndex(arr)
73+
    assert not tm.shares_memory(arr, idx.array)
74+
75+
76+
def test_series_from_temporary_datetimeindex_readonly_data():
77+
    # GH 63388
78+
    arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
79+
    arr.flags.writeable = False
80+
    ser = Series(DatetimeIndex(arr))
81+
    assert not np.shares_memory(arr, get_array(ser))
82+
    ser.iloc[0] = Timestamp("2020-01-01")
83+
    expected = Series(
84+
        [Timestamp("2020-01-01"), Timestamp("2020-01-02")], dtype="datetime64[ns]"
85+
    )
86+
    tm.assert_series_equal(ser, expected)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import numpy as np
2+
3+
from pandas import (
4+
    Interval,
5+
    IntervalIndex,
6+
    Series,
7+
    array,
8+
)
9+
import pandas._testing as tm
10+
from pandas.tests.copy_view.util import get_array
11+
12+
13+
def test_constructor_copy_input_interval_ea_default():
14+
    # GH 63388
15+
    arr = array([Interval(0, 1), Interval(1, 2)])
16+
    idx = IntervalIndex(arr)
17+
    assert not tm.shares_memory(arr, idx.array)
18+
19+
20+
def test_series_from_temporary_intervalindex_readonly_data():
21+
    # GH 63388
22+
    arr = array([Interval(0, 1), Interval(1, 2)])
23+
    arr._left.flags.writeable = False
24+
    arr._right.flags.writeable = False
25+
    ser = Series(IntervalIndex(arr))
26+
    assert not np.shares_memory(arr._left, get_array(ser)._left)
27+
    ser.iloc[0] = Interval(5, 6)
28+
    expected = Series([Interval(5, 6), Interval(1, 2)], dtype="interval[int64, right]")
29+
    tm.assert_series_equal(ser, expected)

‎pandas/tests/copy_view/index/test_periodindex.py‎

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1+
import numpy as np
12
import pytest
23

34
from pandas import (
45
    Period,
56
    PeriodIndex,
67
    Series,
8+
    array,
79
    period_range,
810
)
911
import pandas._testing as tm
12+
from pandas.tests.copy_view.util import get_array
1013

1114
pytestmark = pytest.mark.filterwarnings(
1215
    "ignore:Setting a value on a view:FutureWarning"
@@ -21,3 +24,24 @@ def test_periodindex(box):
2124
    expected = idx.copy(deep=True)
2225
    ser.iloc[0] = Period("2020-12-31")
2326
    tm.assert_index_equal(idx, expected)
27+
28+
29+
def test_constructor_copy_input_period_ea_default():
30+
    # GH 63388
31+
    arr = array(["2020-01-01", "2020-01-02"], dtype="period[D]")
32+
    idx = PeriodIndex(arr)
33+
    assert not tm.shares_memory(arr, idx.array)
34+
35+
36+
def test_series_from_temporary_periodindex_readonly_data():
37+
    # GH 63388
38+
    arr = array(["2020-01-01", "2020-01-02"], dtype="period[D]")
39+
    arr._ndarray.flags.writeable = False
40+
    ser = Series(PeriodIndex(arr))
41+
    assert not np.shares_memory(arr._ndarray, get_array(ser))
42+
    ser.iloc[0] = Period("2022-01-01", freq="D")
43+
    expected = Series(
44+
        [Period("2022-01-01", freq="D"), Period("2020-01-02", freq="D")],
45+
        dtype="period[D]",
46+
    )
47+
    tm.assert_series_equal(ser, expected)

0 commit comments

Comments
(0)