6
6
from numpy .random import randint
7
7
import pytest
8
8
9
+
9
10
from pandas import DataFrame , Index , MultiIndex , Series , concat , isna , notna
10
11
import pandas .core .strings as strings
11
12
import pandas .util .testing as tm
@@ -892,27 +893,39 @@ def test_casemethods(self):
892
893
def test_replace (self ):
893
894
values = Series (['fooBAD__barBAD' , NA ])
894
895
895
- result = values .str .replace ('BAD[_]*' , '' )
896
+ result = values .str .replace ('BAD[_]*' , '' , regex = True )
896
897
exp = Series (['foobar' , NA ])
897
898
tm .assert_series_equal (result , exp )
898
899
899
- result = values .str .replace ('BAD[_]*' , '' , n = 1 )
900
+ result = values .str .replace ('BAD[_]*' , '' , regex = True , n = 1 )
900
901
exp = Series (['foobarBAD' , NA ])
901
902
tm .assert_series_equal (result , exp )
902
903
903
904
# mixed
904
905
mixed = Series (['aBAD' , NA , 'bBAD' , True , datetime .today (), 'fooBAD' ,
905
906
None , 1 , 2. ])
906
907
907
- rs = Series (mixed ).str .replace ('BAD[_]*' , '' )
908
+ rs = Series (mixed ).str .replace ('BAD[_]*' , '' , regex = True )
908
909
xp = Series (['a' , NA , 'b' , NA , NA , 'foo' , NA , NA , NA ])
909
910
assert isinstance (rs , Series )
910
911
tm .assert_almost_equal (rs , xp )
911
912
913
+ # unicode
914
+ values = Series ([u'fooBAD__barBAD' , NA ])
915
+
916
+ result = values .str .replace ('BAD[_]*' , '' , regex = True )
917
+ exp = Series ([u'foobar' , NA ])
918
+ tm .assert_series_equal (result , exp )
919
+
920
+ result = values .str .replace ('BAD[_]*' , '' , n = 1 , regex = True )
921
+ exp = Series ([u'foobarBAD' , NA ])
922
+ tm .assert_series_equal (result , exp )
923
+
912
924
# flags + unicode
913
925
values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
914
926
exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
915
- result = values .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE )
927
+ result = values .str .replace (r"(?<=\w),(?=\w)" , ", " , regex = True ,
928
+ flags = re .UNICODE )
916
929
tm .assert_series_equal (result , exp )
917
930
918
931
# GH 13438
@@ -930,7 +943,7 @@ def test_replace_callable(self):
930
943
931
944
# test with callable
932
945
repl = lambda m : m .group (0 ).swapcase ()
933
- result = values .str .replace ('[a-z][A-Z]{2}' , repl , n = 2 )
946
+ result = values .str .replace ('[a-z][A-Z]{2}' , repl , n = 2 , regex = True )
934
947
exp = Series (['foObaD__baRbaD' , NA ])
935
948
tm .assert_series_equal (result , exp )
936
949
@@ -940,21 +953,21 @@ def test_replace_callable(self):
940
953
941
954
repl = lambda : None
942
955
with pytest .raises (TypeError , match = p_err ):
943
- values .str .replace ('a' , repl )
956
+ values .str .replace ('a' , repl , regex = True )
944
957
945
958
repl = lambda m , x : None
946
959
with pytest .raises (TypeError , match = p_err ):
947
- values .str .replace ('a' , repl )
960
+ values .str .replace ('a' , repl , regex = True )
948
961
949
962
repl = lambda m , x , y = None : None
950
963
with pytest .raises (TypeError , match = p_err ):
951
- values .str .replace ('a' , repl )
964
+ values .str .replace ('a' , repl , regex = True )
952
965
953
966
# test regex named groups
954
967
values = Series (['Foo Bar Baz' , NA ])
955
968
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
956
969
repl = lambda m : m .group ('middle' ).swapcase ()
957
- result = values .str .replace (pat , repl )
970
+ result = values .str .replace (pat , repl , regex = True )
958
971
exp = Series (['bAR' , NA ])
959
972
tm .assert_series_equal (result , exp )
960
973
@@ -964,28 +977,39 @@ def test_replace_compiled_regex(self):
964
977
965
978
# test with compiled regex
966
979
pat = re .compile (r'BAD[_]*' )
967
- result = values .str .replace (pat , '' )
980
+ result = values .str .replace (pat , '' , regex = True )
968
981
exp = Series (['foobar' , NA ])
969
982
tm .assert_series_equal (result , exp )
970
983
971
- result = values .str .replace (pat , '' , n = 1 )
984
+ result = values .str .replace (pat , '' , n = 1 , regex = True )
972
985
exp = Series (['foobarBAD' , NA ])
973
986
tm .assert_series_equal (result , exp )
974
987
975
988
# mixed
976
989
mixed = Series (['aBAD' , NA , 'bBAD' , True , datetime .today (), 'fooBAD' ,
977
990
None , 1 , 2. ])
978
991
979
- rs = Series (mixed ).str .replace (pat , '' )
992
+ rs = Series (mixed ).str .replace (pat , '' , regex = True )
980
993
xp = Series (['a' , NA , 'b' , NA , NA , 'foo' , NA , NA , NA ])
981
994
assert isinstance (rs , Series )
982
995
tm .assert_almost_equal (rs , xp )
983
996
997
+ # unicode
998
+ values = Series ([u'fooBAD__barBAD' , NA ])
999
+
1000
+ result = values .str .replace (pat , '' , regex = True )
1001
+ exp = Series ([u'foobar' , NA ])
1002
+ tm .assert_series_equal (result , exp )
1003
+
1004
+ result = values .str .replace (pat , '' , n = 1 , regex = True )
1005
+ exp = Series ([u'foobarBAD' , NA ])
1006
+ tm .assert_series_equal (result , exp )
1007
+
984
1008
# flags + unicode
985
1009
values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
986
1010
exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
987
1011
pat = re .compile (r"(?<=\w),(?=\w)" , flags = re .UNICODE )
988
- result = values .str .replace (pat , ", " )
1012
+ result = values .str .replace (pat , ", " , regex = True )
989
1013
tm .assert_series_equal (result , exp )
990
1014
991
1015
# case and flags provided to str.replace will have no effect
@@ -995,29 +1019,30 @@ def test_replace_compiled_regex(self):
995
1019
996
1020
with pytest .raises (ValueError ,
997
1021
match = "case and flags cannot be" ):
998
- result = values .str .replace (pat , '' , flags = re .IGNORECASE )
1022
+ result = values .str .replace (pat , '' , flags = re .IGNORECASE ,
1023
+ regex = True )
999
1024
1000
1025
with pytest .raises (ValueError ,
1001
1026
match = "case and flags cannot be" ):
1002
- result = values .str .replace (pat , '' , case = False )
1027
+ result = values .str .replace (pat , '' , case = False , regex = True )
1003
1028
1004
1029
with pytest .raises (ValueError ,
1005
1030
match = "case and flags cannot be" ):
1006
- result = values .str .replace (pat , '' , case = True )
1031
+ result = values .str .replace (pat , '' , case = True , regex = True )
1007
1032
1008
1033
# test with callable
1009
1034
values = Series (['fooBAD__barBAD' , NA ])
1010
1035
repl = lambda m : m .group (0 ).swapcase ()
1011
1036
pat = re .compile ('[a-z][A-Z]{2}' )
1012
- result = values .str .replace (pat , repl , n = 2 )
1037
+ result = values .str .replace (pat , repl , n = 2 , regex = True )
1013
1038
exp = Series (['foObaD__baRbaD' , NA ])
1014
1039
tm .assert_series_equal (result , exp )
1015
1040
1016
1041
def test_replace_literal (self ):
1017
1042
# GH16808 literal replace (regex=False vs regex=True)
1018
1043
values = Series (['f.o' , 'foo' , NA ])
1019
1044
exp = Series (['bao' , 'bao' , NA ])
1020
- result = values .str .replace ('f.' , 'ba' )
1045
+ result = values .str .replace ('f.' , 'ba' , regex = True )
1021
1046
tm .assert_series_equal (result , exp )
1022
1047
1023
1048
exp = Series (['bao' , 'foo' , NA ])
@@ -2710,6 +2735,7 @@ def test_partition_deprecation(self):
2710
2735
result = values .str .rpartition (pat = '_' )
2711
2736
tm .assert_frame_equal (result , expected )
2712
2737
2738
+ @pytest .mark .filterwarnings ("ignore: '|' is interpreted as a literal" )
2713
2739
def test_pipe_failures (self ):
2714
2740
# #2119
2715
2741
s = Series (['A|B|C' ])
@@ -2719,7 +2745,7 @@ def test_pipe_failures(self):
2719
2745
2720
2746
tm .assert_series_equal (result , exp )
2721
2747
2722
- result = s .str .replace ('|' , ' ' )
2748
+ result = s .str .replace ('|' , ' ' , regex = None )
2723
2749
exp = Series (['A B C' ])
2724
2750
2725
2751
tm .assert_series_equal (result , exp )
@@ -2980,17 +3006,17 @@ def test_replace_moar(self):
2980
3006
s = Series (['A' , 'B' , 'C' , 'Aaba' , 'Baca' , '' , NA , 'CABA' ,
2981
3007
'dog' , 'cat' ])
2982
3008
2983
- result = s .str .replace ('A' , 'YYY' )
3009
+ result = s .str .replace ('A' , 'YYY' , regex = True )
2984
3010
expected = Series (['YYY' , 'B' , 'C' , 'YYYaba' , 'Baca' , '' , NA ,
2985
3011
'CYYYBYYY' , 'dog' , 'cat' ])
2986
3012
assert_series_equal (result , expected )
2987
3013
2988
- result = s .str .replace ('A' , 'YYY' , case = False )
3014
+ result = s .str .replace ('A' , 'YYY' , case = False , regex = True )
2989
3015
expected = Series (['YYY' , 'B' , 'C' , 'YYYYYYbYYY' , 'BYYYcYYY' , '' , NA ,
2990
3016
'CYYYBYYY' , 'dog' , 'cYYYt' ])
2991
3017
assert_series_equal (result , expected )
2992
3018
2993
- result = s .str .replace ('^.a|dog' , 'XX-XX ' , case = False )
3019
+ result = s .str .replace ('^.a|dog' , 'XX-XX ' , case = False , regex = True )
2994
3020
expected = Series (['A' , 'B' , 'C' , 'XX-XX ba' , 'XX-XX ca' , '' , NA ,
2995
3021
'XX-XX BA' , 'XX-XX ' , 'XX-XX t' ])
2996
3022
assert_series_equal (result , expected )
@@ -3162,6 +3188,40 @@ def test_method_on_bytes(self):
3162
3188
match = "Cannot use .str.cat with values of.*" ):
3163
3189
lhs .str .cat (rhs )
3164
3190
3191
@pytest.mark.filterwarnings("ignore: '.' is interpreted as a literal")
@pytest.mark.parametrize("regex, expected_array", [
    (True, ['foofoofoo', 'foofoofoo']),
    (False, ['abc', '123']),
    (None, ['abc', '123'])
])
def test_replace_single_pattern(self, regex, expected_array):
    """Replace the one-character pattern '.' under each ``regex`` setting.

    The parametrized cases expect every character to become 'foo' when
    ``regex=True``, and the input to come back unchanged when ``regex`` is
    ``False`` or ``None``.
    """
    # GH: 24804
    source = Series(['abc', '123'])
    replaced = source.str.replace('.', 'foo', regex=regex)
    tm.assert_series_equal(replaced, Series(expected_array))
3203
+
3204
@pytest.mark.parametrize("input_array, single_char, replace_char, "
                         "expect_array, warn",
                         [("a.c", ".", "b", "abc", True),
                          ("a@c", "@", "at", "aatc", False)]
                         )
def test_replace_warning_single_character(self, input_array,
                                          single_char, replace_char,
                                          expect_array, warn):
    """Check the replacement result and, where flagged, the FutureWarning.

    When ``warn`` is true the call passes ``regex=None`` explicitly and must
    emit a ``FutureWarning``; otherwise ``regex`` is left at its default and
    no warning assertion is made. Either way the result is compared against
    ``expect_array``.
    """
    # GH: 24804
    values = Series([input_array])
    if not warn:
        result = values.str.replace(single_char, replace_char)
    else:
        # Stack level is not checked, only that the warning category fires.
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = values.str.replace(single_char, replace_char,
                                        regex=None)

    tm.assert_series_equal(result, Series([expect_array]))
3224
+
3165
3225
def test_casefold (self ):
3166
3226
# GH25405
3167
3227
expected = Series (['ss' , NA , 'case' , 'ssd' ])
0 commit comments