Python for R Users
Daniel Chen
Instructor
df <- data.frame(
'A' = c(1, 2, 3),
'B' = c(4, 5, 6)
)
df
A B
1 1 4
2 2 5
3 3 6
class(df)
"data.frame"
import pandas as pd
df = pd.DataFrame(
{'A': [1, 2, 3],
'B':[4, 5, 6]})
df
A B
0 1 4
1 2 5
2 3 6
type(df)
pandas.core.frame.DataFrame
str(df)
'data.frame': 3 obs. of 2 variables:
$ A: num 1 2 3
$ B: num 4 5 6
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 A 3 non-null int64
1 B 3 non-null int64
dtypes: int64(2)
memory usage: 176.0 bytes
df$A <- as.character(df$A)
str(df)
'data.frame': 3 obs. of 2 variables:
$ A: chr "1" "2" "3"
$ B: num 4 5 6
df['A'] = df['A'].astype(str)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 A 3 non-null object
1 B 3 non-null int64
dtypes: int64(1), object(1)
memory usage: 176.0+ bytes
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
A 3 non-null object
B 3 non-null int64
dtypes: int64(1), object(1)
memory usage: 128.0+ bytes
str accessor
df = pd.DataFrame({'name': ['Daniel ',' Eric', ' Julia ']})
df
name
0 Daniel
1 Eric
2 Julia
df['name_strip'] = df['name'].str.strip()
df
name name_strip
0 Daniel Daniel
1 Eric Eric
2 Julia Julia
df = pd.DataFrame({'name': ['Daniel','Eric', 'Julia'],
...: 'gender':['Male', 'Male', 'Female']})
df.dtypes
Out[3]:
gender object
name object
dtype: object
df['gender_cat'] = df['gender'].astype('category')
df.dtypes
gender object
name object
gender_cat category
dtype: object
df['gender_cat'].cat.categories
Index(['Female', 'Male'], dtype='object')
df.gender_cat.cat.codes
0 1
1 1
2 0
dtype: int8
df = pd.DataFrame({'name': ['Rosaline Franklin', 'William Gosset'],
'born': ['1920-07-25', '1876-06-13']})
df['born_dt'] = pd.to_datetime(df['born'])
df
born name born_dt
0 1920-07-25 Rosaline Franklin 1920-07-25
1 1876-06-13 William Gosset 1876-06-13
df.dtypes
born object
name object
born_dt datetime64[ns]
dtype: object
df['born_dt'].dt.day
0 25
1 13
Name: born_dt, dtype: int64
df['born_dt'].dt.month
0 7
1 6
Name: born_dt, dtype: int64
df['born_dt'].dt.year
0 1920
1 1876
Name: born_dt, dtype: int64
Python for R Users