-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
data_cleaning4.py
32 lines (23 loc) · 890 Bytes
/
data_cleaning4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pandas as pd
# Config settings
pd.set_option('max_columns', None)
pd.set_option('max_rows', 12)
# Import CSV data
data_frames = pd.read_csv (r'simulated_data.csv')
# Data Type Conversion
# Remove '$' from donation strings
data_frames['donation'] = data_frames['donation'].str.strip('$')
# Convert donation stings into numerical data type
data_frames['donation'] = data_frames['donation'].astype('float64')
# Handle Data Inconsistencies
# Capitalize strings
data_frames['street_address'] = data_frames['street_address'].str.split()
def capitalize_words(arr):
for index, word in enumerate(arr):
if index == 0:
pass
else:
arr[index] = word.capitalize()
data_frames['street_address'].apply(lambda x: capitalize_words(x))
data_frames['street_address'] = data_frames['street_address'].str.join(' ')
print(data_frames['street_address'])