Load a CSV File



Data Source


Import Pandas

# Import Pandas
import pandas as pd

Read CSV file

# Parse the whole cereal CSV file into a pandas DataFrame
df = pd.read_csv(
    'data/cereal.csv',
)

# Display the DataFrame (rendered as the cell output in a notebook)
df

name mfr type calories protein fat sodium fiber carbo sugars potass vitamins shelf weight cups rating
0 100% Bran N C 70 4 1 130 10.0 5.0 6 280 25 3 1.0 0.33 68.402973
1 100% Natural Bran Q C 120 3 5 15 2.0 8.0 8 135 0 3 1.0 1.00 33.983679
2 All-Bran K C 70 4 1 260 9.0 7.0 5 320 25 3 1.0 0.33 59.425505
3 All-Bran with Extra Fiber K C 50 4 0 140 14.0 8.0 0 330 25 3 1.0 0.50 93.704912
4 Almond Delight R C 110 2 2 200 1.0 14.0 8 -1 25 3 1.0 0.75 34.384843
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
72 Triples G C 110 2 1 250 0.0 21.0 3 60 25 3 1.0 0.75 39.106174
73 Trix G C 110 1 1 140 0.0 13.0 12 25 25 2 1.0 1.00 27.753301
74 Wheat Chex R C 100 3 1 230 3.0 17.0 3 115 25 1 1.0 0.67 49.787445
75 Wheaties G C 100 3 1 200 3.0 17.0 3 110 25 1 1.0 1.00 51.592193
76 Wheaties Honey Gold G C 110 2 1 200 1.0 16.0 8 60 25 1 1.0 0.75 36.187559

77 rows × 16 columns

Read CSV file with specific columns

# Read only the listed columns from the CSV file; every other column is skipped
df = pd.read_csv(
    'data/cereal.csv',
    usecols=['name', 'protein', 'fiber', 'vitamins'],
)

# Display the reduced DataFrame (rendered as the cell output in a notebook)
df

name protein fiber vitamins
0 100% Bran 4 10.0 25
1 100% Natural Bran 3 2.0 0
2 All-Bran 4 9.0 25
3 All-Bran with Extra Fiber 4 14.0 25
4 Almond Delight 2 1.0 25
... ... ... ... ...
72 Triples 2 0.0 25
73 Trix 1 0.0 25
74 Wheat Chex 3 3.0 25
75 Wheaties 3 3.0 25
76 Wheaties Honey Gold 2 1.0 25

77 rows × 4 columns

Read CSV file with UTF-8 encoding

# Read the CSV file, stating explicitly that its bytes are to be decoded as UTF-8
df = pd.read_csv(
    'data/cereal.csv',
    encoding='UTF-8',
)

# Display the DataFrame (rendered as the cell output in a notebook)
df

name mfr type calories protein fat sodium fiber carbo sugars potass vitamins shelf weight cups rating
0 100% Bran N C 70 4 1 130 10.0 5.0 6 280 25 3 1.0 0.33 68.402973
1 100% Natural Bran Q C 120 3 5 15 2.0 8.0 8 135 0 3 1.0 1.00 33.983679
2 All-Bran K C 70 4 1 260 9.0 7.0 5 320 25 3 1.0 0.33 59.425505
3 All-Bran with Extra Fiber K C 50 4 0 140 14.0 8.0 0 330 25 3 1.0 0.50 93.704912
4 Almond Delight R C 110 2 2 200 1.0 14.0 8 -1 25 3 1.0 0.75 34.384843
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
72 Triples G C 110 2 1 250 0.0 21.0 3 60 25 3 1.0 0.75 39.106174
73 Trix G C 110 1 1 140 0.0 13.0 12 25 25 2 1.0 1.00 27.753301
74 Wheat Chex R C 100 3 1 230 3.0 17.0 3 115 25 1 1.0 0.67 49.787445
75 Wheaties G C 100 3 1 200 3.0 17.0 3 110 25 1 1.0 1.00 51.592193
76 Wheaties Honey Gold G C 110 2 1 200 1.0 16.0 8 60 25 1 1.0 0.75 36.187559

77 rows × 16 columns

# Write the DataFrame to a CSV file.
# index=False leaves out the automatically generated row index (0, 1, 2, ...).
# With the default (index=True) that index is written as an extra unnamed first
# column, which then appears as "Unnamed: 0" when the file is read back with
# pd.read_csv.
df.to_csv('data/dataframe_to_csv.csv', index=False)