import pandas as pd
import numpy as np
def header(msg, width=50):
    """Print a section banner: a dashed rule followed by ``[msg]``.

    Args:
        msg: Label shown between the square brackets. Any object is
            accepted; it is rendered with ``str.format``.
        width: Number of ``-`` characters in the rule (default 50, the
            width the original hard-coded).
    """
    print('-' * width)
    # format() instead of '+' so a non-string label does not raise TypeError.
    print('[{}]'.format(msg))
# Build the monthly climate table used by every cell below, echo it, and
# save a copy to 'foo.csv' in the current working directory.
field_names = [
    'month',
    'avg_high',
    'avg_low',
    'record_high',
    'record_low',
    'avg_precipitation',
]

# One row per calendar month, in the same order as `field_names`.
monthly_records = [
    ['Jan', 58, 42, 74, 22, 2.95],
    ['Feb', 61, 45, 78, 26, 3.02],
    ['Mar', 65, 48, 84, 25, 2.34],
    ['Apr', 67, 50, 92, 28, 1.02],
    ['May', 71, 53, 98, 35, 0.48],
    ['Jun', 75, 56, 107, 41, 0.11],
    ['Jul', 77, 58, 105, 44, 0.0],
    ['Aug', 77, 59, 102, 43, 0.03],
    ['Sep', 77, 57, 103, 40, 0.17],
    ['Oct', 73, 54, 96, 34, 0.81],
    ['Nov', 64, 48, 84, 30, 1.7],
    ['Dec', 58, 42, 73, 21, 2.56],
]

# Explicit integer labels 0..11, passed as a plain list like the original.
row_ids = list(range(len(monthly_records)))

df = pd.DataFrame(monthly_records, index=row_ids, columns=field_names)
print(df)
df.to_csv('foo.csv')
month avg_high avg_low record_high record_low avg_precipitation
0 Jan 58 42 74 22 2.95
1 Feb 61 45 78 26 3.02
2 Mar 65 48 84 25 2.34
3 Apr 67 50 92 28 1.02
4 May 71 53 98 35 0.48
5 Jun 75 56 107 41 0.11
6 Jul 77 58 105 44 0.00
7 Aug 77 59 102 43 0.03
8 Sep 77 57 103 40 0.17
9 Oct 73 54 96 34 0.81
10 Nov 64 48 84 30 1.70
11 Dec 58 42 73 21 2.56
# Preview both ends of the table (five rows each by default).
header("2.df.head()")
print(df.head())
header("3.df.tail()")
# This used to call df.head() again, so the "tail" section repeated the
# first five rows. Any captured output from before this fix shows rows 0-4
# here; rerun the cell to refresh it.
print(df.tail())
--------------------------------------------------
[2.df.head()]
month avg_high avg_low record_high record_low avg_precipitation
0 Jan 58 42 74 22 2.95
1 Feb 61 45 78 26 3.02
2 Mar 65 48 84 25 2.34
3 Apr 67 50 92 28 1.02
4 May 71 53 98 35 0.48
--------------------------------------------------
[3.df.tail()]
month avg_high avg_low record_high record_low avg_precipitation
0 Jan 58 42 74 22 2.95
1 Feb 61 45 78 26 3.02
2 Mar 65 48 84 25 2.34
3 Apr 67 50 92 28 1.02
4 May 71 53 98 35 0.48
# Section 4: the dtype of each column. `dtypes` is read as an attribute
# (no call), even though the banner label is written with "()".
header("4.df.dtypes()")
column_types = df.dtypes
print(column_types)
--------------------------------------------------
[4.df.dtypes()]
month object
avg_high int64
avg_low int64
record_high int64
record_low int64
avg_precipitation float64
dtype: object
# Section 5: the row labels of the frame (read as an attribute, no call).
header("5.df.index()")
row_labels = df.index
print(row_labels)
--------------------------------------------------
[5.df.index()]
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype='int64')
# Section 6: the column labels of the frame (read as an attribute, no call).
header("6.df.columns()")
column_index = df.columns
print(column_index)
--------------------------------------------------
[6.df.columns()]
Index(['month', 'avg_high', 'avg_low', 'record_high', 'record_low',
'avg_precipitation'],
dtype='object')
# Section 7: the cell contents without row or column labels
# (read as an attribute, no call).
header("7.df.values()")
cell_matrix = df.values
print(cell_matrix)
--------------------------------------------------
[7.df.values()]
[['Jan' 58 42 74 22 2.95]
['Feb' 61 45 78 26 3.02]
['Mar' 65 48 84 25 2.34]
['Apr' 67 50 92 28 1.02]
['May' 71 53 98 35 0.48]
['Jun' 75 56 107 41 0.11]
['Jul' 77 58 105 44 0.0]
['Aug' 77 59 102 43 0.03]
['Sep' 77 57 103 40 0.17]
['Oct' 73 54 96 34 0.81]
['Nov' 64 48 84 30 1.7]
['Dec' 58 42 73 21 2.56]]
# Section 8: summary statistics (count, mean, std, min, quartiles, max).
# In the recorded result only the five numeric columns appear; 'month' is
# left out.
header("8.df.describe()")
# Kept as a bare trailing expression rather than print(): presumably this
# runs as a notebook cell, where the last expression is rendered as a table.
df.describe()
--------------------------------------------------
[8.df.describe()]
| | avg_high | avg_low | record_high | record_low | avg_precipitation |
---|---|---|---|---|---|
count | 12.000000 | 12.000000 | 12.000000 | 12.000000 | 12.000000 |
mean | 68.583333 | 51.000000 | 91.333333 | 32.416667 | 1.265833 |
std | 7.366488 | 6.060303 | 12.323911 | 8.240238 | 1.186396 |
min | 58.000000 | 42.000000 | 73.000000 | 21.000000 | 0.000000 |
25% | 63.250000 | 47.250000 | 82.500000 | 25.750000 | 0.155000 |
50% | 69.000000 | 51.500000 | 94.000000 | 32.000000 | 0.915000 |
75% | 75.500000 | 56.250000 | 102.250000 | 40.250000 | 2.395000 |
max | 77.000000 | 59.000000 | 107.000000 | 44.000000 | 3.020000 |
# Rows ordered by 'record_high', highest value first.
header("8.df.sort_values()")
# `ascending` must be the boolean False. The previous string "False" is
# truthy, so older pandas sorted ascending (any captured output from before
# this fix runs 73 -> 107); newer pandas versions are expected to reject a
# non-bool value with ValueError.
# Kept as a bare trailing expression so a notebook renders the sorted frame.
df.sort_values("record_high", ascending=False)
--------------------------------------------------
[8.df.sort_values()]
| | month | avg_high | avg_low | record_high | record_low | avg_precipitation |
---|---|---|---|---|---|---|
11 | Dec | 58 | 42 | 73 | 21 | 2.56 |
0 | Jan | 58 | 42 | 74 | 22 | 2.95 |
1 | Feb | 61 | 45 | 78 | 26 | 3.02 |
2 | Mar | 65 | 48 | 84 | 25 | 2.34 |
10 | Nov | 64 | 48 | 84 | 30 | 1.70 |
3 | Apr | 67 | 50 | 92 | 28 | 1.02 |
9 | Oct | 73 | 54 | 96 | 34 | 0.81 |
4 | May | 71 | 53 | 98 | 35 | 0.48 |
7 | Aug | 77 | 59 | 102 | 43 | 0.03 |
8 | Sep | 77 | 57 | 103 | 40 | 0.17 |
6 | Jul | 77 | 58 | 105 | 44 | 0.00 |
5 | Jun | 75 | 56 | 107 | 41 | 0.11 |
# Attribute-style access to the 'avg_low' column; the recorded result is a
# Series named avg_low with dtype int64.
df.avg_low
0 42 1 45 2 48 3 50 4 53 5 56 6 58 7 59 8 57 9 54 10 48 11 42 Name: avg_low, dtype: int64
# Bracket-style access to the same column; the recorded result matches
# the attribute form `df.avg_low`.
df['avg_low']
0 42 1 45 2 48 3 50 4 53 5 56 6 58 7 59 8 57 9 54 10 48 11 42 Name: avg_low, dtype: int64
# A plain slice on the frame selects rows, not columns. The end is
# exclusive: the recorded result holds rows 2 and 3.
df[2:4]
| | month | avg_high | avg_low | record_high | record_low | avg_precipitation |
---|---|---|---|---|---|---|
2 | Mar | 65 | 48 | 84 | 25 | 2.34 |
3 | Apr | 67 | 50 | 92 | 28 | 1.02 |
# A list of column names returns a frame with just those columns, in the
# order listed (avg_low comes before avg_high in the recorded result).
df[['avg_low','avg_high','record_high']]
| | avg_low | avg_high | record_high |
---|---|---|---|
0 | 42 | 58 | 74 |
1 | 45 | 61 | 78 |
2 | 48 | 65 | 84 |
3 | 50 | 67 | 92 |
4 | 53 | 71 | 98 |
5 | 56 | 75 | 107 |
6 | 58 | 77 | 105 |
7 | 59 | 77 | 102 |
8 | 57 | 77 | 103 |
9 | 54 | 73 | 96 |
10 | 48 | 64 | 84 |
11 | 42 | 58 | 73 |
# iloc selects by integer position. Row slice 5:8 excludes the end, so the
# recorded result holds rows 5, 6 and 7; column positions 0 and 3 are
# 'month' and 'record_high'.
df.iloc[5:8 , [0,3]] # positional indexing, like indexing into an array
| | month | record_high |
---|---|---|
5 | Jun | 107 |
6 | Jul | 105 |
7 | Aug | 102 |
# loc selects by label. Unlike the iloc slice, the label slice 5:8 includes
# the end: the recorded result holds rows 5, 6, 7 and 8.
df.loc[5:8 , ['avg_low','avg_high','record_high']]
| | avg_low | avg_high | record_high |
---|---|---|---|
5 | 56 | 75 | 107 |
6 | 58 | 77 | 105 |
7 | 59 | 77 | 102 |
8 | 57 | 77 | 103 |
# A single row label with a list of columns returns one row as a Series;
# in the recorded result it is named 9 and has dtype object.
df.loc[9, ['avg_low','avg_high','record_high']]
avg_low 54 avg_high 73 record_high 96 Name: 9, dtype: object
# Positional selection again: rows 3 and 4 (end exclusive) and the columns
# at positions 0 and 3 ('month' and 'record_high').
df.iloc[3:5,[0,3]]
| | month | record_high |
---|---|---|
3 | Apr | 92 |
4 | May | 98 |