Wednesday, July 3, 2019

Python pandas pipe and groupby functions





Python pandas pipe(), apply(), applymap() and groupby() functions




# Disabled example (kept as a bare string so it does not run):
# builds a two-column frame (quantity, rate) and passes np.cumsum through
# DataFrame.pipe, DataFrame.apply and DataFrame.applymap, printing each result.
# NOTE(review): the snippet uses `np` but no numpy import is visible in this
# file; it would need `import numpy as np` before being re-enabled.
# NOTE(review): applymap is deprecated in recent pandas releases in favour of
# DataFrame.map -- confirm against the pandas version in use.
'''
def adder(a):
    #return a+2
    return np.sqrt(a)
def adder2(a):
    return a/2
import pandas as pd
d1 = {'quantity':[28,36,25,14,25,48],
      'rate':[11,22,33,44,55,66],
      }
df = pd.DataFrame(d1)
print(df)
print('*'*30)
#df1 = df.pipe(adder)
#df1=df.pipe(adder).pipe(adder2)
#df1 = df.pipe(np.mean,axis=1)
#df1 = df.apply(np.mean,axis=0)
#df1 = df.apply(np.sqrt,axis=0)
#df2 = df.applymap(np.sqrt)
#df1 = df.applymap(np.mean)
df1 = df.pipe(np.cumsum)
df2 = df.apply(np.cumsum)
df3 = df.applymap(np.cumsum)
print(df1)
print(df2)
print(df3)
'''
# Disabled example (kept as a bare string so it does not run):
# builds a frame of science/english marks from two Series and passes np.mean
# through DataFrame.pipe, DataFrame.apply (axis=0) and DataFrame.applymap,
# printing the three results separated by a row of asterisks.
# NOTE(review): relies on `np` and `pd` already being in scope; no numpy
# import is visible in this file.
'''
d = {'science_marks':pd.Series([2,5]),
'english_marks':pd.Series([8,7])}
df= pd.DataFrame(d)
print(df)
print('*'*30)
df1 = df.pipe(np.mean)
df2=df.apply(np.mean,axis=0)
df3=df.applymap(np.mean)
print (df1)
print('*'*30)
print (df2)
print('*'*30)
print (df3)
'''
# Disabled example (kept as a bare string so it does not run):
# builds a frame with one column per year (2014-2017) from four Series and
# requests the month labels Jan..April as the row index.
# NOTE(review): the Series are created without an index, so they presumably
# carry default integer labels; passing index=[month names] would then realign
# rather than relabel and likely yield NaN rows -- confirm before re-enabling.
'''
d = {'2014':pd.Series([2,5,3,2]),
'2015':pd.Series([8,7,3,6]),
'2016':pd.Series([8,2,2,7]),
'2017':pd.Series([1,2,8,7])}
df= pd.DataFrame(d,index=['Jan','Feb','March','April'])
'''
# Disabled example (kept as a bare string so it does not run):
# builds a frame from a dict of per-year dicts keyed by month (columns
# 2014-2018, rows Jan..April), then uses DataFrame.pipe with np.sum for the
# per-year totals and chains np.max on top of that, printing both results.
# NOTE(review): relies on `np` and `pd` already being in scope; no numpy
# import is visible in this file.
'''
a = {'Jan':10,'Feb':15,'March':20,'April':2}
b = {'Jan':5,'Feb':20,'March':10,'April':3}
c = {'Jan':15,'Feb':25,'March':15,'April':10}
d = {'Jan':10,'Feb':10,'March':10,'April':1}
e = {'Jan':5,'Feb':5,'March':20,'April':2}
dd = {'2014':a,'2015':b,'2016':c,'2017':d,'2018':e}
df= pd.DataFrame(dd)
print(df)
print('*'*30)
df1=df.pipe(np.sum) #YEAR  WISE TOTAL SALES
df2=df.pipe(np.sum).pipe(np.max) #YEAR  WISE MAX SALES
print (df1)
print('*'*30)
print (df2)
'''
import pandas as pd

# Sample sales records, stored column-wise: one entry per sale in each list.
d1 = {
    'item': ['AC', 'TV', 'CAR', 'TV', 'BIKE', 'TV'],
    'quantity': [28, 36, 25, 14, 25, 48],
    'rate': [11, 22, 33, 44, 55, 66],
    'country': ['USA', 'PAK', 'JAPAN', 'USA', 'INDIA', 'USA'],
}
df = pd.DataFrame(data=d1)

# Show the raw frame, followed by a separator row of 30 asterisks.
print(df, 30 * '*', sep='\n')

# First grouping: by item only. Nothing is printed for it here; other things
# to try on this object are gp.groups, gp.get_group(...), gp.size(),
# gp.count(), gp.max() and gp.min().
gp = df.groupby(by=['item'])

# Second grouping replaces the first: one group per (item, country) pair.
# Print the mapping from each group key to the row labels it contains.
# A single group can be fetched with gp.get_group(('TV', 'USA')).
gp = df.groupby(by=['item', 'country'])
print(gp.groups)