There are plenty of instances where analysts are regularly forwarded xls spreadsheets and tasked with summarizing the data. In many cases, these scenarios can be automated through fairly simple Python scripts. In the following code, I take an Excel spreadsheet with two sheets, summarize each sheet using a pivot table, and add those results to sheets in a new spreadsheet.
# IMPORT MODULES
import os
import sys
import pandas as pd
import numpy as np
import xlsxwriter
def automate(file1, file2):
# check file
if os.stat(file1).st_size == 0:
print "File Status: Empty"
sys.exit("Error")
else:
xl = pd.ExcelFile(file1)
writer = pd.ExcelWriter(file2, engine='xlsxwriter')
# Sheet1 pivot
dat1 = xl.parse("Sheet1")
dat1_result = dat1[["buyer_aid","lead_payout"]].groupby("buyer_aid").aggregate(np.mean).reset_index().sort(['lead_payout'], ascending=[False])
dat1_result.to_excel(writer, sheet_name='Average_Payout', index=False, header=True)
# Sheet2 pivot
dat2 = xl.parse("Sheet2")
dat2_result = dat2[["buyer_aid","lead_payout"]].groupby("buyer_aid").aggregate(np.sum).reset_index().sort(['lead_payout'], ascending=[False])
dat2_result.to_excel(writer, sheet_name='Sum_Payout', index=False, header=True)
dat3 = pd.merge(dat1_result, dat2_result, how="inner")
dat3.to_excel(writer, sheet_name='All_Results', index=False, header=True)
# save the Excel file.
writer.save()
automate("LP_Data.xls", 'New_Report.xls')