A Few Days of Python: Automating Tasks Involving Excel Files

There are plenty of instances where analysts are regularly forwarded xls spreadsheets and tasked with summarizing the data. In many cases, these scenarios can be automated through fairly simple Python scripts. In the following code, I take an Excel spreadsheet with two sheets, summarize each sheet using a pivot table, and add those results to sheets in a new spreadsheet.

import os 
import sys
import pandas as pd
import numpy as np
import xlsxwriter

def automate(file1, file2):

    # check file
    if os.stat(file1).st_size == 0:
       print "File Status: Empty"
       xl = pd.ExcelFile(file1)
       writer = pd.ExcelWriter(file2, engine='xlsxwriter')

    # Sheet1 pivot 
    dat1 = xl.parse("Sheet1")
    dat1_result = dat1[["buyer_aid","lead_payout"]].groupby("buyer_aid").aggregate(np.mean).reset_index().sort(['lead_payout'], ascending=[False])
    dat1_result.to_excel(writer, sheet_name='Average_Payout', index=False, header=True)
    # Sheet2 pivot 
    dat2 = xl.parse("Sheet2")
    dat2_result = dat2[["buyer_aid","lead_payout"]].groupby("buyer_aid").aggregate(np.sum).reset_index().sort(['lead_payout'], ascending=[False])
    dat2_result.to_excel(writer, sheet_name='Sum_Payout', index=False, header=True)
    dat3 = pd.merge(dat1_result, dat2_result, how="inner")
    dat3.to_excel(writer, sheet_name='All_Results', index=False, header=True)
    # save the Excel file.    

automate("LP_Data.xls", 'New_Report.xls')