Extract Google Trends Data with Python

Anyone who has regularly worked with Google Trends data has had to deal with the slightly tedious task of grabbing keyword level data and reformatting the spreadsheet provided by Google. After looking for a seamless way to pull the data, I came upon the PyTrends library on GitHub, and sought to put together some quick user defined functions to manage the task of pulling daily and weekly trends data.


# set working directory (location where code is)
import os
os.chdir("path")

import re
import csv
import time
import pandas as pd
from random import randint
from GT_Automation_Code import pyGTrends

# set gmail credentials and path to extract data
google_username = "*****@gmail.com"
google_password = "*****"

Daily_Data = [ ]
        
# define daily pull code
def GT_Daily_Run(keys):
    
    path = 'path'

    # connect to Google
    connector = pyGTrends(google_username, google_password)
    # make request
    connector.request_report(keys, date="today 90-d", geo="US")
    # wait a random amount of time between requests to avoid bot detection
    time.sleep(randint(5, 10))
    # download file
    connector.save_csv(path, '_' + "GT_Daily" + '_' + keys.replace(' ', '_'))

    name = path + '_' + "GT_Daily" + '_' + keys.replace(' ', '_')
      
    with open(name + '.csv', 'rt') as csvfile:    
        csvReader = csv.reader(csvfile)
        data = []

        for row in csvReader:
            if any('2015' in s for s in row): 
                data.append(row)

        day_df = pd.DataFrame(data)
        cols = ["Day", keys]    
        day_df.columns = [cols]  
        Daily_Data.append(day_df) 

keywords = ['soccer', 'football', 'baseball']    

map(lambda x: GT_Daily_Run(x), keywords)  

rge = [Daily_Data[0], Daily_Data[1], Daily_Data[2]]    

df_final_daily = reduce(lambda left,right: pd.merge(left,right, on='Day'), rge)
df_final_daily = df_final_daily.loc[:, (df_final_daily != "0").any(axis=0)]
df_final_daily.to_csv("Daily_Trends_Data.csv", index=False)

Weekly_Data = [ ]     
        
# define weekly pull code
def GT_Weekly_Run(keys):

    path = 'path'
    
    # connect to Google
    connector = pyGTrends(google_username, google_password)
    # make request
    connector.request_report(keys, geo="US")
    # wait a random amount of time between requests to avoid bot detection
    time.sleep(randint(5, 10))
    # download file
    connector.save_csv(path, '_' + "GT_Weekly" + '_' + keys.replace(' ', '_'))

    name = path + '_' + "GT_Weekly" + '_' + keys.replace(' ', '_') 

    with open(name + '.csv', 'rt') as csvfile:    
        csvReader = csv.reader(csvfile)
        data = []
        datex = re.compile('(19|20)dd-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])')

        for row in csvReader:
            if datex.search(str(row)):
                data.append(row)
       
        week_df = pd.DataFrame(data)
        cols = ["Week", keys]    
        week_df.columns = [cols]
        Weekly_Data.append(week_df)              
             
map(lambda x: GT_Weekly_Run(x), keywords)  

rge = [Weekly_Data[0], Weekly_Data[1], Weekly_Data[2]]    

df_final_weekly = reduce(lambda left,right: pd.merge(left,right, on='Week'), rge)
df_final_weekly = df_final_weekly.loc[:, (df_final_weekly != "0").any(axis=0)] 
df_final_weekly.to_csv("Weekly_Trends_Data.csv", index=False)


Advertisements

One thought on “Extract Google Trends Data with Python

  1. Pingback: Distilled News | Data Analytics & R

Comments are closed.