Automagic TV Show Calendar
A little while ago I was browsing the web and discovered a website called tvrage.com[1] which seems to be the definitive online TV guide. I didn't originally enter the site on the main index but on a page describing the functionality of an XML API[2] they host for accessing their database of TV shows.
To me, this is like opening presents on christmas day. Just imagine the possibilities! I immediately began exploring the kind of data they provide. The very first idea I had was to use this to create events on my google calendar automatically for unaired episodes of my favorite TV shows.
I've previously written python scripts that interface with gdata but I find their implementation for python to be kind of cumbersome to deal with so I began researching their Protocol API[3]. At first I wasted a lot of time attempting to build the necessary XML structures to add events and the like. This got old very fast and I decided to just give JSON-C[4] a try. Turns out you can use the built-in JSON module in python for creating the necessary structures.
For parsing the results I got from tvrage I ended up using Python's xml.etree.ElementTree, which was simple enough to set up to retrieve only the information I was interested in for each episode.[5]
I had a bit of trouble initially with adding events to google calendar. This stemmed from the fact that google often will return an HTTP Redirect which includes a url with an appended gsession attribute which you're supposed to resubmit the exact data from the first request to. Once I figured this out it was turtles all the way down. I even managed to get the whole script multi-threaded to speed things up since it's impossible to perform batch-requests with JSON-C.
I should note that in the configuration file, the calendar setting should be the "Calendar ID" of the calendar, which can be found on the settings page for the individual calendar; it is grouped with the XML and iCal feeds.
ShowList.txt:[6]
1 2 3 4 5 6 7 8 9 10 11 12 | Castle 19267 House 3908 Bones 2870 Big Bang Theory, The 8511 Mentalist, The 18967 Rizzoli & Isles 24996 Venture Bros., The 6270 Top Gear 6753 Mythbusters 4605 Archer 23354 NCIS 4628 Community 22589 |
Config.cfg:
1 2 3 4 | [Credentials] username = someuser@gmail.com password = somebase64encodedpassword calendar = somecalendarid@group.calendar.google.com |
AirDate.py:
import urllib2
import urllib
import json
import ConfigParser
import base64
from datetime import date
from xml.etree import ElementTree
from threading import Thread

# Calendar ID and HTTP headers used for every Google Calendar request.
# Both are placeholders here and are filled in by the __main__ section
# before any thread is started.
calendar = ""
header = {}


# Thread for retrieving a list of episodes for a given show_id
class airDate(Thread):
    """Fetch the episode list of one show and keep the unaired episodes.

    Once the thread has finished, ``result`` holds one string per episode
    whose air date is today or later, formatted as
    "YYYY-MM-DD <show_name> - S##E##".
    """

    # Initialize thread and set some local attributes
    def __init__(self, show_name, show_id):
        Thread.__init__(self)
        self.show_name = show_name
        self.show_id = show_id
        # Created here rather than in run() so the attribute exists even
        # when run() dies early (e.g. the download fails); the main thread
        # iterates over it unconditionally after join().
        self.result = []

    # Get episode list from tvrage.com based on the show_id
    def run(self):
        # Retrieve XML episode_list from tvrage.com
        xml_data = urllib2.urlopen("http://services.tvrage.com/feeds/episode_list.php?sid=%s" % self.show_id).read()
        # Parse XML into ElementTree.Element()
        xml_tree = ElementTree.fromstring(xml_data)
        today = date.today()
        # For each season
        for season in xml_tree.findall("Episodelist/Season"):
            # Get the season number
            season_num = int(season.get("no"))
            # For each episode in the episode list
            for episode in season.findall("episode"):
                episode_num = int(episode.find("seasonnum").text)
                # Build the episode code S##E##
                episode_code = "S%02dE%02d" % (season_num, episode_num)
                try:
                    # Parse the airdate into year, month and day
                    year, month, day = [int(part) for part in episode.find("airdate").text.split("-")]
                    episode_airdate = date(year, month, day)
                except ValueError:
                    # If the airdate is invalid (tvrage.com sometimes
                    # includes 00's for unknown sections of the date)
                    continue
                # If episode hasn't aired yet
                if episode_airdate >= today:
                    # Add episode to results list
                    self.result.append("%s %s - %s" % (str(episode_airdate), self.show_name, episode_code))


# Thread for adding events to google calendar
class addEvent(Thread):
    """Create one quick-add event on the configured calendar.

    ``episode`` is the text handed to Google Calendar's quick-add parser
    (one of the strings produced by ``airDate``).
    """

    # Initialize thread and set local episode variable
    def __init__(self, episode):
        Thread.__init__(self)
        self.episode = episode

    # Add new entry to google calendar
    def run(self):
        # Build entry structure and convert it to JSON
        entry = json.dumps({"data": {"details": self.episode, "quickAdd": True}})
        # Build request including necessary headers and data
        calReq = urllib2.Request("http://www.google.com/calendar/feeds/%s/private/full?alt=jsonc" % (calendar), entry, header)
        # Execute the request
        calRes = urllib2.urlopen(calReq)
        # Resubmit the same data to the final url (gsession appended)
        redirectReq = urllib2.Request(calRes.geturl(), entry, header)
        try:
            urllib2.urlopen(redirectReq)
        except urllib2.HTTPError:
            # If we get some sort of HTTP error code
            # skip entry, can always run again
            pass


# Get list of events already added to
# the calendar from previous executions
def getExistingEpisodes(header):
    """Return the titles of the events currently on the calendar.

    ``header`` is the dict of HTTP headers sent with the request.
    Returns an empty list when the feed contains no "items" key.
    """
    # Get JSON-C representation of calendar
    calReq = urllib2.Request(url="https://www.google.com/calendar/feeds/%s/private/full?alt=jsonc" % (calendar), headers=header)
    calRes = urllib2.urlopen(calReq)
    # Parse JSON-C
    data = json.loads(calRes.read())
    # Keep just the title of each event
    return [event["title"] for event in data["data"].get("items", [])]


if __name__ == '__main__':
    # Open the configuration file and get the necessary
    # credentials and settings
    config = ConfigParser.ConfigParser()
    with open("Config.cfg") as config_file:
        config.readfp(config_file)
    username = config.get("Credentials", "username")
    # Password is stored as base64 encoded string just so
    # we don't have our password sitting out in plain sight
    password = base64.b64decode(config.get("Credentials", "password"))
    calendar = config.get("Credentials", "calendar")

    # Build loginData structure, this is used to get
    # authentication data from google
    loginData = {
        "Email": username,
        "Passwd": password,
        "source": "BeMasher-ETR-2",
        "service": "cl",
    }
    # Encode the loginData for usage in a url
    loginData = urllib.urlencode(loginData)
    # Get authentication data
    gdataLogin = urllib2.urlopen("https://www.google.com/accounts/ClientLogin", data=loginData)
    SID, LSID, Auth = gdataLogin.read().splitlines()

    # Build header structure, this will be used for
    # all requests to google calendar from now on
    header = {
        "Authorization": "GoogleLogin %s" % (Auth),
        "GData-Version": 2,
        "Content-Type": "application/json",
    }

    # Open a list of the shows we're interested in
    # Stored as "show_name\tshow_id", one per line
    with open("ShowList.txt") as show_list:
        # Blank lines are skipped; they cannot be unpacked into name/id
        jobs = [line.strip().split("\t") for line in show_list if line.strip()]

    # Get a list of existing events from previous
    # executions so we don't wind up with duplicates
    existingEpisodes = set(getExistingEpisodes(header))

    threadQueue = []
    # For each show, fetch its episode list in its own thread
    for show_name, show_id in jobs:
        thread = airDate(show_name, show_id)
        thread.start()
        threadQueue.append(thread)

    episodes = []
    # While we've still got running threads
    while threadQueue:
        # Get a thread from the queue and block until it completes
        thread = threadQueue.pop()
        thread.join()
        # For each episode in the results
        for episode in thread.result:
            # episode[11:] drops the leading "YYYY-MM-DD " so the rest can
            # be compared with the titles already on the calendar
            if episode[11:] not in existingEpisodes:
                print(episode)
                # Add to list of episodes that need events created
                episodes.append(episode)

    # For each episode that doesn't have an
    # event on google calendar already
    for episode in episodes:
        # Create an addEvent thread, start it
        # and add it to the threadQueue
        thread = addEvent(episode)
        thread.start()
        threadQueue.append(thread)

    # Block until every addEvent thread has completed
    while threadQueue:
        threadQueue.pop().join()
This was all done shortly before I discovered that tvrage.com also provides iCal feeds for your favorite shows, provided that you register and add some to your list. Unfortunately the iCal feed they generate creates events for the exact air times of each episode, which I'm not really all that concerned about. So I still use this script to add all-day events for each episode, which make it easier to see when there's a new episode.
I did write another script using their XML API but that will have to wait for another post.
- http://tvrage.com/ [↩]
- http://services.tvrage.com/ [↩]
- Data API Developer's Guide: The Protocol [↩]
- Google's own flavor of JSON which is almost identical to plain old JSON. [↩]
- I only really needed the original air date, title, season number and episode number. [↩]
- You can find the show_id via the show search found on their XML API page. [↩]
Reply to Searches
Every couple of days I stop and take a look through the stats on my blog to see how things are progressing, and I notice plenty of times where people have used certain search terms to come to my blog. Sometimes the terms are very specific, sometimes they are very vague, but I always wonder the same thing: did they find what they were looking for?
I often wish I could somehow contact the person who made a certain query to ask them if they found what they wanted to find. I wonder if they took the time to read through the post I wrote that relates to their query and found their answer. I know how I read blogs and it's often not very thorough; I figure that a majority of the time I don't find what I'm looking for specifically because I just skim over it. I do admit though that I'm probably not making it very easy for readers to find very specific information; there's often a lot of fluff and cruft surrounding the important bits of information in my posts.
Google, can you help me? I want to be able to answer the questions posed by your users' queries. Yes, I already know you're going to tell me that that's what comments are for, but sadly not enough people use comments. I also already know you've implemented what you call a SearchWiki[1] and that seems to have failed miserably, but I like the idea; I just wish I could contribute my own results and findings to others' SearchWikis.
- http://www.google.com/support/websearch/bin/answer.py?hl=en&answer=115764 SearchWiki lets you customize your Google Web Search results by ranking, removing, and adding notes to them. [↩]
Google Trends
So my brother and I recently had a discussion on the pros and cons between him buying a Toyota FJ Cruiser and a Toyota Tacoma. Eventually we turned to Google Trends.
As a result, we got severely side-tracked. Here are the results of our tom-foolery:
- Toyota Tacoma vs. Toyota FJ Cruiser
- Intel vs. nVidia
- Cats vs. Dogs
- Kittens vs. Puppies
- Seal Clubbing vs. Puppy Kicking
- Your face vs. My face
- Your mom vs. My mom
As you can see there is a distinct trend in the decline of intelligence relating to each successive query, go figure.
BeMasher.net Wordle
I was looking through my starred posts in Google Reader and found a post from a while ago. It turns out I can make a "Wordle" just by giving the RSS feed of my blog to the tool, which will then process it. Another thing that makes this interesting is that I can change various settings to my liking.

Free Subversion Hosting
I've been in a very coding-prone mood lately. I've been working on developing a Python ETR (Employee Time Record) script for a friend and his club here at the University of Arizona. The project has grown significantly since I started it and this is one of the first projects in a while that I've developed for someone to use other than myself and I've been wanting a way to manage my code better.
I did some searching for free Subversion hosting. I've seen Google Code hosting before and looked at its feature set, which is quite complete. In terms of project management, Google Code hosting is probably the best for my needs. However, after reading through more of their help/support section I discovered that there's a maximum project creation limit of 10. Supposedly you can email support at the Google Code hosting service and work out a deal to get more than 10 projects, but that's really a hassle. So I started looking elsewhere. Currently, though, the PythonETR script is hosted at Google Code.
I stumbled upon ProjectLocker, which looked really promising. It turned out to be a very well put together system: you get 300MB of storage and unlimited Subversion repositories, along with Trac instances for each repository. There's just a big HOWEVER in the middle of what seems to be an awesome service: there is NO public anonymous Subversion access. If you want your projects to be publicly available, there's no way for you to allow the public to check out a read-only copy of your project. They also only allow a maximum of 2 user accounts, and you count as one of them.
Once I discovered all the limitations of ProjectLocker I kept on searching. The next promising service I found was XP-Dev. XP-Dev pretty much one-ups ProjectLocker on just about everything except a few crucial parts. There's no Trac; they provide their own "project tracking" tools such as stories, blogs, wikis and bugs. There's no real way to associate projects with Subversion repositories on this service. You get 1.5GB of storage and as many Subversion repositories/projects as you want to fill that up with. If all you're looking for is free Subversion hosting with no project tracking, then this is probably the service for you. However, if you're even the slightest bit paranoid about crypto, this service definitely isn't for you. Most of the defaults are for non-SSL connections, and the services that allow SSL use a certificate self-signed by XP-Dev. I did discover that they allow public read-only access to Subversion repositories, but only if you choose to enable this feature, so private projects don't have to be made publicly available.
Google Data API
I've got a friend who has been doing a lot of web development for the UA Baja Racing Club and I've been offering ideas for developing certain things he wanted on the website, like electronic time sheet submission. I suggested he just make a PDF form and have that post to a PHP script on his site, which would then add the values submitted to a table in his MySQL database for the site. The reason I suggested a PDF form is that it would automatically do form validation for him without having to write or find extra code to do that in JavaScript or in the PHP part himself.
That ended up being a little too complicated for what he was looking at and wasn't a very feasible solution since he had no real experience with SQL. So I started thinking about a few other ways he could do this that would be easier for him to implement himself. Eventually I remembered I wrote a Python script for this very thing a few weeks ago, which I use to tally up the amount of time I worked in the last pay period when I'm getting ready to submit my time sheets at work.
The python script uses the gdata api to access my calendars and tally up the total amount of time I worked each day and displays it in a user-friendly format for entering into the website I submit the ETR to.
My grand idea was to simply use the club's master Google account to create calendars for each person/team and share them with each person such that they have permission to modify/create events, then modify the Python script I wrote to spit out CSV files for each shared calendar for easy import into Excel (where they manage all of the info for each team and their members).
If you're interested in the script I could post it, it's mostly been kludged together from the python gdata tutorials I found so it's by no means original and I've only been programming in python for a couple of weeks so forgive any glaring mistakes:
try:
    from xml.etree import ElementTree  # for Python 2.5 users
except ImportError:
    from elementtree import ElementTree
import gdata.calendar.service
import gdata.service
import atom.service
import gdata.calendar
import atom
import getopt
import sys
import string
import time
import base64
import re
import datetime
import operator
from datetime import date
# NOTE: this rebinds the name ``time`` from the time module imported above
# to datetime.time; the order is kept exactly as in the original script.
from datetime import time


def gCalLogin(email, password):
    """Log in to Google Calendar and return the service object.

    ``password`` is expected base64 encoded; it is decoded before use.
    """
    calendar_service = gdata.calendar.service.CalendarService()
    calendar_service.email = email
    calendar_service.password = base64.b64decode(password)
    calendar_service.source = 'PythonETR'
    calendar_service.ProgrammaticLogin()
    return calendar_service


def FindCalendar(calendar_service, title):
    """Return the id text of the first owned calendar named ``title``.

    Returns False when no owned calendar has that title.
    """
    feed = calendar_service.GetOwnCalendarsFeed()
    for a_calendar in feed.entry:
        if a_calendar.title.text == title:
            return a_calendar.id.text
    return False


def DateRangeQuery(calendar_service, calendar_id='default', start_date='2009-01-01', end_date='2009-01-30', event_title='Work for ARL'):
    """Collect the start/end times of matching events in a date range.

    Only events titled ``event_title`` whose status is not "CANCELED" are
    considered. Each result item is ``[start_matches, end_matches]`` where
    both are the ``findall`` results of the timestamp pattern below;
    duplicate pairs are dropped.
    """
    result = []
    # year-month-dayThour:minute:second.fraction(+|-)hh:mm
    parseISO8601 = re.compile(r"(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)\.(\d+)([+-]\d+:\d+)")
    query = gdata.calendar.service.CalendarEventQuery(calendar_id, 'private', 'full')
    query.start_min = start_date
    query.start_max = end_date
    feed = calendar_service.CalendarQuery(query)
    for an_event in feed.entry:
        if (an_event.title.text == event_title) and (an_event.event_status.value != "CANCELED"):
            for when in an_event.when:
                current_when = [parseISO8601.findall(when.start_time), parseISO8601.findall(when.end_time)]
                if current_when not in result:
                    result.append(current_when)
    return result


def FindWeekBounds(today, weekday):
    """Return ``[today - (weekday + 1) days, today + (5 - weekday) days]``.

    With ``weekday`` taken from ``date.weekday()`` (Monday == 0) this is
    the Sunday before ``today`` and the Saturday after it.
    """
    weekstart = datetime.timedelta(days=int(weekday) + 1)
    weekend = datetime.timedelta(days=(5 - int(weekday)))
    return [today - weekstart, today + weekend]


calendar_service = gCalLogin("emailaddress@gmail.com", "passwordb64encoded")
work_calendar = FindCalendar(calendar_service, "maincalendarnamehere")
if not work_calendar:
    # FindCalendar returns False when nothing matches; stop here instead
    # of failing with a TypeError inside the regular expression below.
    sys.exit("Calendar 'maincalendarnamehere' was not found")

# Pull the "<id>@group.calendar.google.com" part out of the feed id
p = re.compile(r'[\w\d]*?%40group\.calendar\.google\.com', re.IGNORECASE)
work_calendar = ''.join(p.findall(work_calendar)).replace('%40', '@')

today = datetime.date.today()
work_week = {}
start, end = FindWeekBounds(today, today.weekday())
# Query from one week before the start of the current week to its end
work_events = DateRangeQuery(calendar_service, calendar_id=work_calendar, start_date=(start - datetime.timedelta(days=7)).isoformat(), end_date=end.isoformat())

for event in work_events:
    # Each side is a tuple of regex groups; the time-zone offset group
    # (the only one starting with + or -) is dropped, the rest become ints.
    start_parts = [int(x) for x in event[0][0] if x[0] not in ("-", "+")]
    end_parts = [int(x) for x in event[1][0] if x[0] not in ("-", "+")]
    # NOTE(review): the 7th value is the fractional-seconds group and is
    # passed as microseconds unchanged, exactly as the original did.
    start_datetime = datetime.datetime(*start_parts[:7])
    end_datetime = datetime.datetime(*end_parts[:7])
    duration = end_datetime - start_datetime
    # Accumulate the worked time per calendar day
    day_key = start_datetime.strftime("%x")
    work_week[day_key] = work_week.get(day_key, datetime.timedelta()) + duration

for day in sorted(work_week):
    print("%s %s" % (day, work_week[day]))
I did a little more work on the script since I posted the code on pastebin and I found a much simpler method for retrieving lists of calendars:
def GetAllCalendars(calendar_service):
    """Return a list of every entry in the service's all-calendars feed."""
    return list(calendar_service.GetAllCalendarsFeed().entry)


def GetUserCalendars(calendar_service):
    """Return a list of every entry in the service's own-calendars feed."""
    return list(calendar_service.GetOwnCalendarsFeed().entry)


def GetSharedCalendars(calendar_service):
    """Return the calendars whose title matches none of the own calendars.

    Calendars are compared by ``title.text``. The own-calendar titles are
    fetched once up front rather than once per calendar being filtered.
    """
    own_titles = set(cal.title.text for cal in GetUserCalendars(calendar_service))
    return [cal for cal in GetAllCalendars(calendar_service) if cal.title.text not in own_titles]

