Download ALL The Music

Given a file containing a list of songs, one per line, in the format “Artist – Song Title”, download the audio of the first youtube video link on a Google search for that song. This is quite useful if you want to the MP3 for every song you ever gave a thumbs up on Pandora. On my computer, this averages about 4 songs a minute.

The Requests API and BeautifulSoup make writing screenscrapers and automating the web really clean and easy.

#!/usr/bin/python

# Takes a list of titles of songs, in the format "artist - song" and searches for each
# song on google. The first youtube link is passed off to youtube-dl to download it and 
# get the MP3 out. This doesn't have any throttling because (in theory) the conversion step
# takes enough time to provide throttling. 

import requests
import re
from BeautifulSoup import BeautifulSoup
from subprocess import call

def queryConverter(videoURL):
	call(["youtube-dl", "--extract-audio",  "--audio-format", "mp3", videoURL])

def queryGoogle(songTitle):
	reqPreamble = "https://www.google.nl/search"
	reqData = {'q':songTitle}
	r = requests.get(reqPreamble, params=reqData)
	if r.status_code != 200:
		print "Failed to issue request to {0}".format(r.url)
	else:
		bs = BeautifulSoup(r.text)
		tubelinks = bs.findAll("a", attrs={'href':re.compile("watch")})
		if len(tubelinks) > 0:
			vidUrl = re.search("https[^&]*", tubelinks[0]['href'])
			vidUrl = requests.utils.unquote(vidUrl.group(0))
			return vidUrl
		else:
			print "No video for {0}".format(songTitle)

if __name__=="__main__":
	with open("./all_pandora_likes", 'r') as inFile:
		for line in inFile:
			videoURL = queryGoogle(line)
			if videoURL is not None:
				queryConverter(videoURL)