Download ALL The Music
Given a file containing a list of songs, one per line, in the format “Artist – Song Title”, download the audio of the first YouTube video link on a Google search for that song. This is quite useful if you want to get the MP3 for every song you ever gave a thumbs up on Pandora. On my computer, this averages about 4 songs a minute.
The Requests API and BeautifulSoup make writing screenscrapers and automating the web really clean and easy.
#!/usr/bin/python
# Takes a list of titles of songs, in the format "artist - song" and searches for each
# song on google. The first youtube link is passed off to youtube-dl to download it and
# get the MP3 out. This doesn't have any throttling because (in theory) the conversion step
# takes enough time to provide throttling.
import requests
import re
from BeautifulSoup import BeautifulSoup
from subprocess import call
def queryConverter(videoURL):
    """Hand videoURL to youtube-dl, asking it to extract the audio as an MP3.

    The exit status of the youtube-dl process is not inspected, and nothing
    is returned.
    """
    command = ["youtube-dl", "--extract-audio", "--audio-format", "mp3"]
    command.append(videoURL)
    call(command)
def queryGoogle(songTitle):
    """Search Google for songTitle and return the first video URL found.

    Args:
        songTitle: search text, e.g. "artist - song". Surrounding whitespace
            (such as the newline left on a line read from a file) is removed
            before the search is issued.

    Returns:
        The unquoted "https..." URL taken from the first result link whose
        href contains "watch" and an https URL, or None when the title is
        blank, the request does not return status 200, or no such link exists.
    """
    # Lines read straight from a file end in "\n"; keep it out of the query
    # and out of the messages printed below.
    songTitle = songTitle.strip()
    if not songTitle:
        # Nothing to search for; skip the request entirely.
        return None

    reqPreamble = "https://www.google.nl/search"
    reqData = {'q': songTitle}
    r = requests.get(reqPreamble, params=reqData)
    if r.status_code != 200:
        print("Failed to issue request to {0}".format(r.url))
        return None

    bs = BeautifulSoup(r.text)
    tubelinks = bs.findAll("a", attrs={'href': re.compile("watch")})
    for link in tubelinks:
        # An href can contain "watch" without embedding an https URL; in that
        # case re.search returns None, so move on to the next link instead of
        # calling .group() on None.
        match = re.search("https[^&]*", link['href'])
        if match is not None:
            return requests.utils.unquote(match.group(0))

    print("No video for {0}".format(songTitle))
    return None
if __name__ == "__main__":
    # Every line of the likes file is treated as one search query.
    with open("./all_pandora_likes", 'r') as likesFile:
        for entry in likesFile:
            foundURL = queryGoogle(entry)
            if foundURL is None:
                # No link came back for this entry, so there is nothing to download.
                continue
            queryConverter(foundURL)
Recent Comments