Code diary 01 – Mutagen for Recovered Files

So after an incredibly stupid hard drive reformatting incident, I was left with my music collection reduced to recovered files – better than nothing, but nameless and unsorted, like so:
recoveredMusic
Luckily, most music files include metadata identifying the song title, album name, artist etc. A package such as Python’s Mutagen can read this metadata, which can then be used to re-organise a large quantity of files. Below is my undeniably shoddy code to do so:

# -*- coding: utf-8 -*-
"""
Created on Wed Jun 10 14:39:13 2015

@author: Oscar
"""
# Characters that may not appear in the generated file and folder names;
# getData() substitutes each occurrence found in a title or album name.
forbiddenChars = ['\\', '/', '?', '*', ':', '>', '<', '|', '"']
import mutagen 
import os 
import sys 

def genNames():
    """Yield an endless series of placeholder track names.

    The names are "unknown track 00", "unknown track 01", ... with the
    counter zero-padded to at least two digits.
    """
    counter = 0
    while True:
        placeholder = "unknown track {:0>2d}".format(counter)
        yield placeholder
        counter = counter + 1

def identify(path):
    """Collect tag information for every regular file directly inside *path*.

    Args:
        path: Directory to scan. Sub-directories are ignored, not recursed into.

    Returns:
        A dict mapping each file name (not the full path) to the
        (title, album, ext) tuple produced by getData(). Files for which
        getData() returns None are left out.
    """
    fileData = {}
    # One shared generator so that untitled tracks get distinct placeholder names.
    namer = genNames()
    for name in os.listdir(path):
        # os.path.join works whether or not *path* ends with a separator;
        # plain string concatenation silently matched nothing without one.
        fullPath = os.path.join(path, name)
        if not os.path.isfile(fullPath):
            continue
        data = getData(fullPath, namer)
        if data is not None:
            fileData[name] = data
    return fileData

def getData(path, namer):
    """Read the title and album tags of one file and make them filename-safe.

    Args:
        path: Path of the file to inspect.
        namer: Iterator of placeholder names; one is consumed whenever the
            file has no usable title.

    Returns:
        A (title, album, ext) tuple, where ext is the file's extension
        including the leading dot (empty if the file has none), or None
        when Mutagen raises on the file or does not recognise it.
    """
    try:
        rawdata = mutagen.File(path, easy=True)
    except Exception:
        # Best effort: files Mutagen chokes on (e.g. stray .zip files) are
        # skipped. Catching Exception rather than everything lets Ctrl-C
        # still interrupt a long run.
        return None
    print("Getting data for {}".format(path))
    if rawdata is None:
        return None

    # NOTE(review): under Python 2, str() raises UnicodeEncodeError for a
    # non-ASCII tag value if Mutagen returns unicode here -- confirm and
    # decide how such names should be handled.
    title = ''
    if 'title' in rawdata and rawdata['title']:
        title = str(rawdata['title'][0].strip())
    album = ''
    if 'album' in rawdata and rawdata['album']:
        album = str(rawdata['album'][0].strip())

    # splitext copes with extensions of any length (".flac", ".m4a", none),
    # unlike slicing off the last three characters.
    ext = os.path.splitext(path)[1]

    # Replace every occurrence of every forbidden character, not only the first.
    for ch in forbiddenChars:
        title = title.replace(ch, ' - ')
        album = album.replace(ch, ' - ')

    # A replacement at either end (e.g. a title ending in '?') re-introduces
    # the leading/trailing whitespace that Windows doesn't like.
    title = title.strip()
    album = album.strip()

    if not title:
        title = next(namer)
    if not album:
        album = "Unknowns"
    return (title, album, ext)

if __name__ == "__main__":
    # Usage: pass the directory holding the recovered files as the only
    # argument. Each recognised track is moved to <directory>/<album>/<title><ext>.
    if len(sys.argv) < 2:
        print("Usage: {} <music directory>".format(sys.argv[0]))
        sys.exit(1)
    loc = sys.argv[1]
    print("Identifying tracks...")
    music = identify(loc)
    print("Moving files...")
    print("Found:  {}".format(music))
    for f in music:
        title, album, ext = music[f]
        # Build paths with os.path.join so the directory argument does not
        # need to end with a separator.
        source = os.path.join(loc, f)
        albumDir = os.path.join(loc, album)
        destination = os.path.join(albumDir, title + ext)
        if not os.path.isdir(albumDir):
            # First track seen for this album: create its folder.
            os.mkdir(albumDir)
        if os.path.isfile(destination):
            # A track with this album and title has already been moved, so
            # this file is treated as a duplicate and deleted.
            os.remove(source)
        else:
            print("Moving {} to {}".format(source, destination))
            os.rename(source, destination)
    print("Finished")

Overall this wasn’t especially fun to create – every time I thought I’d included all the possible test cases, it would turn out there was something new hiding in the real directory of files: stuff like album names ending in whitespace, or random .zip files that Mutagen isn’t designed to handle. Much faster than moving them all by hand, though!

Advertisements
Code diary 01 – Mutagen for Recovered Files

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s