75335a1463fb3275ffc28731bae0954493c07069
[jamaendo] / jamaendo / api.py
1 import urllib, threading, os, gzip, time, simplejson, re
# Remote address of the gzip-compressed XML database dump.
_DUMP_URL = 'http://img.jamendo.com/data/dbdump_artistalbumtrack.xml.gz'
# Local path of the cached dump, and the scratch path a new download is
# written to before it replaces the cached copy.
_DUMP = os.path.expanduser('~/.cache/jamaendo/dbdump.xml.gz')
_DUMP_TMP = os.path.expanduser('~/.cache/jamaendo/new_dbdump.xml.gz')

# Best effort: create the cache directory at import time.  Any OSError
# (for instance because the directory is already there) is ignored.
try:
    os.makedirs(os.path.dirname(_DUMP))
except OSError:
    pass
10
def has_dump():
    """Tell whether a regular file exists at the cached dump path ``_DUMP``."""
    dump_present = os.path.isfile(_DUMP)
    return dump_present
13
14 def _file_is_old(fil, old_age):
15     return os.path.getmtime(fil) < (time.time() - old_age)
16
def _dump_is_old():
    """Return True when there is no cached dump or it is older than one day."""
    if not has_dump():
        return True
    one_day = 60*60*24
    return _file_is_old(_DUMP, one_day)
19
def refresh_dump(complete_callback, progress_callback=None, force=False):
    """Make sure a recent dump is available, downloading one if necessary.

    When ``force`` is false and the cached dump is not old,
    ``complete_callback(True)`` is invoked immediately in the calling
    thread.  Otherwise a ``Downloader`` thread is started and the callbacks
    are handed over to it.
    """
    if not force and not _dump_is_old():
        complete_callback(True)
        return
    worker = Downloader(complete_callback, progress_callback)
    worker.start()
26
class Downloader(threading.Thread):
    """Thread that downloads the database dump and swaps it into place.

    ``complete_callback(success)`` is called from this thread once the
    download has finished or failed.  ``progress_callback(percent)``, when
    given, is called from this thread with an integer in the range 0..100.
    """

    def __init__(self, complete_callback, progress_callback):
        threading.Thread.__init__(self)
        self.complete_callback = complete_callback
        self.progress_callback = progress_callback

    def actual_callback(self, numblocks, blocksize, filesize):
        """Report hook for ``urllib.urlretrieve``; forwards a percentage.

        ``filesize`` is zero or negative when the total size is unknown;
        in that case 100 is reported, as no meaningful ratio exists.
        """
        if not self.progress_callback:
            return
        if filesize > 0:
            # Floor division keeps the value an integer on every Python
            # version; the last block may overshoot, hence the cap at 100.
            percent = min((numblocks * blocksize * 100) // filesize, 100)
        else:
            percent = 100
        self.progress_callback(percent)

    def run(self):
        """Download to the scratch path, then replace the cached dump."""
        success = True
        try:
            urllib.urlretrieve(_DUMP_URL, _DUMP_TMP, self.actual_callback)
            if os.path.isfile(_DUMP):
                os.remove(_DUMP)
            os.rename(_DUMP_TMP, _DUMP)
        except Exception:
            # Any failure (network, disk, ...) is reported through the
            # completion callback instead of killing the thread silently.
            success = False
        self.complete_callback(success)
51
def fast_iter(context, func):
    """Apply ``func`` to each element of ``context``, pruning as it goes.

    ``context`` yields ``(event, element)`` pairs.  After ``func`` has seen
    an element, the element is cleared and every sibling that precedes it
    is removed from the parent.
    """
    for _event, node in context:
        func(node)
        node.clear()
        # Keep dropping the parent's first child until ``node`` itself is
        # the first child, i.e. has no previous sibling left.
        while True:
            if node.getprevious() is None:
                break
            parent = node.getparent()
            del parent[0]
    del context
59
60 from lxml import etree
61
class Obj(object):
    """Plain attribute holder whose ``repr`` lists its public attributes."""

    def __repr__(self):
        """Return ``{name=value, ...}`` for attributes not starting with ``_``.

        The result is always the interpreter's native ``str`` type.
        """
        def printable(v):
            # Only unicode text needs encoding, and only where ``str`` is
            # the byte-string type (Python 2).  Byte strings pass through
            # ``str()`` unchanged, so non-ASCII bytes no longer trigger an
            # implicit ASCII decode.
            if str is bytes and isinstance(v, type(u'')):
                return v.encode('utf-8')
            return str(v)

        public = ((k, v) for k, v in self.__dict__.items()
                  if not k.startswith('_'))
        return "{%s}" % (", ".join("%s=%s" % (printable(k), printable(v))
                                   for k, v in public))
71
class LocalDB(object):
    """Read-only search over the gzip-compressed XML dump stored at ``_DUMP``.

    ``connect()`` must be called before searching.  The search methods
    return lazy generators of plain dicts; each one streams the dump from
    the beginning with ``etree.iterparse``.  All generators read from the
    same file object, so consume one fully before starting another.
    """

    def __init__(self):
        # Open file object for the dump, or None while disconnected.
        self.fil = None

    def connect(self):
        """Open the dump for reading, closing any handle opened earlier."""
        self.close()
        self.fil = gzip.open(_DUMP)

    def close(self):
        """Close the dump if it is open; safe to call when not connected."""
        if self.fil is not None:
            self.fil.close()
            self.fil = None

    def make_album_brief(self, element):
        """Build a dict with the ``id`` (int) and ``name`` of an album element."""
        ret = {}
        for info in element:
            if info.tag == 'id':
                ret['id'] = int(info.text)
            elif info.tag == 'name':
                ret['name'] = info.text
        return ret

    def make_artist_obj(self, element):
        """Build an artist dict: ``id``, ``name``, ``image`` and ``albums``.

        ``albums`` is a list of brief album dicts taken from the children
        of the ``Albums`` child element.
        """
        ret = {}
        for child in element:
            if child.tag == 'id':
                ret['id'] = int(child.text)
            elif child.tag in ('name', 'image'):
                ret[child.tag] = child.text
            elif child.tag == 'Albums':
                ret['albums'] = [self.make_album_brief(a) for a in child]
        return ret

    def make_track_obj(self, element):
        """Build a track dict: ``id``, stream URLs ``mp3``/``ogg``, ``name``, ``numalbum``."""
        ret = {}
        for info in element:
            if info.tag == 'id':
                _id = int(info.text)
                ret['id'] = _id
                ret['mp3'] = Query.track_mp3(_id)
                ret['ogg'] = Query.track_ogg(_id)
            elif info.tag in ('name', 'numalbum'):
                ret[info.tag] = info.text
        return ret

    def make_album_obj(self, element):
        """Build a full album dict including artist info and track list.

        ``artist_name``/``artist_id`` are read from the element two levels
        up, when there is one.  Empty ``name``/``image`` text is stored as
        an empty string.
        """
        ret = {}
        # The artist is looked up as the grandparent of the album element;
        # tolerate an element that has no parent at all.
        container = element.getparent()
        artist = container.getparent() if container is not None else None
        if artist is not None:
            for child in artist:
                if child.tag == 'name':
                    ret['artist_name'] = child.text
                elif child.tag == 'id':
                    ret['artist_id'] = int(child.text)
        for child in element:
            if child.tag == 'id':
                ret['id'] = int(child.text)
            elif child.tag in ('name', 'image'):
                ret[child.tag] = child.text or ""
            elif child.tag == 'Tracks':
                ret['tracks'] = [self.make_track_obj(t) for t in child]
        return ret

    def _walk(self, tag, matches, build):
        """Yield ``build(element)`` for each ``tag`` element accepted by ``matches``."""
        # Rewind so that every search sees the whole dump, not only what an
        # earlier search left unread.
        self.fil.seek(0)
        for _event, element in etree.iterparse(self.fil, tag=tag):
            if matches(element):
                yield build(element)
            # Free what has been processed: clear the element and drop the
            # siblings that precede it.
            element.clear()
            while element.getprevious() is not None:
                del element.getparent()[0]

    @staticmethod
    def _name_matcher(name_match):
        """Return a predicate: element's lower-cased ``name`` contains ``name_match``."""
        def matches(element):
            # findtext() gives None/'' for a missing or empty <name>.
            name = element.findtext('name')
            return bool(name) and name_match in name.lower()
        return matches

    @staticmethod
    def _id_matcher(ids):
        """Return a predicate: element's integer ``id`` is one of ``ids``."""
        wanted = frozenset(ids)  # built once; O(1) membership per element
        def matches(element):
            _id = element.findtext('id')
            return bool(_id) and int(_id) in wanted
        return matches

    def artist_walker(self, name_match):
        """Yield artist dicts whose lower-cased name contains ``name_match``."""
        return self._walk("artist", self._name_matcher(name_match),
                          self.make_artist_obj)

    def album_walker(self, name_match):
        """Yield album dicts whose lower-cased name contains ``name_match``."""
        return self._walk("album", self._name_matcher(name_match),
                          self.make_album_obj)

    def artistid_walker(self, artistids):
        """Yield artist dicts whose id is in ``artistids``."""
        return self._walk("artist", self._id_matcher(artistids),
                          self.make_artist_obj)

    def albumid_walker(self, albumids):
        """Yield album dicts whose id is in ``albumids``."""
        return self._walk("album", self._id_matcher(albumids),
                          self.make_album_obj)

    def search_artists(self, substr):
        """Case-insensitive substring search on artist names (lazy generator)."""
        return self.artist_walker(substr.lower())

    def search_albums(self, substr):
        """Case-insensitive substring search on album names (lazy generator)."""
        return self.album_walker(substr.lower())

    def get_artists(self, artistids):
        """Return a lazy generator of the artists with the given ids."""
        return self.artistid_walker(artistids)

    def get_albums(self, albumids):
        """Return a lazy generator of the albums with the given ids."""
        return self.albumid_walker(albumids)
188
# Base URL that all "get2" API request and stream/image redirect URLs start with.
_GET2 = 'http://api.jamendo.com/get2/'
190
class Query(object):
    """A JSON request against the ``get2`` API rooted at ``_GET2``.

    The constructor fixes which fields are selected and which unit is
    requested; ``emit()`` (or calling the instance) runs the request with
    ordering/search parameters and returns the decoded JSON.
    """
    # Time of the most recent query.  Kept on the class so the rate limit
    # is shared by all instances.
    last_query = time.time()
    # Not used by the code in this class.
    caching = True
    cache_time = 60*60*24
    rate_limit = 1.0 # minimum number of seconds between two queries

    def __init__(self,
                 select=('id', 'name', 'image', 'artist_name', 'artist_id'),
                 request='album',
                 track=('track_album', 'album_artist')):
        """Prepare the request URL.

        select  -- field names, joined with ``+`` in the URL
        request -- unit to request (e.g. 'album', 'track', 'artist')
        track   -- extra path component, joined with ``+``; only used when
                   ``request`` is 'track'
        """
        if request == 'track':
            self.url = "%s%s/%s/json/%s" % (_GET2, '+'.join(select), request, '+'.join(track))
        else:
            self.url = "%s%s/%s/json/" % (_GET2, '+'.join(select), request)

    def __call__(self, order=None, count=5, query=None, albumids=None):
        """Shorthand for ``emit()`` with the same arguments."""
        return self.emit(order=order, count=count, query=query, albumids=albumids)

    def emit(self, order=None, count=5, query=None, albumids=None):
        """Run the rate-limited query and return the decoded JSON response.

        count    -- sent as ``n``
        order    -- sent as ``order`` when given
        query    -- sent as ``searchquery`` when given
        albumids -- iterable of ids, sent space-separated as ``album_id``
        """
        self._ratelimit()
        paramdict = {'n':count}
        if order is not None:
            paramdict['order'] = order
        if query is not None:
            paramdict['searchquery'] = query
        if albumids is not None:
            paramdict['album_id'] = " ".join(str(_id) for _id in albumids)
        params = urllib.urlencode(paramdict)
        url = self.url + "?%s" % (params)
        f = urllib.urlopen(url)
        try:
            return simplejson.load(f)
        finally:
            # Release the connection even when the response is not valid JSON.
            f.close()

    def _ratelimit(self):
        """Sleep as needed so queries are at least ``rate_limit`` seconds apart."""
        wait = self.rate_limit - (time.time() - Query.last_query)
        if wait > 0:
            time.sleep(wait)
        # Record on the class (not the instance) and after any sleep, so the
        # next query from any Query object is measured from the real send time.
        Query.last_query = time.time()


    @staticmethod
    def album_cover(albumid, size=200):
        """Return the local path of an album cover, downloading it if missing."""
        # expanduser() is required: the OS does not interpret '~' itself.
        to = os.path.expanduser('~/.cache/jamaendo/cover-%d-%d.jpg'%(albumid, size))
        if not os.path.isfile(to):
            url = _GET2+'image/album/redirect/?id=%d&imagesize=%d'%(albumid, size)
            urllib.urlretrieve(url, to)
        return to

    @staticmethod
    def track_ogg(trackid):
        """Return the redirect URL for the track with stream encoding ``ogg2``."""
        return _GET2+ 'stream/track/redirect/?id=%d&streamencoding=ogg2'%(trackid)

    @staticmethod
    def track_mp3(trackid):
        """Return the redirect URL for the track with stream encoding ``mp31``."""
        return _GET2+ 'stream/track/redirect/?id=%d&streamencoding=mp31'%(trackid)
248
class Queries(object):
    """Ready-made API queries; every method builds a ``Query`` and returns its result."""

    @staticmethod
    def albums_this_week():
        """Albums ordered by ``ratingweek_desc``."""
        return Query().emit(order='ratingweek_desc')

    @staticmethod
    def albums_all_time():
        """Albums ordered by ``ratingtotal_desc``."""
        return Query().emit(order='ratingtotal_desc')

    @staticmethod
    def albums_this_month():
        """Albums ordered by ``ratingmonth_desc``."""
        return Query().emit(order='ratingmonth_desc')

    @staticmethod
    def albums_today():
        """Albums ordered by ``ratingday_desc``."""
        return Query().emit(order='ratingday_desc')

    @staticmethod
    def playlists_all_time():
        """Playlists (id, name, user_idstr) ordered by ``ratingtotal_desc``."""
        q = Query(select=['id','name', 'user_idstr'], request='playlist')
        return q.emit(order='ratingtotal_desc')

    @staticmethod
    def tracks_this_month():
        """Tracks with album/artist names and ids, ordered by ``ratingmonth_desc``."""
        q = Query(select=['id', 'name',
                          'stream',
                          'album_name', 'artist_name',
                          'album_id', 'artist_id'],
                  request='track')
        return q.emit(order='ratingmonth_desc')

    @staticmethod
    def search_albums(query):
        """Search albums for ``query``, ordered by ``searchweight_desc``."""
        q = Query()
        return q.emit(order='searchweight_desc', query=query)

    @staticmethod
    def search_artists(query):
        """Search artists for ``query``, ordered by ``searchweight_desc``."""
        q = Query(request='artist', select=['id', 'name', 'image'])
        return q.emit(order='searchweight_desc', query=query)

    @staticmethod
    def album_tracks(albumids, select=('id', 'name', 'numalbum')):
        """Return up to 100 tracks of the given albums, with stream URLs added.

        Each returned track dict gets ``mp3`` and ``ogg`` keys holding the
        stream redirect URLs for its ``id``.
        """
        #http://api.jamendo.com/get2/id+name/track/jsonpretty/?album_id=33+46
        q = Query(select=select,
                  request='track')
        ret = q.emit(albumids=albumids, count=100)
        for track in ret:
            track_id = int(track['id'])
            track['mp3'] = Query.track_mp3(track_id)
            track['ogg'] = Query.track_ogg(track_id)
        return ret