Tried out an optimization that didn't seem to work.
author  Ed Page <eopage@byu.net>
Sat, 25 Apr 2009 02:57:04 +0000 (21:57 -0500)
committer  Ed Page <eopage@byu.net>
Sat, 25 Apr 2009 02:57:04 +0000 (21:57 -0500)
Instead of looping over sleeps until an empty chunk is returned, an
alternative was added that compares the number of bytes read against the
length specified in the Content-Length header.  For some reason this still
didn't return the full string.
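
For reference, here is a minimal standalone sketch of the length-based read
strategy this commit tries (mirroring read_by_length in the diff below).  The
FakeConnection class is a hypothetical stand-in for the urllib2 response that
hands the body back in short pieces; it does not reproduce whatever made the
real reads come back short.

    import time

    class FakeConnection(object):
        # Hypothetical stand-in for the urllib2 response: it hands the body
        # back in short pieces, the way a non-blocking read might.
        def __init__(self, body, pieceSize):
            self._body = body
            self._pieceSize = pieceSize
            self._offset = 0

        def info(self):
            return {"Content-Length": str(len(self._body))}

        def read(self):
            piece = self._body[self._offset:self._offset + self._pieceSize]
            self._offset += len(piece)
            return piece

    def read_by_length(connection, timeout):
        # Keep reading until the bytes seen match the Content-Length header
        # or the timeout (a count of one-second sleeps) runs out.
        specifiedLength = int(connection.info()["Content-Length"])
        actuallyRead = 0
        chunks = []
        chunk = connection.read()
        while 0 < timeout:
            actuallyRead += len(chunk)
            if actuallyRead == specifiedLength:
                break
            chunks.append(chunk)
            time.sleep(1)
            timeout -= 1
            chunk = connection.read()
        chunks.append(chunk)
        return "".join(chunks)

    print(read_by_length(FakeConnection('{"rsp": {"stat": "ok"}}', 8), 5))

With the fake connection the loop stops as soon as the byte count matches the
header, after two one-second sleeps.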

src/rtm_api.py

index 15c5d84..b748b38 100644
@@ -92,19 +92,52 @@ class RTMapi(object):
                return urllib2.urlopen(url)
 
        @staticmethod
-       def read(connection, timeout):
+       def read_by_length(connection, timeout):
                # It appears that urllib uses the non-blocking variant of file objects
                # which means reads might not always be complete, so grabbing as much
                # of the data as possible with a sleep in between to give it more time
                # to grab data.
                contentLengthField = "Content-Length"
+               assert contentLengthField in connection.info(), "Connection didn't provide content length info"
+               specifiedLength = int(connection.info()["Content-Length"])
 
+               actuallyRead = 0
                chunks = []
                chunk = connection.read()
-               while chunk:
+               while 0 < timeout:
+                       actuallyRead += len(chunk)
+                       if actuallyRead == specifiedLength:
+                               break
                        chunks.append(chunk)
                        time.sleep(1)
+                       timeout -= 1
                        chunk = connection.read()
+               chunks.append(chunk)
+               json = "".join(chunks)
+
+               if "Content-Length" in connection.info():
+                       assert len(json) == int(connection.info()["Content-Length"]), "The packet header promised %s of data but was only able to read %s of data" % (
+                               connection.info()["Content-Length"],
+                               len(json),
+                       )
+
+               return json
+
+       @staticmethod
+       def read_by_guess(connection, timeout):
+               # It appears that urllib uses the non-blocking variant of file objects
+               # which means reads might not always be complete, so grabbing as much
+               # of the data as possible with a sleep in between to give it more time
+               # to grab data.
+
+               chunks = []
+               chunk = connection.read()
+               while chunk and 0 < timeout:
+                       chunks.append(chunk)
+                       time.sleep(1)
+                       timeout -= 1
+                       chunk = connection.read()
+               chunks.append(chunk)
                json = "".join(chunks)
 
                if "Content-Length" in connection.info():
@@ -122,7 +155,8 @@ class RTMapi(object):
                params['api_sig'] = self._sign(params)
 
                connection = self.open_url(SERVICE_URL, params)
-               json = self.read(connection, 5)
+               json = self.read_by_guess(connection, 5)
+               # json = self.read_by_length(connection, 5)
 
                data = DottedDict('ROOT', parse_json(json))
                rsp = data.rsp