[1.4.x] Fixed #24158 -- Allowed GZipMiddleware to work with streaming responses

Backport of django.utils.text.compress_sequence and fix for django.middleware.gzip.GZipMiddleware when using iterators as response.content.
author: Benjamin Richter <richter.benjamin@gmail.com> 2015-01-25 23:22:46 +0100
committer: Tim Graham <timograham@gmail.com> 2015-01-26 19:22:47 -0500
commit: 1e39d0f6280abf34c7719db5e7ed1c333f5e5919 (patch)
tree: da2dedc80c43585cbbf25ef183290b44b93c04c0
parent: 9435474068c2ae2261105adbbe7aebdb80b778f3 (diff)
5 files changed, 80 insertions, 8 deletions
diff --git a/django/middleware/gzip.py b/django/middleware/gzip.py
index 69f938cf0a..eb4d8bff42 100644
--- a/django/middleware/gzip.py
+++ b/django/middleware/gzip.py
@@ -1,6 +1,6 @@
 import re
 
-from django.utils.text import compress_string
+from django.utils.text import compress_string, compress_sequence
 from django.utils.cache import patch_vary_headers
 
 re_accepts_gzip = re.compile(r'\bgzip\b')
@@ -12,8 +12,9 @@ class GZipMiddleware(object):
     on the Accept-Encoding header.
     """
     def process_response(self, request, response):
+        # Only string content can be measured: taking the length of iterator content would consume the stream.
         # It's not worth attempting to compress really short responses.
-        if len(response.content) < 200:
+        if not response._base_content_is_iter and len(response.content) < 200:
             return response
 
         patch_vary_headers(response, ('Accept-Encoding',))
@@ -32,15 +33,23 @@ class GZipMiddleware(object):
         if not re_accepts_gzip.search(ae):
             return response
 
-        # Return the compressed content only if it's actually shorter.
-        compressed_content = compress_string(response.content)
-        if len(compressed_content) >= len(response.content):
-            return response
+        # The response object can tell us whether content is a string or an iterable
+        if response._base_content_is_iter:
+            # If the response content is iterable we don't know the length, so delete the header.
+            del response['Content-Length']
+            # Wrap the response content in a streaming gzip iterator (direct access to inner response._container)
+            response.content = compress_sequence(response._container)
+        else:
+            # Return the compressed content only if it's actually shorter.
+            compressed_content = compress_string(response.content)
+            if len(compressed_content) >= len(response.content):
+                return response
+            response.content = compressed_content
+            response['Content-Length'] = str(len(response.content))
 
         if response.has_header('ETag'):
             response['ETag'] = re.sub('"$', ';gzip"', response['ETag'])
 
-        response.content = compressed_content
         response['Content-Encoding'] = 'gzip'
-        response['Content-Length'] = str(len(response.content))
+
         return response
diff --git a/django/utils/text.py b/django/utils/text.py
index eaafb96d7c..8e43dc9652 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -286,6 +286,39 @@ def compress_string(s):
 
 ustring_re = re.compile(u"([\u0080-\uffff])")
 
+# Backported from Django 1.5.
+class StreamingBuffer(object):
+    def __init__(self):
+        self.vals = []
+
+    def write(self, val):
+        self.vals.append(val)
+
+    def read(self):
+        ret = ''.join(self.vals)
+        self.vals = []
+        return ret
+
+    def flush(self):
+        return
+
+    def close(self):
+        return
+
+# Backported from Django 1.5.
+# Like compress_string, but for iterators of strings.
+def compress_sequence(sequence):
+    buf = StreamingBuffer()
+    zfile = GzipFile(mode='wb', compresslevel=6, fileobj=buf)
+    # Output headers...
+    yield buf.read()
+    for item in sequence:
+        zfile.write(item)
+        zfile.flush()
+        yield buf.read()
+    zfile.close()
+    yield buf.read()
+
 def javascript_quote(s, quote_double_quotes=False):
 
     def fix(match):
diff --git a/docs/releases/1.4.19.txt b/docs/releases/1.4.19.txt
new file mode 100644
index 0000000000..da813fa7eb
--- /dev/null
+++ b/docs/releases/1.4.19.txt
@@ -0,0 +1,16 @@
+===========================
+Django 1.4.19 release notes
+===========================
+
+*Under development*
+
+Django 1.4.19 fixes a regression in the 1.4.18 security release.
+
+Bugfixes
+========
+
+* ``GZipMiddleware`` now supports streaming responses. As part of the 1.4.18
+  security release, the ``django.views.static.serve()`` function was altered
+  to stream the files it serves. Unfortunately, the ``GZipMiddleware`` consumed
+  the stream prematurely and prevented files from being served properly
+  (`#24158 <http://code.djangoproject.com/ticket/24158>`_).
diff --git a/docs/releases/index.txt b/docs/releases/index.txt
index 98f69adc20..58b32f05c6 100644
--- a/docs/releases/index.txt
+++ b/docs/releases/index.txt
@@ -19,6 +19,7 @@ Final releases
 .. toctree::
    :maxdepth: 1
 
+   1.4.19
    1.4.18
    1.4.17
    1.4.16
diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py
index 138ee50e43..87b19fb6da 100644
--- a/tests/regressiontests/middleware/tests.py
+++ b/tests/regressiontests/middleware/tests.py
@@ -514,6 +514,7 @@ class GZipMiddlewareTest(TestCase):
     short_string = "This string is too short to be worth compressing."
     compressible_string = 'a' * 500
     uncompressible_string = ''.join(chr(random.randint(0, 255)) for _ in xrange(500))
+    iterator_as_content = iter(compressible_string)
 
     def setUp(self):
         self.req = HttpRequest()
@@ -589,6 +590,18 @@ class GZipMiddlewareTest(TestCase):
         self.assertEqual(r.content, self.uncompressible_string)
         self.assertEqual(r.get('Content-Encoding'), None)
 
+    def test_streaming_compression(self):
+        """
+        Tests that an iterator used as response content is returned as a compressed
+        stream, and that the middleware does not consume response.content itself.
+        See #24158.
+        """
+        self.resp.content = self.iterator_as_content
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(self.decompress(''.join(r.content)), self.compressible_string)
+        self.assertEqual(r.get('Content-Encoding'), 'gzip')
+        self.assertEqual(r.get('Content-Length'), None)
+
 
 class ETagGZipMiddlewareTest(TestCase):
     """
author	Benjamin Richter <richter.benjamin@gmail.com>	2015-01-25 23:22:46 +0100
committer	Tim Graham <timograham@gmail.com>	2015-01-26 19:22:47 -0500
commit	1e39d0f6280abf34c7719db5e7ed1c333f5e5919 (patch)
tree	da2dedc80c43585cbbf25ef183290b44b93c04c0
parent	9435474068c2ae2261105adbbe7aebdb80b778f3 (diff)