summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jon Dufresne <jon.dufresne@gmail.com> 2014-09-29 18:24:33 -0700
committer Tim Graham <timograham@gmail.com> 2014-10-30 11:52:59 -0400
commit eb4f6de980c5148ba48d4ed67f31cca27dd132a8 (patch)
tree 787358de9347c8fcfd0bb5fa8c12778f90bf15d3
parent eab3dc195eff4123460fb99d7819a7f6627e7115 (diff)
Fixed #8149 -- Made File.__iter__() support universal newlines.
The following are recognized as ending a line: the Unix end-of-line convention '\n', the Windows convention '\r\n', and the old Macintosh convention '\r'.

http://www.python.org/dev/peps/pep-0278

Thanks tchaumeny for review.
-rw-r--r-- django/core/files/base.py 41
-rw-r--r-- docs/ref/files/file.txt 9
-rw-r--r-- docs/ref/files/uploads.txt 13
-rw-r--r-- docs/releases/1.8.txt 7
-rw-r--r-- tests/files/tests.py 50
5 files changed, 108 insertions, 12 deletions
diff --git a/django/core/files/base.py b/django/core/files/base.py
index 159a98e52f..c1c9199cb5 100644
--- a/django/core/files/base.py
+++ b/django/core/files/base.py
@@ -102,16 +102,22 @@ class File(FileProxyMixin):
# Iterate over this file-like object by newlines
buffer_ = None
for chunk in self.chunks():
- chunk_buffer = BytesIO(chunk)
-
- for line in chunk_buffer:
+ for line in chunk.splitlines(True):
if buffer_:
- line = buffer_ + line
+ if endswith_cr(buffer_) and not equals_lf(line):
+ # Line split after a \r newline; yield buffer_.
+ yield buffer_
+ # Continue with line.
+ else:
+ # Line either split without a newline (line
+ # continues after buffer_) or with \r\n
+ # newline (line == b'\n').
+ line = buffer_ + line
+ # buffer_ handled, clear it.
buffer_ = None
- # If this is the end of a line, yield
- # otherwise, wait for the next round
- if line[-1:] in (b'\n', b'\r'):
+ # If this is the end of a \n or \r\n line, yield.
+ if endswith_lf(line):
yield line
else:
buffer_ = line
@@ -165,3 +171,24 @@ class ContentFile(File):
def close(self):
pass
+
+
+def endswith_cr(line):
+ """
+ Return True if line (a text or byte string) ends with '\r'.
+ """
+ return line.endswith('\r' if isinstance(line, six.text_type) else b'\r')
+
+
+def endswith_lf(line):
+ """
+ Return True if line (a text or byte string) ends with '\n'.
+ """
+ return line.endswith('\n' if isinstance(line, six.text_type) else b'\n')
+
+
+def equals_lf(line):
+ """
+ Return True if line (a text or byte string) equals '\n'.
+ """
+ return line == ('\n' if isinstance(line, six.text_type) else b'\n')
diff --git a/docs/ref/files/file.txt b/docs/ref/files/file.txt
index 6874936eab..57f8e9eab3 100644
--- a/docs/ref/files/file.txt
+++ b/docs/ref/files/file.txt
@@ -53,6 +53,15 @@ The ``File`` Class
Iterate over the file yielding one line at a time.
+ .. versionchanged:: 1.8
+
+ ``File`` now uses `universal newlines`_. The following are
+ recognized as ending a line: the Unix end-of-line convention
+ ``'\n'``, the Windows convention ``'\r\n'``, and the old Macintosh
+ convention ``'\r'``.
+
+ .. _universal newlines: http://www.python.org/dev/peps/pep-0278
+
.. method:: chunks([chunk_size=None])
Iterate over the file yielding "chunks" of a given size. ``chunk_size``
diff --git a/docs/ref/files/uploads.txt b/docs/ref/files/uploads.txt
index 1b9103bb3b..0817816197 100644
--- a/docs/ref/files/uploads.txt
+++ b/docs/ref/files/uploads.txt
@@ -82,10 +82,15 @@ Here are some useful attributes of ``UploadedFile``:
for line in uploadedfile:
do_something_with(line)
- However, *unlike* standard Python files, :class:`UploadedFile` only
- understands ``\n`` (also known as "Unix-style") line endings. If you know
- that you need to handle uploaded files with different line endings, you'll
- need to do so in your view.
+ Lines are split using `universal newlines`_. The following are recognized
+ as ending a line: the Unix end-of-line convention ``'\n'``, the Windows
+ convention ``'\r\n'``, and the old Macintosh convention ``'\r'``.
+
+ .. _universal newlines: http://www.python.org/dev/peps/pep-0278
+
+ .. versionchanged:: 1.8
+
+ Previously lines were only split on the Unix end-of-line ``'\n'``.
Subclasses of ``UploadedFile`` include:
diff --git a/docs/releases/1.8.txt b/docs/releases/1.8.txt
index 23c6deeef1..dbdff7b7e0 100644
--- a/docs/releases/1.8.txt
+++ b/docs/releases/1.8.txt
@@ -659,6 +659,13 @@ Miscellaneous
* By default, :ref:`call_command <call-command>` now always skips the check
framework (unless you pass it ``skip_checks=False``).
+* When iterating over lines, :class:`~django.core.files.File` now uses
+ `universal newlines`_. The following are recognized as ending a line: the
+ Unix end-of-line convention ``'\n'``, the Windows convention ``'\r\n'``, and
+ the old Macintosh convention ``'\r'``.
+
+ .. _universal newlines: http://www.python.org/dev/peps/pep-0278
+
.. _deprecated-features-1.8:
Features deprecated in 1.8
diff --git a/tests/files/tests.py b/tests/files/tests.py
index be243b2527..f2f1df3626 100644
--- a/tests/files/tests.py
+++ b/tests/files/tests.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-from io import BytesIO
+from io import BytesIO, StringIO
import os
import gzip
import tempfile
@@ -72,6 +72,54 @@ class FileTests(unittest.TestCase):
file = File(BytesIO(b'one\ntwo\nthree'))
self.assertEqual(list(file), [b'one\n', b'two\n', b'three'])
+ def test_file_iteration_windows_newlines(self):
+ """
+ #8149 - File objects with \r\n line endings should yield lines
+ when iterated over.
+ """
+ f = File(BytesIO(b'one\r\ntwo\r\nthree'))
+ self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three'])
+
+ def test_file_iteration_mac_newlines(self):
+ """
+ #8149 - File objects with \r line endings should yield lines
+ when iterated over.
+ """
+ f = File(BytesIO(b'one\rtwo\rthree'))
+ self.assertEqual(list(f), [b'one\r', b'two\r', b'three'])
+
+ def test_file_iteration_mixed_newlines(self):
+ f = File(BytesIO(b'one\rtwo\nthree\r\nfour'))
+ self.assertEqual(list(f), [b'one\r', b'two\n', b'three\r\n', b'four'])
+
+ def test_file_iteration_with_unix_newline_at_chunk_boundary(self):
+ f = File(BytesIO(b'one\ntwo\nthree'))
+ # Set chunk size to create a boundary after \n:
+ # b'one\n...
+ # ^
+ f.DEFAULT_CHUNK_SIZE = 4
+ self.assertEqual(list(f), [b'one\n', b'two\n', b'three'])
+
+ def test_file_iteration_with_windows_newline_at_chunk_boundary(self):
+ f = File(BytesIO(b'one\r\ntwo\r\nthree'))
+ # Set chunk size to create a boundary between \r and \n:
+ # b'one\r\n...
+ # ^
+ f.DEFAULT_CHUNK_SIZE = 4
+ self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three'])
+
+ def test_file_iteration_with_mac_newline_at_chunk_boundary(self):
+ f = File(BytesIO(b'one\rtwo\rthree'))
+ # Set chunk size to create a boundary after \r:
+ # b'one\r...
+ # ^
+ f.DEFAULT_CHUNK_SIZE = 4
+ self.assertEqual(list(f), [b'one\r', b'two\r', b'three'])
+
+ def test_file_iteration_with_text(self):
+ f = File(StringIO('one\ntwo\nthree'))
+ self.assertEqual(list(f), ['one\n', 'two\n', 'three'])
+
class NoNameFileTestCase(unittest.TestCase):
"""