| author | Brian Rosner <brosner@gmail.com> | 2008-07-01 15:49:08 +0000 |
|---|---|---|
| committer | Brian Rosner <brosner@gmail.com> | 2008-07-01 15:49:08 +0000 |
| commit | 0e8710d5900a75b9a4a1caebb82c939896e99cff |
| tree | f3db8fb3f6b932bfc48526a70c332efce66d5cac /django |
| parent | 595e9191f519af9b1c0c4b657fd3923c0997938c |
newforms-admin: Merged from trunk up to [7814].
git-svn-id: http://code.djangoproject.com/svn/django/branches/newforms-admin@7815 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'django')
| Mode | File | Lines |
|---|---|---|
| -rw-r--r-- | django/conf/global_settings.py | 15 |
| -rw-r--r-- | django/contrib/sitemaps/management/commands/ping_google.py | 14 |
| -rw-r--r-- | django/core/files/__init__.py | 0 |
| -rw-r--r-- | django/core/files/locks.py | 66 |
| -rw-r--r-- | django/core/files/move.py | 59 |
| -rw-r--r-- | django/core/files/uploadedfile.py | 190 |
| -rw-r--r-- | django/core/files/uploadhandler.py | 235 |
| -rw-r--r-- | django/core/handlers/modpython.py | 3 |
| -rw-r--r-- | django/core/handlers/wsgi.py | 5 |
| -rw-r--r-- | django/db/models/base.py | 60 |
| -rw-r--r-- | django/db/models/fields/__init__.py | 18 |
| -rw-r--r-- | django/db/models/sql/query.py | 2 |
| -rw-r--r-- | django/http/__init__.py | 64 |
| -rw-r--r-- | django/http/multipartparser.py | 658 |
| -rw-r--r-- | django/newforms/fields.py | 62 |
| -rw-r--r-- | django/oldforms/__init__.py | 19 |
| -rw-r--r-- | django/test/client.py | 26 |
| -rw-r--r-- | django/utils/datastructures.py | 50 |
| -rw-r--r-- | django/utils/text.py | 24 |
19 files changed, 1494 insertions, 76 deletions
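
The bulk of this merge is trunk's new streaming file-upload machinery: a `FILE_UPLOAD_HANDLERS` setting, `UploadedFile` objects replacing the old files-as-dictionaries interface, and a rewritten multipart parser that feeds chunks to upload handlers. As a minimal sketch of the handler API added in `django/core/files/uploadhandler.py` below — the `QuotaUploadHandler` name and the 10 MB cap are illustrative, not part of the patch — a project could abort oversized uploads before they reach memory or disk; the full diff follows.

```python
# Illustration only: QuotaUploadHandler and the 10 MB figure are not part of
# this patch; they exercise the FileUploadHandler/StopUpload API it introduces.
from django.core.files.uploadhandler import FileUploadHandler, StopUpload

class QuotaUploadHandler(FileUploadHandler):
    """Refuse any single upload larger than 10 MB."""

    QUOTA = 10 * 2 ** 20  # 10 MB, an arbitrary example limit

    def __init__(self, request=None):
        super(QuotaUploadHandler, self).__init__(request)
        self.total_upload = 0

    def receive_data_chunk(self, raw_data, start):
        self.total_upload += len(raw_data)
        if self.total_upload >= self.QUOTA:
            # connection_reset=True makes the parser stop reading the rest
            # of the request body instead of consuming it.
            raise StopUpload(connection_reset=True)
        # Returning the chunk passes it on to the next handler in the list
        # (e.g. MemoryFileUploadHandler or TemporaryFileUploadHandler).
        return raw_data

    def file_complete(self, file_size):
        # This handler never produces a file itself; returning None lets a
        # later handler supply the UploadedFile object.
        return None
```

Such a handler would be listed in `FILE_UPLOAD_HANDLERS` ahead of the built-in memory and temporary-file handlers, or inserted into `request.upload_handlers` before `request.POST` is read; the patch deliberately raises an error if the handler list is modified after the upload has been parsed.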
diff --git a/django/conf/global_settings.py b/django/conf/global_settings.py index 006ab421dd..2c9720da10 100644 --- a/django/conf/global_settings.py +++ b/django/conf/global_settings.py @@ -231,6 +231,21 @@ MEDIA_ROOT = '' # Example: "http://media.lawrence.com" MEDIA_URL = '' +# List of upload handler classes to be applied in order. +FILE_UPLOAD_HANDLERS = ( + 'django.core.files.uploadhandler.MemoryFileUploadHandler', + 'django.core.files.uploadhandler.TemporaryFileUploadHandler', +) + +# Maximum size, in bytes, of a request before it will be streamed to the +# file system instead of into memory. +FILE_UPLOAD_MAX_MEMORY_SIZE = 2621440 # i.e. 2.5 MB + +# Directory in which upload streamed files will be temporarily saved. A value of +# `None` will make Django use the operating system's default temporary directory +# (i.e. "/tmp" on *nix systems). +FILE_UPLOAD_TEMP_DIR = None + # Default formatting for date objects. See all available format strings here: # http://www.djangoproject.com/documentation/templates/#now DATE_FORMAT = 'N j, Y' diff --git a/django/contrib/sitemaps/management/commands/ping_google.py b/django/contrib/sitemaps/management/commands/ping_google.py new file mode 100644 index 0000000000..afff04b39c --- /dev/null +++ b/django/contrib/sitemaps/management/commands/ping_google.py @@ -0,0 +1,14 @@ +from django.core.management.base import BaseCommand +from django.contrib.sitemaps import ping_google + + +class Command(BaseCommand): + help = "Ping google with an updated sitemap, pass optional url of sitemap" + + def execute(self, *args, **options): + if len(args) == 1: + sitemap_url = args[0] + else: + sitemap_url = None + ping_google(sitemap_url=sitemap_url) + diff --git a/django/core/files/__init__.py b/django/core/files/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/django/core/files/__init__.py diff --git a/django/core/files/locks.py b/django/core/files/locks.py new file mode 100644 index 0000000000..212b51a73d --- /dev/null +++ b/django/core/files/locks.py @@ -0,0 +1,66 @@ +""" +Portable file locking utilities. + +Based partially on example by Jonathan Feignberg <jdf@pobox.com> in the Python +Cookbook, licensed under the Python Software License. + + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203 + +Example Usage:: + + >>> from django.core.files import locks + >>> f = open('./file', 'wb') + >>> locks.lock(f, locks.LOCK_EX) + >>> f.write('Django') + >>> f.close() +""" + +__all__ = ('LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock') + +system_type = None + +try: + import win32con + import win32file + import pywintypes + LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK + LOCK_SH = 0 + LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY + __overlapped = pywintypes.OVERLAPPED() + system_type = 'nt' +except (ImportError, AttributeError): + pass + +try: + import fcntl + LOCK_EX = fcntl.LOCK_EX + LOCK_SH = fcntl.LOCK_SH + LOCK_NB = fcntl.LOCK_NB + system_type = 'posix' +except (ImportError, AttributeError): + pass + +if system_type == 'nt': + def lock(file, flags): + hfile = win32file._get_osfhandle(file.fileno()) + win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped) + + def unlock(file): + hfile = win32file._get_osfhandle(file.fileno()) + win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped) +elif system_type == 'posix': + def lock(file, flags): + fcntl.flock(file.fileno(), flags) + + def unlock(file): + fcntl.flock(file.fileno(), fcntl.LOCK_UN) +else: + # File locking is not supported. 
+ LOCK_EX = LOCK_SH = LOCK_NB = None + + # Dummy functions that don't do anything. + def lock(file, flags): + pass + + def unlock(file): + pass diff --git a/django/core/files/move.py b/django/core/files/move.py new file mode 100644 index 0000000000..66873d450c --- /dev/null +++ b/django/core/files/move.py @@ -0,0 +1,59 @@ +""" +Move a file in the safest way possible:: + + >>> from django.core.files.move import file_move_save + >>> file_move_save("/tmp/old_file", "/tmp/new_file") +""" + +import os +from django.core.files import locks + +__all__ = ['file_move_safe'] + +try: + import shutil + file_move = shutil.move +except ImportError: + file_move = os.rename + +def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False): + """ + Moves a file from one location to another in the safest way possible. + + First, try using ``shutils.move``, which is OS-dependent but doesn't break + if moving across filesystems. Then, try ``os.rename``, which will break + across filesystems. Finally, streams manually from one file to another in + pure Python. + + If the destination file exists and ``allow_overwrite`` is ``False``, this + function will throw an ``IOError``. + """ + + # There's no reason to move if we don't have to. + if old_file_name == new_file_name: + return + + if not allow_overwrite and os.path.exists(new_file_name): + raise IOError("Cannot overwrite existing file '%s'." % new_file_name) + + try: + file_move(old_file_name, new_file_name) + return + except OSError: + # This will happen with os.rename if moving to another filesystem + pass + + # If the built-in didn't work, do it the hard way. + new_file = open(new_file_name, 'wb') + locks.lock(new_file, locks.LOCK_EX) + old_file = open(old_file_name, 'rb') + current_chunk = None + + while current_chunk != '': + current_chunk = old_file.read(chunk_size) + new_file.write(current_chunk) + + new_file.close() + old_file.close() + + os.remove(old_file_name) diff --git a/django/core/files/uploadedfile.py b/django/core/files/uploadedfile.py new file mode 100644 index 0000000000..51cec172d4 --- /dev/null +++ b/django/core/files/uploadedfile.py @@ -0,0 +1,190 @@ +""" +Classes representing uploaded files. +""" + +import os +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile') + +class UploadedFile(object): + """ + A abstract uploadded file (``TemporaryUploadedFile`` and + ``InMemoryUploadedFile`` are the built-in concrete subclasses). + + An ``UploadedFile`` object behaves somewhat like a file object and + represents some file data that the user submitted with a form. + """ + DEFAULT_CHUNK_SIZE = 64 * 2**10 + + def __init__(self, file_name=None, content_type=None, file_size=None, charset=None): + self.file_name = file_name + self.file_size = file_size + self.content_type = content_type + self.charset = charset + + def __repr__(self): + return "<%s: %s (%s)>" % (self.__class__.__name__, self.file_name, self.content_type) + + def _set_file_name(self, name): + # Sanitize the file name so that it can't be dangerous. + if name is not None: + # Just use the basename of the file -- anything else is dangerous. + name = os.path.basename(name) + + # File names longer than 255 characters can cause problems on older OSes. 
+ if len(name) > 255: + name, ext = os.path.splitext(name) + name = name[:255 - len(ext)] + ext + + self._file_name = name + + def _get_file_name(self): + return self._file_name + + file_name = property(_get_file_name, _set_file_name) + + def chunk(self, chunk_size=None): + """ + Read the file and yield chucks of ``chunk_size`` bytes (defaults to + ``UploadedFile.DEFAULT_CHUNK_SIZE``). + """ + if not chunk_size: + chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE + + if hasattr(self, 'seek'): + self.seek(0) + # Assume the pointer is at zero... + counter = self.file_size + + while counter > 0: + yield self.read(chunk_size) + counter -= chunk_size + + def multiple_chunks(self, chunk_size=None): + """ + Returns ``True`` if you can expect multiple chunks. + + NB: If a particular file representation is in memory, subclasses should + always return ``False`` -- there's no good reason to read from memory in + chunks. + """ + if not chunk_size: + chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE + return self.file_size < chunk_size + + # Abstract methods; subclasses *must* default read() and probably should + # define open/close. + def read(self, num_bytes=None): + raise NotImplementedError() + + def open(self): + pass + + def close(self): + pass + + # Backwards-compatible support for uploaded-files-as-dictionaries. + def __getitem__(self, key): + import warnings + warnings.warn( + message = "The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + backwards_translate = { + 'filename': 'file_name', + 'content-type': 'content_type', + } + + if key == 'content': + return self.read() + elif key == 'filename': + return self.file_name + elif key == 'content-type': + return self.content_type + else: + return getattr(self, key) + +class TemporaryUploadedFile(UploadedFile): + """ + A file uploaded to a temporary location (i.e. stream-to-disk). + """ + + def __init__(self, file, file_name, content_type, file_size, charset): + super(TemporaryUploadedFile, self).__init__(file_name, content_type, file_size, charset) + self.file = file + self.path = file.name + self.file.seek(0) + + def temporary_file_path(self): + """ + Returns the full path of this file. + """ + return self.path + + def read(self, *args, **kwargs): + return self.file.read(*args, **kwargs) + + def open(self): + self.seek(0) + + def seek(self, *args, **kwargs): + self.file.seek(*args, **kwargs) + +class InMemoryUploadedFile(UploadedFile): + """ + A file uploaded into memory (i.e. stream-to-memory). + """ + def __init__(self, file, field_name, file_name, content_type, charset, file_size): + super(InMemoryUploadedFile, self).__init__(file_name, content_type, charset, file_size) + self.file = file + self.field_name = field_name + self.file.seek(0) + + def seek(self, *args, **kwargs): + self.file.seek(*args, **kwargs) + + def open(self): + self.seek(0) + + def read(self, *args, **kwargs): + return self.file.read(*args, **kwargs) + + def chunk(self, chunk_size=None): + self.file.seek(0) + yield self.read() + + def multiple_chunks(self, chunk_size=None): + # Since it's in memory, we'll never have multiple chunks. + return False + +class SimpleUploadedFile(InMemoryUploadedFile): + """ + A simple representation of a file, which just has content, size, and a name. 
+ """ + def __init__(self, name, content, content_type='text/plain'): + self.file = StringIO(content or '') + self.file_name = name + self.field_name = None + self.file_size = len(content or '') + self.content_type = content_type + self.charset = None + self.file.seek(0) + + def from_dict(cls, file_dict): + """ + Creates a SimpleUploadedFile object from + a dictionary object with the following keys: + - filename + - content-type + - content + """ + return cls(file_dict['filename'], + file_dict['content'], + file_dict.get('content-type', 'text/plain')) + + from_dict = classmethod(from_dict) diff --git a/django/core/files/uploadhandler.py b/django/core/files/uploadhandler.py new file mode 100644 index 0000000000..034953972a --- /dev/null +++ b/django/core/files/uploadhandler.py @@ -0,0 +1,235 @@ +""" +Base file upload handler classes, and the built-in concrete subclasses +""" +import os +import tempfile +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.core.files.uploadedfile import TemporaryUploadedFile, InMemoryUploadedFile + +__all__ = ['UploadFileException','StopUpload', 'SkipFile', 'FileUploadHandler', + 'TemporaryFileUploadHandler', 'MemoryFileUploadHandler', + 'load_handler'] + +class UploadFileException(Exception): + """ + Any error having to do with uploading files. + """ + pass + +class StopUpload(UploadFileException): + """ + This exception is raised when an upload must abort. + """ + def __init__(self, connection_reset=False): + """ + If ``connection_reset`` is ``True``, Django knows will halt the upload + without consuming the rest of the upload. This will cause the browser to + show a "connection reset" error. + """ + self.connection_reset = connection_reset + + def __unicode__(self): + if self.connection_reset: + return u'StopUpload: Halt current upload.' + else: + return u'StopUpload: Consume request data, then halt.' + +class SkipFile(UploadFileException): + """ + This exception is raised by an upload handler that wants to skip a given file. + """ + pass + +class StopFutureHandlers(UploadFileException): + """ + Upload handers that have handled a file and do not want future handlers to + run should raise this exception instead of returning None. + """ + pass + +class FileUploadHandler(object): + """ + Base class for streaming upload handlers. + """ + chunk_size = 64 * 2 ** 10 #: The default chunk size is 64 KB. + + def __init__(self, request=None): + self.file_name = None + self.content_type = None + self.content_length = None + self.charset = None + self.request = request + + def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None): + """ + Handle the raw input from the client. + + Parameters: + + :input_data: + An object that supports reading via .read(). + :META: + ``request.META``. + :content_length: + The (integer) value of the Content-Length header from the + client. + :boundary: The boundary from the Content-Type header. Be sure to + prepend two '--'. + """ + pass + + def new_file(self, field_name, file_name, content_type, content_length, charset=None): + """ + Signal that a new file has been started. + + Warning: As with any data from the client, you should not trust + content_length (and sometimes won't even get it). 
+ """ + self.field_name = field_name + self.file_name = file_name + self.content_type = content_type + self.content_length = content_length + self.charset = charset + + def receive_data_chunk(self, raw_data, start): + """ + Receive data from the streamed upload parser. ``start`` is the position + in the file of the chunk. + """ + raise NotImplementedError() + + def file_complete(self, file_size): + """ + Signal that a file has completed. File size corresponds to the actual + size accumulated by all the chunks. + + Subclasses must should return a valid ``UploadedFile`` object. + """ + raise NotImplementedError() + + def upload_complete(self): + """ + Signal that the upload is complete. Subclasses should perform cleanup + that is necessary for this handler. + """ + pass + +class TemporaryFileUploadHandler(FileUploadHandler): + """ + Upload handler that streams data into a temporary file. + """ + def __init__(self, *args, **kwargs): + super(TemporaryFileUploadHandler, self).__init__(*args, **kwargs) + + def new_file(self, file_name, *args, **kwargs): + """ + Create the file object to append to as data is coming in. + """ + super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs) + self.file = TemporaryFile(settings.FILE_UPLOAD_TEMP_DIR) + self.write = self.file.write + + def receive_data_chunk(self, raw_data, start): + self.write(raw_data) + + def file_complete(self, file_size): + self.file.seek(0) + return TemporaryUploadedFile(self.file, self.file_name, + self.content_type, file_size, + self.charset) + +class MemoryFileUploadHandler(FileUploadHandler): + """ + File upload handler to stream uploads into memory (used for small files). + """ + + def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None): + """ + Use the content_length to signal whether or not this handler should be in use. + """ + # Check the content-length header to see if we should + # If the the post is too large, we cannot use the Memory handler. + if content_length > settings.FILE_UPLOAD_MAX_MEMORY_SIZE: + self.activated = False + else: + self.activated = True + + def new_file(self, *args, **kwargs): + super(MemoryFileUploadHandler, self).new_file(*args, **kwargs) + if self.activated: + self.file = StringIO() + raise StopFutureHandlers() + + def receive_data_chunk(self, raw_data, start): + """ + Add the data to the StringIO file. + """ + if self.activated: + self.file.write(raw_data) + else: + return raw_data + + def file_complete(self, file_size): + """ + Return a file object if we're activated. + """ + if not self.activated: + return + + return InMemoryUploadedFile(self.file, self.field_name, self.file_name, + self.content_type, self.charset, file_size) + +class TemporaryFile(object): + """ + A temporary file that tries to delete itself when garbage collected. + """ + def __init__(self, dir): + if not dir: + dir = tempfile.gettempdir() + try: + (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir) + self.file = os.fdopen(fd, 'w+b') + except (OSError, IOError): + raise OSError("Could not create temporary file for uploading, have you set settings.FILE_UPLOAD_TEMP_DIR correctly?") + self.name = name + + def __getattr__(self, name): + a = getattr(self.__dict__['file'], name) + if type(a) != type(0): + setattr(self, name, a) + return a + + def __del__(self): + try: + os.unlink(self.name) + except OSError: + pass + +def load_handler(path, *args, **kwargs): + """ + Given a path to a handler, return an instance of that handler. 
+ + E.g.:: + >>> load_handler('django.core.files.uploadhandler.TemporaryFileUploadHandler', request) + <TemporaryFileUploadHandler object at 0x...> + + """ + i = path.rfind('.') + module, attr = path[:i], path[i+1:] + try: + mod = __import__(module, {}, {}, [attr]) + except ImportError, e: + raise ImproperlyConfigured('Error importing upload handler module %s: "%s"' % (module, e)) + except ValueError, e: + raise ImproperlyConfigured('Error importing upload handler module. Is FILE_UPLOAD_HANDLERS a correctly defined list or tuple?') + try: + cls = getattr(mod, attr) + except AttributeError: + raise ImproperlyConfigured('Module "%s" does not define a "%s" upload handler backend' % (module, attr)) + return cls(*args, **kwargs) diff --git a/django/core/handlers/modpython.py b/django/core/handlers/modpython.py index abab399009..332df6f54c 100644 --- a/django/core/handlers/modpython.py +++ b/django/core/handlers/modpython.py @@ -53,7 +53,8 @@ class ModPythonRequest(http.HttpRequest): def _load_post_and_files(self): "Populates self._post and self._files" if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'): - self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data) + self._raw_post_data = '' + self._post, self._files = self.parse_file_upload(self.META, self._req) else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() diff --git a/django/core/handlers/wsgi.py b/django/core/handlers/wsgi.py index df2ba19b65..795f139042 100644 --- a/django/core/handlers/wsgi.py +++ b/django/core/handlers/wsgi.py @@ -112,9 +112,8 @@ class WSGIRequest(http.HttpRequest): # Populates self._post and self._files if self.method == 'POST': if self.environ.get('CONTENT_TYPE', '').startswith('multipart'): - header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')]) - header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '') - self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data) + self._raw_post_data = '' + self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input']) else: self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() else: diff --git a/django/db/models/base.py b/django/db/models/base.py index 12019f23df..888e1d243f 100644 --- a/django/db/models/base.py +++ b/django/db/models/base.py @@ -19,6 +19,8 @@ from django.dispatch import dispatcher from django.utils.datastructures import SortedDict from django.utils.functional import curry from django.utils.encoding import smart_str, force_unicode, smart_unicode +from django.core.files.move import file_move_safe +from django.core.files import locks from django.conf import settings try: @@ -466,16 +468,51 @@ class Model(object): def _get_FIELD_size(self, field): return os.path.getsize(self._get_FIELD_filename(field)) - def _save_FIELD_file(self, field, filename, raw_contents, save=True): + def _save_FIELD_file(self, field, filename, raw_field, save=True): directory = field.get_directory_name() try: # Create the date-based directory if it doesn't exist. os.makedirs(os.path.join(settings.MEDIA_ROOT, directory)) except OSError: # Directory probably already exists. pass + + # + # Check for old-style usage (files-as-dictionaries). Warn here first + # since there are multiple locations where we need to support both new + # and old usage. 
+ # + if isinstance(raw_field, dict): + import warnings + warnings.warn( + message = "Representing uploaded files as dictionaries is"\ + " deprected. Use django.core.files.SimpleUploadedFile"\ + " instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + from django.core.files.uploadedfile import SimpleUploadedFile + raw_field = SimpleUploadedFile.from_dict(raw_field) + + elif isinstance(raw_field, basestring): + import warnings + warnings.warn( + message = "Representing uploaded files as strings is "\ + " deprecated. Use django.core.files.SimpleUploadedFile "\ + " instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + from django.core.files.uploadedfile import SimpleUploadedFile + raw_field = SimpleUploadedFile(filename, raw_field) + + if filename is None: + filename = raw_field.file_name + filename = field.get_filename(filename) + # # If the filename already exists, keep adding an underscore to the name of # the file until the filename doesn't exist. + # while os.path.exists(os.path.join(settings.MEDIA_ROOT, filename)): try: dot_index = filename.rindex('.') @@ -483,14 +520,27 @@ class Model(object): filename += '_' else: filename = filename[:dot_index] + '_' + filename[dot_index:] + # + # Save the file name on the object and write the file to disk + # - # Write the file to disk. setattr(self, field.attname, filename) full_filename = self._get_FIELD_filename(field) - fp = open(full_filename, 'wb') - fp.write(raw_contents) - fp.close() + + if hasattr(raw_field, 'temporary_file_path'): + # This file has a file path that we can move. + raw_field.close() + file_move_safe(raw_field.temporary_file_path(), full_filename) + + else: + # This is a normal uploadedfile that we can stream. + fp = open(full_filename, 'wb') + locks.lock(fp, locks.LOCK_EX) + for chunk in raw_field.chunk(): + fp.write(chunk) + locks.unlock(fp) + fp.close() # Save the width and/or height, if applicable. if isinstance(field, ImageField) and (field.width_field or field.height_field): diff --git a/django/db/models/fields/__init__.py b/django/db/models/fields/__init__.py index 8b58e69db1..b81b63d1b8 100644 --- a/django/db/models/fields/__init__.py +++ b/django/db/models/fields/__init__.py @@ -796,7 +796,7 @@ class FileField(Field): setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self)) setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self)) setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self)) - setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_contents, save=True: instance._save_FIELD_file(self, filename, raw_contents, save)) + setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field, save=True: instance._save_FIELD_file(self, filename, raw_field, save)) dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls) def delete_file(self, instance): @@ -819,9 +819,19 @@ class FileField(Field): if new_data.get(upload_field_name, False): func = getattr(new_object, 'save_%s_file' % self.name) if rel: - func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]["content"], save) + file = new_data[upload_field_name][0] else: - func(new_data[upload_field_name]["filename"], new_data[upload_field_name]["content"], save) + file = new_data[upload_field_name] + + # Backwards-compatible support for files-as-dictionaries. + # We don't need to raise a warning because Model._save_FIELD_file will + # do so for us. 
+ try: + file_name = file.file_name + except AttributeError: + file_name = file['filename'] + + func(file_name, file, save) def get_directory_name(self): return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to)))) @@ -834,7 +844,7 @@ class FileField(Field): def save_form_data(self, instance, data): from django.newforms.fields import UploadedFile if data and isinstance(data, UploadedFile): - getattr(instance, "save_%s_file" % self.name)(data.filename, data.content, save=False) + getattr(instance, "save_%s_file" % self.name)(data.filename, data.data, save=False) def formfield(self, **kwargs): defaults = {'form_class': forms.FileField} diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index 6c06609969..6eccaf997e 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -1131,7 +1131,7 @@ class Query(object): entry.negate() self.where.add(entry, AND) break - elif not (lookup_type == 'in' and not value): + elif not (lookup_type == 'in' and not value) and field.null: # Leaky abstraction artifact: We have to specifically # exclude the "foo__in=[]" case from this handling, because # it's short-circuited in the Where class. diff --git a/django/http/__init__.py b/django/http/__init__.py index 7faa3c875e..ef15479983 100644 --- a/django/http/__init__.py +++ b/django/http/__init__.py @@ -9,14 +9,15 @@ try: except ImportError: from cgi import parse_qsl -from django.utils.datastructures import MultiValueDict, FileDict +from django.utils.datastructures import MultiValueDict, ImmutableList from django.utils.encoding import smart_str, iri_to_uri, force_unicode - +from django.http.multipartparser import MultiPartParser +from django.conf import settings +from django.core.files import uploadhandler from utils import * RESERVED_CHARS="!*'();:@&=+$,/?%#[]" - class Http404(Exception): pass @@ -25,6 +26,7 @@ class HttpRequest(object): # The encoding used in GET/POST dicts. None means use default setting. _encoding = None + _upload_handlers = [] def __init__(self): self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {} @@ -102,39 +104,31 @@ class HttpRequest(object): encoding = property(_get_encoding, _set_encoding) -def parse_file_upload(header_dict, post_data): - """Returns a tuple of (POST QueryDict, FILES MultiValueDict).""" - import email, email.Message - from cgi import parse_header - raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()]) - raw_message += '\r\n\r\n' + post_data - msg = email.message_from_string(raw_message) - POST = QueryDict('', mutable=True) - FILES = MultiValueDict() - for submessage in msg.get_payload(): - if submessage and isinstance(submessage, email.Message.Message): - name_dict = parse_header(submessage['Content-Disposition'])[1] - # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads - # or {'name': 'blah'} for POST fields - # We assume all uploaded files have a 'filename' set. - if 'filename' in name_dict: - assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported" - if not name_dict['filename'].strip(): - continue - # IE submits the full path, so trim everything but the basename. - # (We can't use os.path.basename because that uses the server's - # directory separator, which may not be the same as the - # client's one.) 
- filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:] - FILES.appendlist(name_dict['name'], FileDict({ - 'filename': filename, - 'content-type': 'Content-Type' in submessage and submessage['Content-Type'] or None, - 'content': submessage.get_payload(), - })) - else: - POST.appendlist(name_dict['name'], submessage.get_payload()) - return POST, FILES + def _initialize_handlers(self): + self._upload_handlers = [uploadhandler.load_handler(handler, self) + for handler in settings.FILE_UPLOAD_HANDLERS] + + def _set_upload_handlers(self, upload_handlers): + if hasattr(self, '_files'): + raise AttributeError("You cannot set the upload handlers after the upload has been processed.") + self._upload_handlers = upload_handlers + + def _get_upload_handlers(self): + if not self._upload_handlers: + # If thre are no upload handlers defined, initialize them from settings. + self._initialize_handlers() + return self._upload_handlers + + upload_handlers = property(_get_upload_handlers, _set_upload_handlers) + def parse_file_upload(self, META, post_data): + """Returns a tuple of (POST QueryDict, FILES MultiValueDict).""" + self.upload_handlers = ImmutableList( + self.upload_handlers, + warning = "You cannot alter upload handlers after the upload has been processed." + ) + parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding) + return parser.parse() class QueryDict(MultiValueDict): """ diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py new file mode 100644 index 0000000000..8bed5681cf --- /dev/null +++ b/django/http/multipartparser.py @@ -0,0 +1,658 @@ +""" +Multi-part parsing for file uploads. + +Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to +file upload handlers for processing. +""" +import cgi +from django.conf import settings +from django.core.exceptions import SuspiciousOperation +from django.utils.datastructures import MultiValueDict +from django.utils.encoding import force_unicode +from django.utils.text import unescape_entities +from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers + +__all__ = ('MultiPartParser','MultiPartParserError','InputStreamExhausted') + +class MultiPartParserError(Exception): + pass + +class InputStreamExhausted(Exception): + """ + No more reads are allowed from this device. + """ + pass + +RAW = "raw" +FILE = "file" +FIELD = "field" + +class MultiPartParser(object): + """ + A rfc2388 multipart/form-data parser. + + ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks + and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If + ``file_upload_dir`` is defined files will be streamed to temporary files in + that directory. + """ + def __init__(self, META, input_data, upload_handlers, encoding=None): + """ + Initialize the MultiPartParser object. + + :META: + The standard ``META`` dictionary in Django request objects. + :input_data: + The raw post data, as a bytestring. + :upload_handler: + An UploadHandler instance that performs operations on the uploaded + data. + :encoding: + The encoding with which to treat the incoming data. + """ + + # + # Content-Type should containt multipart and the boundary information. + # + + content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', '')) + if not content_type.startswith('multipart/'): + raise MultiPartParserError('Invalid Content-Type: %s' % content_type) + + # Parse the header to get the boundary to split the parts. 
+ ctypes, opts = parse_header(content_type) + boundary = opts.get('boundary') + if not boundary or not cgi.valid_boundary(boundary): + raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary) + + + # + # Content-Length should contain the length of the body we are about + # to receive. + # + try: + content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0))) + except (ValueError, TypeError): + # For now set it to 0; we'll try again later on down. + content_length = 0 + + if content_length <= 0: + # This means we shouldn't continue...raise an error. + raise MultiPartParserError("Invalid content length: %r" % content_length) + + self._boundary = boundary + self._input_data = input_data + + # For compatibility with low-level network APIs (with 32-bit integers), + # the chunk size should be < 2^31, but still divisible by 4. + self._chunk_size = min(2**31-4, *[x.chunk_size for x in upload_handlers if x.chunk_size]) + + self._meta = META + self._encoding = encoding or settings.DEFAULT_CHARSET + self._content_length = content_length + self._upload_handlers = upload_handlers + + def parse(self): + """ + Parse the POST data and break it into a FILES MultiValueDict and a POST + MultiValueDict. + + Returns a tuple containing the POST and FILES dictionary, respectively. + """ + # We have to import QueryDict down here to avoid a circular import. + from django.http import QueryDict + + encoding = self._encoding + handlers = self._upload_handlers + + limited_input_data = LimitBytes(self._input_data, self._content_length) + + # See if the handler will want to take care of the parsing. + # This allows overriding everything if somebody wants it. + for handler in handlers: + result = handler.handle_raw_input(limited_input_data, + self._meta, + self._content_length, + self._boundary, + encoding) + if result is not None: + return result[0], result[1] + + # Create the data structures to be used later. + self._post = QueryDict('', mutable=True) + self._files = MultiValueDict() + + # Instantiate the parser and stream: + stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size)) + + # Whether or not to signal a file-completion at the beginning of the loop. + old_field_name = None + counters = [0] * len(handlers) + + try: + for item_type, meta_data, field_stream in Parser(stream, self._boundary): + if old_field_name: + # We run this at the beginning of the next loop + # since we cannot be sure a file is complete until + # we hit the next boundary/part of the multipart content. + self.handle_file_complete(old_field_name, counters) + + try: + disposition = meta_data['content-disposition'][1] + field_name = disposition['name'].strip() + except (KeyError, IndexError, AttributeError): + continue + + transfer_encoding = meta_data.get('content-transfer-encoding') + field_name = force_unicode(field_name, encoding, errors='replace') + + if item_type == FIELD: + # This is a post field, we can just set it in the post + if transfer_encoding == 'base64': + raw_data = field_stream.read() + try: + data = str(raw_data).decode('base64') + except: + data = raw_data + else: + data = field_stream.read() + + self._post.appendlist(field_name, + force_unicode(data, encoding, errors='replace')) + elif item_type == FILE: + # This is a file, use the handler... 
+ file_successful = True + file_name = disposition.get('filename') + if not file_name: + continue + file_name = force_unicode(file_name, encoding, errors='replace') + file_name = self.IE_sanitize(unescape_entities(file_name)) + + content_type = meta_data.get('content-type', ('',))[0].strip() + try: + charset = meta_data.get('content-type', (0,{}))[1].get('charset', None) + except: + charset = None + + try: + content_length = int(meta_data.get('content-length')[0]) + except (IndexError, TypeError, ValueError): + content_length = None + + counters = [0] * len(handlers) + try: + for handler in handlers: + try: + handler.new_file(field_name, file_name, + content_type, content_length, + charset) + except StopFutureHandlers: + break + + for chunk in field_stream: + if transfer_encoding == 'base64': + # We only special-case base64 transfer encoding + try: + chunk = str(chunk).decode('base64') + except Exception, e: + # Since this is only a chunk, any error is an unfixable error. + raise MultiPartParserError("Could not decode base64 data: %r" % e) + + for i, handler in enumerate(handlers): + chunk_length = len(chunk) + chunk = handler.receive_data_chunk(chunk, + counters[i]) + counters[i] += chunk_length + if chunk is None: + # If the chunk received by the handler is None, then don't continue. + break + + except SkipFile, e: + file_successful = False + # Just use up the rest of this file... + exhaust(field_stream) + else: + # Handle file upload completions on next iteration. + old_field_name = field_name + else: + # If this is neither a FIELD or a FILE, just exhaust the stream. + exhaust(stream) + except StopUpload, e: + if not e.connection_reset: + exhaust(limited_input_data) + else: + # Make sure that the request data is all fed + exhaust(limited_input_data) + + # Signal that the upload has completed. + for handler in handlers: + retval = handler.upload_complete() + if retval: + break + + return self._post, self._files + + def handle_file_complete(self, old_field_name, counters): + """ + Handle all the signalling that takes place when a file is complete. + """ + for i, handler in enumerate(self._upload_handlers): + file_obj = handler.file_complete(counters[i]) + if file_obj: + # If it returns a file object, then set the files dict. + self._files.appendlist(force_unicode(old_field_name, + self._encoding, + errors='replace'), + file_obj) + break + + def IE_sanitize(self, filename): + """Cleanup filename from Internet Explorer full paths.""" + return filename and filename[filename.rfind("\\")+1:].strip() + +class LazyStream(object): + """ + The LazyStream wrapper allows one to get and "unget" bytes from a stream. + + Given a producer object (an iterator that yields bytestrings), the + LazyStream object will support iteration, reading, and keeping a "look-back" + variable in case you need to "unget" some bytes. + """ + def __init__(self, producer, length=None): + """ + Every LazyStream must have a producer when instantiated. + + A producer is an iterable that returns a string each time it + is called. + """ + self._producer = producer + self._empty = False + self._leftover = '' + self.length = length + self._position = 0 + self._remaining = length + + # These fields are to do sanity checking to make sure we don't + # have infinite loops getting/ungetting from the stream. The + # purpose overall is to raise an exception if we perform lots + # of stream get/unget gymnastics without getting + # anywhere. Naturally this is not sound, but most probably + # would indicate a bug if the exception is raised. 
+ + # largest position tell us how far this lazystream has ever + # been advanced + self._largest_position = 0 + + # "modifications since" will start at zero and increment every + # time the position is modified but a new largest position is + # not achieved. + self._modifications_since = 0 + + def tell(self): + return self.position + + def read(self, size=None): + def parts(): + remaining = (size is not None and [size] or [self._remaining])[0] + # do the whole thing in one shot if no limit was provided. + if remaining is None: + yield ''.join(self) + return + + # otherwise do some bookkeeping to return exactly enough + # of the stream and stashing any extra content we get from + # the producer + while remaining != 0: + assert remaining > 0, 'remaining bytes to read should never go negative' + + chunk = self.next() + + emitting = chunk[:remaining] + self.unget(chunk[remaining:]) + remaining -= len(emitting) + yield emitting + + out = ''.join(parts()) + return out + + def next(self): + """ + Used when the exact number of bytes to read is unimportant. + + This procedure just returns whatever is chunk is conveniently returned + from the iterator instead. Useful to avoid unnecessary bookkeeping if + performance is an issue. + """ + if self._leftover: + output = self._leftover + self._leftover = '' + else: + output = self._producer.next() + self.position += len(output) + return output + + def close(self): + """ + Used to invalidate/disable this lazy stream. + + Replaces the producer with an empty list. Any leftover bytes that have + already been read will still be reported upon read() and/or next(). + """ + self._producer = [] + + def __iter__(self): + return self + + def unget(self, bytes): + """ + Places bytes back onto the front of the lazy stream. + + Future calls to read() will return those bytes first. The + stream position and thus tell() will be rewound. + """ + self.position -= len(bytes) + self._leftover = ''.join([bytes, self._leftover]) + + def _set_position(self, value): + if value > self._largest_position: + self._modifications_since = 0 + self._largest_position = value + else: + self._modifications_since += 1 + if self._modifications_since > 500: + raise SuspiciousOperation( + "The multipart parser got stuck, which shouldn't happen with" + " normal uploaded files. Check for malicious upload activity;" + " if there is none, report this to the Django developers." + ) + + self._position = value + + position = property(lambda self: self._position, _set_position) + +class ChunkIter(object): + """ + An iterable that will yield chunks of data. Given a file-like object as the + constructor, this object will yield chunks of read operations from that + object. + """ + def __init__(self, flo, chunk_size=64 * 1024): + self.flo = flo + self.chunk_size = chunk_size + + def next(self): + try: + data = self.flo.read(self.chunk_size) + except InputStreamExhausted: + raise StopIteration() + if data: + return data + else: + raise StopIteration() + + def __iter__(self): + return self + +class LimitBytes(object): + """ Limit bytes for a file object. """ + def __init__(self, fileobject, length): + self._file = fileobject + self.remaining = length + + def read(self, num_bytes=None): + """ + Read data from the underlying file. + If you ask for too much or there isn't anything left, + this will raise an InputStreamExhausted error. 
+ """ + if self.remaining <= 0: + raise InputStreamExhausted() + if num_bytes is None: + num_bytes = self.remaining + else: + num_bytes = min(num_bytes, self.remaining) + self.remaining -= num_bytes + return self._file.read(num_bytes) + +class InterBoundaryIter(object): + """ + A Producer that will iterate over boundaries. + """ + def __init__(self, stream, boundary): + self._stream = stream + self._boundary = boundary + + def __iter__(self): + return self + + def next(self): + try: + return LazyStream(BoundaryIter(self._stream, self._boundary)) + except InputStreamExhausted: + raise StopIteration() + +class BoundaryIter(object): + """ + A Producer that is sensitive to boundaries. + + Will happily yield bytes until a boundary is found. Will yield the bytes + before the boundary, throw away the boundary bytes themselves, and push the + post-boundary bytes back on the stream. + + The future calls to .next() after locating the boundary will raise a + StopIteration exception. + """ + + def __init__(self, stream, boundary): + self._stream = stream + self._boundary = boundary + self._done = False + # rollback an additional six bytes because the format is like + # this: CRLF<boundary>[--CRLF] + self._rollback = len(boundary) + 6 + + # Try to use mx fast string search if available. Otherwise + # use Python find. Wrap the latter for consistency. + unused_char = self._stream.read(1) + if not unused_char: + raise InputStreamExhausted() + self._stream.unget(unused_char) + try: + from mx.TextTools import FS + self._fs = FS(boundary).find + except ImportError: + self._fs = lambda data: data.find(boundary) + + def __iter__(self): + return self + + def next(self): + if self._done: + raise StopIteration() + + stream = self._stream + rollback = self._rollback + + bytes_read = 0 + chunks = [] + for bytes in stream: + bytes_read += len(bytes) + chunks.append(bytes) + if bytes_read > rollback: + break + if not bytes: + break + else: + self._done = True + + if not chunks: + raise StopIteration() + + chunk = ''.join(chunks) + boundary = self._find_boundary(chunk, len(chunk) < self._rollback) + + if boundary: + end, next = boundary + stream.unget(chunk[next:]) + self._done = True + return chunk[:end] + else: + # make sure we dont treat a partial boundary (and + # its separators) as data + if not chunk[:-rollback]:# and len(chunk) >= (len(self._boundary) + 6): + # There's nothing left, we should just return and mark as done. + self._done = True + return chunk + else: + stream.unget(chunk[-rollback:]) + return chunk[:-rollback] + + def _find_boundary(self, data, eof = False): + """ + Finds a multipart boundary in data. + + Should no boundry exist in the data None is returned instead. Otherwise + a tuple containing the indices of the following are returned: + + * the end of current encapsulation + * the start of the next encapsulation + """ + index = self._fs(data) + if index < 0: + return None + else: + end = index + next = index + len(self._boundary) + data_len = len(data) - 1 + # backup over CRLF + if data[max(0,end-1)] == '\n': + end -= 1 + if data[max(0,end-1)] == '\r': + end -= 1 + # skip over --CRLF + #if data[min(data_len,next)] == '-': + # next += 1 + #if data[min(data_len,next)] == '-': + # next += 1 + #if data[min(data_len,next)] == '\r': + # next += 1 + #if data[min(data_len,next)] == '\n': + # next += 1 + return end, next + +def exhaust(stream_or_iterable): + """ + Completely exhausts an iterator or stream. + + Raise a MultiPartParserError if the argument is not a stream or an iterable. 
+ """ + iterator = None + try: + iterator = iter(stream_or_iterable) + except TypeError: + iterator = ChunkIter(stream_or_iterable, 16384) + + if iterator is None: + raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter') + + for __ in iterator: + pass + +def parse_boundary_stream(stream, max_header_size): + """ + Parses one and exactly one stream that encapsulates a boundary. + """ + # Stream at beginning of header, look for end of header + # and parse it if found. The header must fit within one + # chunk. + chunk = stream.read(max_header_size) + + # 'find' returns the top of these four bytes, so we'll + # need to munch them later to prevent them from polluting + # the payload. + header_end = chunk.find('\r\n\r\n') + + def _parse_header(line): + main_value_pair, params = parse_header(line) + try: + name, value = main_value_pair.split(':', 1) + except: + raise ValueError("Invalid header: %r" % line) + return name, (value, params) + + if header_end == -1: + # we find no header, so we just mark this fact and pass on + # the stream verbatim + stream.unget(chunk) + return (RAW, {}, stream) + + header = chunk[:header_end] + + # here we place any excess chunk back onto the stream, as + # well as throwing away the CRLFCRLF bytes from above. + stream.unget(chunk[header_end + 4:]) + + TYPE = RAW + outdict = {} + + # Eliminate blank lines + for line in header.split('\r\n'): + # This terminology ("main value" and "dictionary of + # parameters") is from the Python docs. + try: + name, (value, params) = _parse_header(line) + except: + continue + + if name == 'content-disposition': + TYPE = FIELD + if params.get('filename'): + TYPE = FILE + + outdict[name] = value, params + + if TYPE == RAW: + stream.unget(chunk) + + return (TYPE, outdict, stream) + +class Parser(object): + def __init__(self, stream, boundary): + self._stream = stream + self._separator = '--' + boundary + + def __iter__(self): + boundarystream = InterBoundaryIter(self._stream, self._separator) + for sub_stream in boundarystream: + # Iterate over each part + yield parse_boundary_stream(sub_stream, 1024) + +def parse_header(line): + """ Parse the header into a key-value. 
""" + plist = _parse_header_params(';' + line) + key = plist.pop(0).lower() + pdict = {} + for p in plist: + i = p.find('=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i+1:].strip() + if len(value) >= 2 and value[0] == value[-1] == '"': + value = value[1:-1] + value = value.replace('\\\\', '\\').replace('\\"', '"') + pdict[name] = value + return key, pdict + +def _parse_header_params(s): + plist = [] + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and s.count('"', 0, end) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + plist.append(f.strip()) + s = s[end:] + return plist diff --git a/django/newforms/fields.py b/django/newforms/fields.py index 6734c5450e..1feef31ee0 100644 --- a/django/newforms/fields.py +++ b/django/newforms/fields.py @@ -7,6 +7,11 @@ import datetime import os import re import time +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + # Python 2.3 fallbacks try: from decimal import Decimal, DecimalException @@ -416,9 +421,9 @@ except ImportError: class UploadedFile(StrAndUnicode): "A wrapper for files uploaded in a FileField" - def __init__(self, filename, content): + def __init__(self, filename, data): self.filename = filename - self.content = content + self.data = data def __unicode__(self): """ @@ -444,15 +449,34 @@ class FileField(Field): return None elif not data and initial: return initial + + if isinstance(data, dict): + # We warn once, then support both ways below. + import warnings + warnings.warn( + message = "Representing uploaded files as dictionaries is"\ + " deprecated. Use django.core.files.SimpleUploadedFile "\ + " instead.", + category = DeprecationWarning, + stacklevel = 2 + ) + try: - f = UploadedFile(data['filename'], data['content']) - except TypeError: + file_name = data.file_name + file_size = data.file_size + except AttributeError: + try: + file_name = data.get('filename') + file_size = bool(data['content']) + except (AttributeError, KeyError): + raise ValidationError(self.error_messages['invalid']) + + if not file_name: raise ValidationError(self.error_messages['invalid']) - except KeyError: - raise ValidationError(self.error_messages['missing']) - if not f.content: + if not file_size: raise ValidationError(self.error_messages['empty']) - return f + + return UploadedFile(file_name, data) class ImageField(FileField): default_error_messages = { @@ -470,15 +494,31 @@ class ImageField(FileField): elif not data and initial: return initial from PIL import Image - from cStringIO import StringIO + + # We need to get a file object for PIL. We might have a path or we might + # have to read the data into memory. + if hasattr(data, 'temporary_file_path'): + file = data.temporary_file_path() + else: + if hasattr(data, 'read'): + file = StringIO(data.read()) + else: + file = StringIO(data['content']) + try: # load() is the only method that can spot a truncated JPEG, # but it cannot be called sanely after verify() - trial_image = Image.open(StringIO(f.content)) + trial_image = Image.open(file) trial_image.load() + + # Since we're about to use the file again we have to reset the + # file object if possible. 
+ if hasattr(file, 'reset'): + file.reset() + # verify() is the only method that can spot a corrupt PNG, # but it must be called immediately after the constructor - trial_image = Image.open(StringIO(f.content)) + trial_image = Image.open(file) trial_image.verify() except Exception: # Python Imaging Library doesn't recognize it as an image raise ValidationError(self.error_messages['invalid_image']) diff --git a/django/oldforms/__init__.py b/django/oldforms/__init__.py index fc8727185f..ee838d234a 100644 --- a/django/oldforms/__init__.py +++ b/django/oldforms/__init__.py @@ -680,18 +680,27 @@ class FileUploadField(FormField): self.field_name, self.is_required = field_name, is_required self.validator_list = [self.isNonEmptyFile] + validator_list - def isNonEmptyFile(self, field_data, all_data): + def isNonEmptyFile(self, new_data, all_data): + if hasattr(new_data, 'upload_errors'): + upload_errors = new_data.upload_errors() + if upload_errors: + raise validators.CriticalValidationError, upload_errors try: - content = field_data['content'] - except TypeError: - raise validators.CriticalValidationError, ugettext("No file was submitted. Check the encoding type on the form.") - if not content: + file_size = new_data.file_size + except AttributeError: + file_size = len(new_data['content']) + if not file_size: raise validators.CriticalValidationError, ugettext("The submitted file is empty.") def render(self, data): return mark_safe(u'<input type="file" id="%s" class="v%s" name="%s" />' % \ (self.get_id(), self.__class__.__name__, self.field_name)) + def prepare(self, new_data): + if hasattr(new_data, 'upload_errors'): + upload_errors = new_data.upload_errors() + new_data[self.field_name] = { '_file_upload_error': upload_errors } + def html2python(data): if data is None: raise EmptyValue diff --git a/django/test/client.py b/django/test/client.py index a15876e6f9..6313181d61 100644 --- a/django/test/client.py +++ b/django/test/client.py @@ -1,7 +1,10 @@ import urllib import sys import os -from cStringIO import StringIO +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO from django.conf import settings from django.contrib.auth import authenticate, login from django.core.handlers.base import BaseHandler @@ -19,6 +22,25 @@ from django.utils.itercompat import is_iterable BOUNDARY = 'BoUnDaRyStRiNg' MULTIPART_CONTENT = 'multipart/form-data; boundary=%s' % BOUNDARY +class FakePayload(object): + """ + A wrapper around StringIO that restricts what can be read since data from + the network can't be seeked and cannot be read outside of its content + length. This makes sure that views can't do anything under the test client + that wouldn't work in Real Life. + """ + def __init__(self, content): + self.__content = StringIO(content) + self.__len = len(content) + + def read(self, num_bytes=None): + if num_bytes is None: + num_bytes = self.__len or 1 + assert self.__len >= num_bytes, "Cannot read more than the available bytes from the HTTP incoming data." + content = self.__content.read(num_bytes) + self.__len -= num_bytes + return content + class ClientHandler(BaseHandler): """ A HTTP Handler that can be used for testing purposes. 
@@ -236,7 +258,7 @@ class Client: 'CONTENT_TYPE': content_type, 'PATH_INFO': urllib.unquote(path), 'REQUEST_METHOD': 'POST', - 'wsgi.input': StringIO(post_data), + 'wsgi.input': FakePayload(post_data), } r.update(extra) diff --git a/django/utils/datastructures.py b/django/utils/datastructures.py index 21a72f2d1e..f27bc1cfff 100644 --- a/django/utils/datastructures.py +++ b/django/utils/datastructures.py @@ -332,17 +332,49 @@ class DotExpandedDict(dict): except TypeError: # Special-case if current isn't a dict. current = {bits[-1]: v} -class FileDict(dict): +class ImmutableList(tuple): """ - A dictionary used to hold uploaded file contents. The only special feature - here is that repr() of this object won't dump the entire contents of the - file to the output. A handy safeguard for a large file upload. + A tuple-like object that raises useful errors when it is asked to mutate. + + Example:: + + >>> a = ImmutableList(range(5), warning="You cannot mutate this.") + >>> a[3] = '4' + Traceback (most recent call last): + ... + AttributeError: You cannot mutate this. """ - def __repr__(self): - if 'content' in self: - d = dict(self, content='<omitted>') - return dict.__repr__(d) - return dict.__repr__(self) + + def __new__(cls, *args, **kwargs): + if 'warning' in kwargs: + warning = kwargs['warning'] + del kwargs['warning'] + else: + warning = 'ImmutableList object is immutable.' + self = tuple.__new__(cls, *args, **kwargs) + self.warning = warning + return self + + def complain(self, *wargs, **kwargs): + if isinstance(self.warning, Exception): + raise self.warning + else: + raise AttributeError, self.warning + + # All list mutation functions complain. + __delitem__ = complain + __delslice__ = complain + __iadd__ = complain + __imul__ = complain + __setitem__ = complain + __setslice__ = complain + append = complain + extend = complain + insert = complain + pop = complain + remove = complain + sort = complain + reverse = complain class DictWrapper(dict): """ diff --git a/django/utils/text.py b/django/utils/text.py index aa190c8c4f..3686a454a8 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -3,6 +3,7 @@ from django.conf import settings from django.utils.encoding import force_unicode from django.utils.functional import allow_lazy from django.utils.translation import ugettext_lazy +from htmlentitydefs import name2codepoint # Capitalizes the first letter of a string. capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:] @@ -222,3 +223,26 @@ def smart_split(text): yield bit smart_split = allow_lazy(smart_split, unicode) +def _replace_entity(match): + text = match.group(1) + if text[0] == u'#': + text = text[1:] + try: + if text[0] in u'xX': + c = int(text[1:], 16) + else: + c = int(text) + return unichr(c) + except ValueError: + return match.group(0) + else: + try: + return unichr(name2codepoint[text]) + except (ValueError, KeyError): + return match.group(0) + +_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") + +def unescape_entities(text): + return _entity_re.sub(_replace_entity, text) +unescape_entities = allow_lazy(unescape_entities, unicode) |
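
For code that still passes uploads around as `{'filename': ..., 'content': ...}` dictionaries, the patch keeps that interface working behind `DeprecationWarning`s while steering callers toward the new objects. A minimal sketch of the two spellings, using `SimpleUploadedFile` from the new `django/core/files/uploadedfile.py` (the file name and contents here are made up):

```python
from django.core.files.uploadedfile import SimpleUploadedFile

# New object interface introduced by this merge:
upload = SimpleUploadedFile('notes.txt', 'hello world', content_type='text/plain')
upload.file_name        # 'notes.txt'
upload.file_size        # 11
upload.content_type     # 'text/plain'
data = upload.read()    # 'hello world'

# Old dictionary-style access still works but now emits a DeprecationWarning;
# 'content' maps to read(), 'filename'/'content-type' to the new attributes.
upload['filename']      # 'notes.txt'
upload['content-type']  # 'text/plain'

# Going the other way, legacy dictionaries can be wrapped in the new class:
legacy = {'filename': 'notes.txt', 'content': 'hello world'}
upload2 = SimpleUploadedFile.from_dict(legacy)
```

The model layer's `save_FIELD_file()` and newforms' `FileField` both accept either form during the deprecation period, emitting the same warning for the dictionary style.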