summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Claude Paroz <claude@2xlibre.net> 2013-05-24 17:55:50 +0200
committer Claude Paroz <claude@2xlibre.net> 2013-05-25 12:10:53 +0200
commit f940e564e4623d531eb97a2cf1b116851003f9fd (patch)
tree 8667da411b9c87410586eb74e7a796e6f9b404ef
parent 6de81d65f443a01961c23139ca5d7653ef012d35 (diff)
Fixed #20099 -- Eased subclassing of BrokenLinkEmailsMiddleware
Thanks Ram Rachum for the report and the initial patch, and Simon Charette for the review.
-rw-r--r-- AUTHORS 1
-rw-r--r-- django/middleware/common.py 24
-rw-r--r-- docs/howto/error-reporting.txt 5
-rw-r--r-- tests/middleware/tests.py 19
4 files changed, 40 insertions, 9 deletions
diff --git a/AUTHORS b/AUTHORS
index b8047d92c9..b5f73f73c6 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -472,6 +472,7 @@ answer newbie questions, and generally made Django that much better:
Jyrki Pulliainen <jyrki.pulliainen@gmail.com>
Thejaswi Puthraya <thejaswi.puthraya@gmail.com>
Johann Queuniet <johann.queuniet@adh.naellia.eu>
+ Ram Rachum <ram@rachum.com>
Jan Rademaker
Michael Radziej <mir@noris.de>
Laurent Rahuel <laurent.rahuel@gmail.com>
diff --git a/django/middleware/common.py b/django/middleware/common.py
index 250737970d..1131bd698d 100644
--- a/django/middleware/common.py
+++ b/django/middleware/common.py
@@ -142,15 +142,17 @@ class BrokenLinkEmailsMiddleware(object):
domain = request.get_host()
path = request.get_full_path()
referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace')
- is_internal = self.is_internal_request(domain, referer)
- is_not_search_engine = '?' not in referer
- is_ignorable = self.is_ignorable_404(path)
- if referer and (is_internal or is_not_search_engine) and not is_ignorable:
+
+ if not self.is_ignorable_request(request, path, domain, referer):
ua = request.META.get('HTTP_USER_AGENT', '<none>')
ip = request.META.get('REMOTE_ADDR', '<none>')
mail_managers(
- "Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain),
- "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip),
+ "Broken %slink on %s" % (
+ ('INTERNAL ' if self.is_internal_request(domain, referer) else ''),
+ domain
+ ),
+ "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
+ "IP address: %s\n" % (referer, path, ua, ip),
fail_silently=True)
return response
@@ -159,10 +161,14 @@ class BrokenLinkEmailsMiddleware(object):
Returns True if the referring URL is the same domain as the current request.
"""
# Different subdomains are treated as different domains.
- return re.match("^https?://%s/" % re.escape(domain), referer)
+ return bool(re.match("^https?://%s/" % re.escape(domain), referer))
- def is_ignorable_404(self, uri):
+ def is_ignorable_request(self, request, uri, domain, referer):
"""
- Returns True if a 404 at the given URL *shouldn't* notify the site managers.
+ Returns True if the given request *shouldn't* notify the site managers.
"""
+ # '?' in referer is identified as search engine source
+ if (not referer or
+ (not self.is_internal_request(domain, referer) and '?' in referer)):
+ return True
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
diff --git a/docs/howto/error-reporting.txt b/docs/howto/error-reporting.txt
index 27f11f4936..987a503e95 100644
--- a/docs/howto/error-reporting.txt
+++ b/docs/howto/error-reporting.txt
@@ -98,6 +98,11 @@ crawlers often request::
(Note that these are regular expressions, so we put a backslash in front of
periods to escape them.)
+If you'd like to customize the behavior of
+:class:`django.middleware.common.BrokenLinkEmailsMiddleware` further (for
+example to ignore requests coming from web crawlers), you should subclass it
+and override its methods.
+
.. seealso::
404 errors are logged using the logging framework. By default, these log
diff --git a/tests/middleware/tests.py b/tests/middleware/tests.py
index 1ff8390f31..265eb97c36 100644
--- a/tests/middleware/tests.py
+++ b/tests/middleware/tests.py
@@ -326,6 +326,25 @@ class BrokenLinkEmailsMiddlewareTest(TestCase):
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 1)
+ def test_custom_request_checker(self):
+ class SubclassedMiddleware(BrokenLinkEmailsMiddleware):
+ ignored_user_agent_patterns = (re.compile(r'Spider.*'),
+ re.compile(r'Robot.*'))
+ def is_ignorable_request(self, request, uri, domain, referer):
+ '''Check user-agent in addition to normal checks.'''
+ if super(SubclassedMiddleware, self).is_ignorable_request(request, uri, domain, referer):
+ return True
+ user_agent = request.META['HTTP_USER_AGENT']
+ return any(pattern.search(user_agent) for pattern in
+ self.ignored_user_agent_patterns)
+
+ self.req.META['HTTP_REFERER'] = '/another/url/'
+ self.req.META['HTTP_USER_AGENT'] = 'Spider machine 3.4'
+ SubclassedMiddleware().process_response(self.req, self.resp)
+ self.assertEqual(len(mail.outbox), 0)
+ self.req.META['HTTP_USER_AGENT'] = 'My user agent'
+ SubclassedMiddleware().process_response(self.req, self.resp)
+ self.assertEqual(len(mail.outbox), 1)
class ConditionalGetMiddlewareTest(TestCase):
urls = 'middleware.cond_get_urls'