diff options
| author | Malcolm Tredinnick <malcolm.tredinnick@gmail.com> | 2008-07-26 05:07:16 +0000 |
|---|---|---|
| committer | Malcolm Tredinnick <malcolm.tredinnick@gmail.com> | 2008-07-26 05:07:16 +0000 |
| commit | badde8a7e5090347feea0b39221dbdea428582b8 (patch) | |
| tree | 11cc9f8fc35d281ec8305c1ae0f526c34632cfe5 | |
| parent | a26ba33111a41d87eaea23b4ba2ae48be4e08b18 (diff) | |
Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using
pagination). Patch from Julian Bez.
The docs patch here could probably do with some rewording.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | AUTHORS | 3 |
| -rw-r--r-- | django/contrib/sitemaps/__init__.py | 16 |
| -rw-r--r-- | django/contrib/sitemaps/views.py | 24 |
| -rw-r--r-- | docs/sitemaps.txt | 4 |
4 files changed, 38 insertions, 9 deletions
@@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better: Esdras Beleza <linux@esdrasbeleza.com> Chris Bennett <chrisrbennett@yahoo.com> James Bennett - Ben Godfrey <http://aftnn.org> + Julian Bez Arvis Bickovskis <viestards.lists@gmail.com> Paul Bissex <http://e-scribe.com/> Simon Blanchard @@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better: glin@seznam.cz martin.glueck@gmail.com Artyom Gnilov <boobsd@gmail.com> + Ben Godfrey <http://aftnn.org> GomoX <gomo@datafull.com> Guilherme Mesquita Gondim <semente@taurinus.org> Mario Gonzalez <gonzalemario@gmail.com> diff --git a/django/contrib/sitemaps/__init__.py b/django/contrib/sitemaps/__init__.py index 30949837e4..13e667e142 100644 --- a/django/contrib/sitemaps/__init__.py +++ b/django/contrib/sitemaps/__init__.py @@ -1,4 +1,4 @@ -from django.core import urlresolvers +from django.core import urlresolvers, paginator import urllib PING_URL = "http://www.google.com/webmasters/tools/ping" @@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL): urllib.urlopen("%s?%s" % (ping_url, params)) class Sitemap: + # This limit is defined by Google. See the index documentation at + # http://sitemaps.org/protocol.php#index. 
+ limit = 50000 + def __get(self, name, obj, default=None): try: attr = getattr(self, name) @@ -49,11 +53,17 @@ class Sitemap: def location(self, obj): return obj.get_absolute_url() - def get_urls(self): + def _get_paginator(self): + if not hasattr(self, "paginator"): + self.paginator = paginator.Paginator(self.items(), self.limit) + return self.paginator + paginator = property(_get_paginator) + + def get_urls(self, page=1): from django.contrib.sites.models import Site current_site = Site.objects.get_current() urls = [] - for item in self.items(): + for item in self.paginator.page(page).object_list: loc = "http://%s%s" % (current_site.domain, self.__get('location', item)) url_info = { 'location': loc, diff --git a/django/contrib/sitemaps/views.py b/django/contrib/sitemaps/views.py index 86ef1e3526..7a5fe38a08 100644 --- a/django/contrib/sitemaps/views.py +++ b/django/contrib/sitemaps/views.py @@ -3,14 +3,22 @@ from django.template import loader from django.contrib.sites.models import Site from django.core import urlresolvers from django.utils.encoding import smart_str +from django.core.paginator import EmptyPage, PageNotAnInteger def index(request, sitemaps): current_site = Site.objects.get_current() sites = [] protocol = request.is_secure() and 'https' or 'http' - for section in sitemaps.keys(): + for section, site in sitemaps.items(): + if callable(site): + pages = site().paginator.num_pages + else: + pages = site.paginator.num_pages sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section}) sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url)) + if pages > 1: + for page in range(2, pages+1): + sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page)) xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites}) return HttpResponse(xml, mimetype='application/xml') @@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None): maps.append(sitemaps[section]) 
else: maps = sitemaps.values() + page = request.GET.get("p", 1) for site in maps: - if callable(site): - urls.extend(site().get_urls()) - else: - urls.extend(site.get_urls()) + try: + if callable(site): + urls.extend(site().get_urls(page)) + else: + urls.extend(site.get_urls(page)) + except EmptyPage: + raise Http404("Page %s empty" % page) + except PageNotAnInteger: + raise Http404("No page '%s'" % page) xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls})) return HttpResponse(xml, mimetype='application/xml') diff --git a/docs/sitemaps.txt b/docs/sitemaps.txt index 6a16e61879..3e7411c168 100644 --- a/docs/sitemaps.txt +++ b/docs/sitemaps.txt @@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap`` classes and the ``sitemaps`` dict don't change at all. +If one of your sitemaps is going to have more than 50,000 URLs you should +create an index file. Your sitemap will be paginated and the index will +reflect that. + Pinging Google ============== |
