tests/modeltests/aggregation/models.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379

# coding: utf-8
from django.db import models

try:
    sorted
except NameError:
    from django.utils.itercompat import sorted      # For Python 2.3

class Author(models.Model):
   name = models.CharField(max_length=100)
   age = models.IntegerField()
   friends = models.ManyToManyField('self', blank=True)

   def __unicode__(self):
      return self.name

class Publisher(models.Model):
   name = models.CharField(max_length=300)
   num_awards = models.IntegerField()

   def __unicode__(self):
      return self.name

class Book(models.Model):
   isbn = models.CharField(max_length=9)
   name = models.CharField(max_length=300)
   pages = models.IntegerField()
   rating = models.FloatField()
   price = models.DecimalField(decimal_places=2, max_digits=6)
   authors = models.ManyToManyField(Author)
   publisher = models.ForeignKey(Publisher)
   pubdate = models.DateField()

   def __unicode__(self):
      return self.name

class Store(models.Model):
   name = models.CharField(max_length=300)
   books = models.ManyToManyField(Book)
   original_opening = models.DateTimeField()
   friday_night_closing = models.TimeField()

   def __unicode__(self):
      return self.name

class Entries(models.Model):
   EntryID = models.AutoField(primary_key=True, db_column='Entry ID')
   Entry = models.CharField(unique=True, max_length=50)
   Exclude = models.BooleanField()

class Clues(models.Model):
   ID = models.AutoField(primary_key=True)
   EntryID = models.ForeignKey(Entries, verbose_name='Entry', db_column = 'Entry ID')
   Clue = models.CharField(max_length=150)

# Tests on 'aggergate'
# Different backends and numbers.
__test__ = {'API_TESTS': """
>>> from django.core import management
>>> try:
...     from decimal import Decimal
... except:
...     from django.utils._decimal import Decimal
>>> from datetime import date

# Reset the database representation of this app.
# This will return the database to a clean initial state.
>>> management.call_command('flush', verbosity=0, interactive=False)

# Empty Call - request nothing, get nothing.
>>> Author.objects.all().aggregate()
{}

>>> from django.db.models import Avg, Sum, Count, Max, Min

# Single model aggregation
#

# Single aggregate
# Average age of Authors
>>> Author.objects.all().aggregate(Avg('age'))
{'age__avg': 37.4...}

# Multiple aggregates
# Average and Sum of Author ages
>>> Author.objects.all().aggregate(Sum('age'), Avg('age'))
{'age__sum': 337, 'age__avg': 37.4...}

# Aggreates interact with filters, and only
# generate aggregate values for the filtered values
# Sum of the age of those older than 29 years old
>>> Author.objects.all().filter(age__gt=29).aggregate(Sum('age'))
{'age__sum': 254}

# Depth-1 Joins
#

# On Relationships with self
# Average age of the friends of each author
>>> Author.objects.all().aggregate(Avg('friends__age'))
{'friends__age__avg': 34.07...}

# On ManyToMany Relationships
#

# Forward
# Average age of the Authors of Books with a rating of less than 4.5
>>> Book.objects.all().filter(rating__lt=4.5).aggregate(Avg('authors__age'))
{'authors__age__avg': 38.2...}

# Backward
# Average rating of the Books whose Author's name contains the letter 'a'
>>> Author.objects.all().filter(name__contains='a').aggregate(Avg('book__rating'))
{'book__rating__avg': 4.0}

# On OneToMany Relationships
#

# Forward
# Sum of the number of awards of each Book's Publisher
>>> Book.objects.all().aggregate(Sum('publisher__num_awards'))
{'publisher__num_awards__sum': 30}

# Backward
# Sum of the price of every Book that has a Publisher
>>> Publisher.objects.all().aggregate(Sum('book__price'))
{'book__price__sum': Decimal("270.27")}

# Multiple Joins
#

# Forward
>>> Store.objects.all().aggregate(Max('books__authors__age'))
{'books__authors__age__max': 57}

# Backward
# Note that the very long default alias may be truncated
>>> Author.objects.all().aggregate(Min('book__publisher__num_awards'))
{'book__publisher__num_award...': 1}

# Aggregate outputs can also be aliased.

# Average amazon.com Book rating
>>> Store.objects.filter(name='Amazon.com').aggregate(amazon_mean=Avg('books__rating'))
{'amazon_mean': 4.08...}

# Tests on annotate()

# An empty annotate call does nothing but return the same QuerySet
>>> Book.objects.all().annotate().order_by('pk')
[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Sams Teach Yourself Django in 24 Hours>, <Book: Practical Django Projects>, <Book: Python Web Development with Django>, <Book: Artificial Intelligence: A Modern Approach>, <Book: Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp>]

# Annotate inserts the alias into the model object with the aggregated result
>>> books = Book.objects.all().annotate(mean_age=Avg('authors__age'))
>>> books.get(pk=1).name
u'The Definitive Guide to Django: Web Development Done Right'

>>> books.get(pk=1).mean_age
34.5

# On ManyToMany Relationships

# Forward
# Average age of the Authors of each book with a rating less than 4.5
>>> books = Book.objects.all().filter(rating__lt=4.5).annotate(Avg('authors__age'))
>>> sorted([(b.name, b.authors__age__avg) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 51.5), (u'Practical Django Projects', 29.0), (u'Python Web Development with Django', 30.3...), (u'Sams Teach Yourself Django in 24 Hours', 45.0)]

# Count the number of authors of each book
>>> books = Book.objects.annotate(num_authors=Count('authors'))
>>> sorted([(b.name, b.num_authors) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 2), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 1), (u'Practical Django Projects', 1), (u'Python Web Development with Django', 3), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 2)]

# Backward
# Average rating of the Books whose Author's names contains the letter 'a'
>>> authors = Author.objects.all().filter(name__contains='a').annotate(Avg('book__rating'))
>>> sorted([(a.name, a.book__rating__avg) for a in authors])
[(u'Adrian Holovaty', 4.5), (u'Brad Dayley', 3.0), (u'Jacob Kaplan-Moss', 4.5), (u'James Bennett', 4.0), (u'Paul Bissex', 4.0), (u'Stuart Russell', 4.0)]

# Count the number of books written by each author
>>> authors = Author.objects.annotate(num_books=Count('book'))
>>> sorted([(a.name, a.num_books) for a in authors])
[(u'Adrian Holovaty', 1), (u'Brad Dayley', 1), (u'Jacob Kaplan-Moss', 1), (u'James Bennett', 1), (u'Jeffrey Forcier ', 1), (u'Paul Bissex', 1), (u'Peter Norvig', 2), (u'Stuart Russell', 1), (u'Wesley J. Chun', 1)]

# On OneToMany Relationships

# Forward
# Annotate each book with the number of awards of each Book's Publisher
>>> books = Book.objects.all().annotate(Sum('publisher__num_awards'))
>>> sorted([(b.name, b.publisher__num_awards__sum) for b in books])
[(u'Artificial Intelligence: A Modern Approach', 7), (u'Paradigms of Artificial Intelligence Programming: Case Studies in Common Lisp', 9), (u'Practical Django Projects', 3), (u'Python Web Development with Django', 7), (u'Sams Teach Yourself Django in 24 Hours', 1), (u'The Definitive Guide to Django: Web Development Done Right', 3)]

# Backward
# Annotate each publisher with the sum of the price of all books sold
>>> publishers = Publisher.objects.all().annotate(Sum('book__price'))
>>> sorted([(p.name, p.book__price__sum) for p in publishers])
[(u'Apress', Decimal("59.69")), (u'Morgan Kaufmann', Decimal("75.00")), (u'Prentice Hall', Decimal("112.49")), (u'Sams', Decimal("23.09"))]

# Calls to values() are not commutative over annotate().

# Calling values on a queryset that has annotations returns the output
# as a dictionary
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values()
[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('pk', 'isbn', 'mean_age')
[{'pk': 1, 'isbn': u'159059725', 'mean_age': 34.5}]

# Calling it with paramters reduces the output but does not remove the
# annotation.
>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values('name')
[{'name': u'The Definitive Guide to Django: Web Development Done Right', 'mean_age': 34.5}]

# An empty values() call before annotating has the same effect as an
# empty values() call after annotating
>>> Book.objects.filter(pk=1).values().annotate(mean_age=Avg('authors__age'))
[{'rating': 4.5, 'isbn': u'159059725', 'name': u'The Definitive Guide to Django: Web Development Done Right', 'pubdate': datetime.date(2007, 12, 6), 'price': Decimal("30..."), 'id': 1, 'publisher_id': 1, 'pages': 447, 'mean_age': 34.5}]

# Calling annotate() on a ValuesQuerySet annotates over the groups of
# fields to be selected by the ValuesQuerySet.

# Note that an extra parameter is added to each dictionary. This
# parameter is a queryset representing the objects that have been
# grouped to generate the annotation

>>> Book.objects.all().values('rating').annotate(n_authors=Count('authors__id'), mean_age=Avg('authors__age')).order_by('rating')
[{'rating': 3.0, 'n_authors': 1, 'mean_age': 45.0}, {'rating': 4.0, 'n_authors': 6, 'mean_age': 37.1...}, {'rating': 4.5, 'n_authors': 2, 'mean_age': 34.5}, {'rating': 5.0, 'n_authors': 1, 'mean_age': 57.0}]

# If a join doesn't match any objects, an aggregate returns None
>>> authors = Author.objects.all().annotate(Avg('friends__age')).order_by('id')
>>> len(authors)
9
>>> sorted([(a.name, a.friends__age__avg) for a in authors])
[(u'Adrian Holovaty', 32.0), (u'Brad Dayley', None), (u'Jacob Kaplan-Moss', 29.5), (u'James Bennett', 34.0), (u'Jeffrey Forcier ', 27.0), (u'Paul Bissex', 31.0), (u'Peter Norvig', 46.0), (u'Stuart Russell', 57.0), (u'Wesley J. Chun', 33.6...)]


# The Count aggregation function allows an extra parameter: distinct.
# This restricts the count results to unique items
>>> Book.objects.all().aggregate(Count('rating'))
{'rating__count': 6}

>>> Book.objects.all().aggregate(Count('rating', distinct=True))
{'rating__count': 4}

# Retreiving the grouped objects

# When using Count you can also omit the primary key and refer only to
# the related field name if you want to count all the related objects
# and not a specific column
>>> explicit = list(Author.objects.annotate(Count('book__id')))
>>> implicit = list(Author.objects.annotate(Count('book')))
>>> explicit == implicit
True

# Ordering is allowed on aggregates
>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('oldest', 'rating')
[{'rating': 4.5, 'oldest': 35}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.0, 'oldest': 57}, {'rating': 5.0, 'oldest': 57}]

>>> Book.objects.values('rating').annotate(oldest=Max('authors__age')).order_by('-oldest', '-rating')
[{'rating': 5.0, 'oldest': 57}, {'rating': 4.0, 'oldest': 57}, {'rating': 3.0, 'oldest': 45}, {'rating': 4.5, 'oldest': 35}]

# It is possible to aggregate over anotated values
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).aggregate(Avg('num_authors'))
{'num_authors__avg': 1.66...}

# You can filter the results based on the aggregation alias.

# Lets add a publisher to test the different possibilities for filtering
>>> p = Publisher(name='Expensive Publisher', num_awards=0)
>>> p.save()
>>> Book(name='ExpensiveBook1', pages=1, isbn='111', rating=3.5, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,1)).save()
>>> Book(name='ExpensiveBook2', pages=1, isbn='222', rating=4.0, price=Decimal("1000"), publisher=p, pubdate=date(2008,12,2)).save()
>>> Book(name='ExpensiveBook3', pages=1, isbn='333', rating=4.5, price=Decimal("35"), publisher=p, pubdate=date(2008,12,3)).save()

# Publishers that have:

# (i) more than one book
>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]

# (ii) a book that cost less than 40
>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).order_by('pk')
[<Publisher: Apress>, <Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]

# (iii) more than one book and (at least) a book that cost less than 40
>>> Publisher.objects.annotate(num_books=Count('book__id')).filter(num_books__gt=1, book__price__lt=Decimal("40.0")).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>, <Publisher: Expensive Publisher>]

# (iv) more than one book that costs less than $40
>>> Publisher.objects.filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>]

# Now a bit of testing on the different lookup types
#

>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 3]).order_by('pk')
[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]

>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__range=[1, 2]).order_by('pk')
[<Publisher: Apress>, <Publisher: Sams>, <Publisher: Prentice Hall>, <Publisher: Morgan Kaufmann>]

>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__in=[1, 3]).order_by('pk')
[<Publisher: Sams>, <Publisher: Morgan Kaufmann>, <Publisher: Expensive Publisher>]

>>> Publisher.objects.annotate(num_books=Count('book')).filter(num_books__isnull=True)
[]

>>> p.delete()

# Does Author X have any friends? (or better, how many friends does author X have)
>> Author.objects.filter(pk=1).aggregate(Count('friends__id'))
{'friends__id__count': 2.0}

# Give me a list of all Books with more than 1 authors
>>> Book.objects.all().annotate(num_authors=Count('authors__name')).filter(num_authors__ge=2).order_by('pk')
[<Book: The Definitive Guide to Django: Web Development Done Right>, <Book: Artificial Intelligence: A Modern Approach>]

# Give me a list of all Authors that have no friends
>>> Author.objects.all().annotate(num_friends=Count('friends__id', distinct=True)).filter(num_friends=0).order_by('pk')
[<Author: Brad Dayley>]

# Give me a list of all publishers that have published more than 1 books
>>> Publisher.objects.all().annotate(num_books=Count('book__id')).filter(num_books__gt=1).order_by('pk')
[<Publisher: Apress>, <Publisher: Prentice Hall>]

# Give me a list of all publishers that have published more than 1 books that cost less than 40
>>> Publisher.objects.all().filter(book__price__lt=Decimal("40.0")).annotate(num_books=Count('book__id')).filter(num_books__gt=1)
[<Publisher: Apress>]

# Give me a list of all Books that were written by X and one other author.
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1)
[<Book: Artificial Intelligence: A Modern Approach>]

# Give me the average rating of all Books that were written by X and one other author.
#(Aggregate over objects discovered using membership of the m2m set)

# Adding an existing author to another book to test it the right way
>>> a = Author.objects.get(name__contains='Norvig')
>>> b = Book.objects.get(name__contains='Done Right')
>>> b.authors.add(a)
>>> b.save()

# This should do it
>>> Book.objects.all().annotate(num_authors=Count('authors__id')).filter(authors__name__contains='Norvig', num_authors__gt=1).aggregate(Avg('rating'))
{'rating__avg': 4.25}
>>> b.authors.remove(a)

# Give me a list of all Authors that have published a book with at least one other person
# (Filters over a count generated on a related object)
#
# Cheating: [a for a in Author.objects.all().annotate(num_coleagues=Count('book__authors__id'), num_books=Count('book__id', distinct=True)) if a.num_coleagues - a.num_books > 0]
# F-Syntax is required. Will be fixed after F objects are available

# Tests on fields with non-default table and column names.
>>> Clues.objects.values('EntryID__Entry').annotate(Appearances=Count('EntryID'), Distinct_Clues=Count('Clue', distinct=True))
[]

# Aggregates also work on dates, times and datetimes
>>> Publisher.objects.annotate(earliest_book=Min('book__pubdate')).order_by('earliest_book').values()
[{'earliest_book': datetime.date(1991, 10, 15), 'num_awards': 9, 'id': 4, 'name': u'Morgan Kaufmann'}, {'earliest_book': datetime.date(1995, 1, 15), 'num_awards': 7, 'id': 3, 'name': u'Prentice Hall'}, {'earliest_book': datetime.date(2007, 12, 6), 'num_awards': 3, 'id': 1, 'name': u'Apress'}, {'earliest_book': datetime.date(2008, 3, 3), 'num_awards': 1, 'id': 2, 'name': u'Sams'}]

>>> Store.objects.aggregate(Max('friday_night_closing'), Min("original_opening"))
{'friday_night_closing__max': datetime.time(23, 59, 59), 'original_opening__min': datetime.datetime(1945, 4, 25, 16, 24, 14)}

# values_list() can also be used

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('pk', 'isbn', 'mean_age')
[(1, u'159059725', 34.5)]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('isbn')
[(u'159059725',)]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age')
[(34.5,)]

>>> Book.objects.filter(pk=1).annotate(mean_age=Avg('authors__age')).values_list('mean_age', flat=True)
[34.5]

"""}