[3.0.x] Fixed #31150 -- Included subqueries that reference related fields in GROUP BY clauses.

Thanks Johannes Hoppe for the report. Regression in fb3f034f1c63160c0ff13c609acd01c18be12f80. Co-authored-by: Simon Charette <charette.s@gmail.com> Backport of 7b8fa1653fde578ab3a496d9974ab1d4261b8b26 from master
author: Mariusz Felisiak <felisiak.mariusz@gmail.com> 2020-03-02 13:20:36 +0100
committer: Mariusz Felisiak <felisiak.mariusz@gmail.com> 2020-03-03 11:26:36 +0100
commit: c5cfaad2f1f08b31ba04b9534f1a46a6ef1003bf (patch)
tree: 6349e9e7d1b96fe6be5be2f35644173e2f85cc57
parent: 4977f2084ec828c1214817e0a7a82ff96cba7863 (diff)
3 files changed, 44 insertions, 1 deletions
diff --git a/django/db/models/expressions.py b/django/db/models/expressions.py
index 3adec334f7..2733fbada9 100644
--- a/django/db/models/expressions.py
+++ b/django/db/models/expressions.py
@@ -6,6 +6,7 @@ from decimal import Decimal
 from django.core.exceptions import EmptyResultSet, FieldError
 from django.db import connection
 from django.db.models import fields
+from django.db.models.constants import LOOKUP_SEP
 from django.db.models.query_utils import Q
 from django.db.utils import NotSupportedError
 from django.utils.deconstruct import deconstructible
@@ -558,6 +559,14 @@ class ResolvedOuterRef(F):
             'only be used in a subquery.'
         )
 
+    def resolve_expression(self, *args, **kwargs):
+        col = super().resolve_expression(*args, **kwargs)
+        # FIXME: Rename possibly_multivalued to multivalued and fix detection
+        # for non-multivalued JOINs (e.g. foreign key fields). This should take
+        # into account only many-to-many and one-to-many relationships.
+        col.possibly_multivalued = LOOKUP_SEP in self.name
+        return col
+
     def relabeled_clone(self, relabels):
         return self
 
@@ -744,6 +753,7 @@ class Random(Expression):
 class Col(Expression):
 
     contains_column_references = True
+    possibly_multivalued = False
 
     def __init__(self, alias, target, output_field=None):
         if output_field is None:
@@ -1068,7 +1078,10 @@ class Subquery(Expression):
     def get_group_by_cols(self, alias=None):
         if alias:
             return [Ref(alias, self)]
-        return self.query.get_external_cols()
+        external_cols = self.query.get_external_cols()
+        if any(col.possibly_multivalued for col in external_cols):
+            return [self]
+        return external_cols
 
 
 class Exists(Subquery):
diff --git a/docs/releases/3.0.4.txt b/docs/releases/3.0.4.txt
index 1da9a22552..ad9752addb 100644
--- a/docs/releases/3.0.4.txt
+++ b/docs/releases/3.0.4.txt
@@ -27,3 +27,6 @@ Bugfixes
 * Fixed a regression in Django 3.0.3 that caused misplacing parameters of SQL
   queries when subtracting ``DateField`` or ``DateTimeField`` expressions on
   MySQL (:ticket:`31312`).
+
+* Fixed a regression in Django 3.0 that didn't include subqueries spanning
+  multivalued relations in the ``GROUP BY`` clause (:ticket:`31150`).
diff --git a/tests/aggregation/tests.py b/tests/aggregation/tests.py
index dc745e4138..342507da01 100644
--- a/tests/aggregation/tests.py
+++ b/tests/aggregation/tests.py
@@ -1,6 +1,7 @@
 import datetime
 import re
 from decimal import Decimal
+from unittest import skipIf
 
 from django.core.exceptions import FieldError
 from django.db import connection
@@ -1189,6 +1190,26 @@ class AggregateTestCase(TestCase):
             },
         ])
 
+    @skipUnlessDBFeature('supports_subqueries_in_group_by')
+    @skipIf(
+        connection.vendor == 'mysql',
+        'GROUP BY optimization does not work properly when ONLY_FULL_GROUP_BY '
+        'mode is enabled on MySQL, see #31331.',
+    )
+    def test_aggregation_subquery_annotation_multivalued(self):
+        """
+        Subquery annotations must be included in the GROUP BY if they use
+        potentially multivalued relations (contain the LOOKUP_SEP).
+        """
+        subquery_qs = Author.objects.filter(
+            pk=OuterRef('pk'),
+            book__name=OuterRef('book__name'),
+        ).values('pk')
+        author_qs = Author.objects.annotate(
+            subquery_id=Subquery(subquery_qs),
+        ).annotate(count=Count('book'))
+        self.assertEqual(author_qs.count(), Author.objects.count())
+
     def test_aggregation_order_by_not_selected_annotation_values(self):
         result_asc = [
             self.b4.pk,
@@ -1247,6 +1268,7 @@ class AggregateTestCase(TestCase):
         ).annotate(total=Count('*'))
         self.assertEqual(dict(has_long_books_breakdown), {True: 2, False: 3})
 
+    @skipUnlessDBFeature('supports_subqueries_in_group_by')
     def test_aggregation_subquery_annotation_related_field(self):
         publisher = Publisher.objects.create(name=self.a9.name, num_awards=2)
         book = Book.objects.create(
@@ -1266,3 +1288,8 @@ class AggregateTestCase(TestCase):
             contact_publisher__isnull=False,
         ).annotate(count=Count('authors'))
         self.assertSequenceEqual(books_qs, [book])
+        # FIXME: GROUP BY doesn't need to include a subquery with
+        # non-multivalued JOINs, see Col.possibly_multivalued (refs #31150):
+        # with self.assertNumQueries(1) as ctx:
+        #     self.assertSequenceEqual(books_qs, [book])
+        # self.assertEqual(ctx[0]['sql'].count('SELECT'), 2)
author	Mariusz Felisiak <felisiak.mariusz@gmail.com>	2020-03-02 13:20:36 +0100
committer	Mariusz Felisiak <felisiak.mariusz@gmail.com>	2020-03-03 11:26:36 +0100
commit	c5cfaad2f1f08b31ba04b9534f1a46a6ef1003bf (patch)
tree	6349e9e7d1b96fe6be5be2f35644173e2f85cc57
parent	4977f2084ec828c1214817e0a7a82ff96cba7863 (diff)