summary | refs | log | tree | commit | diff
path: root/tests/postgres_tests
diff options
context:
space:
mode:
author: GappleBee <irrationalmathematicspro7@gmail.com> 2017-04-06 16:42:49 +0100
committer: Jacob Walls <jacobtylerwalls@gmail.com> 2025-09-16 15:09:11 -0400
commit: 218f69f05eb51da1ea17d62a914a67ceff5bfd55 (patch)
tree: c4509f9db421ab66aa599048deff97a5c1fa0a1b /tests/postgres_tests
parent: e08fa42fa6d0e9f2a74e8fcdc5a47f5c3b825877 (diff)
Fixed #28041 -- Added Lexeme expression to contrib.postgres.search.
This expression automatically escapes its input and allows fine-grained control over prefix matching and term weighting via logical combinations.

Thanks Mariusz Felisiak, Adam Zapletal, Paolo Melchiorre, Jacob Walls, Adam Johnson, and Simon Charette for reviews.

Co-authored-by: joetsoi <joetsoi@users.noreply.github.com>
Co-authored-by: Karl Hobley <karl@kaed.uk>
Co-authored-by: Alexandr Tatarinov <tatarinov1997@gmail.com>
Diffstat (limited to 'tests/postgres_tests')
-rw-r--r-- tests/postgres_tests/test_search.py 223
1 files changed, 223 insertions, 0 deletions
diff --git a/tests/postgres_tests/test_search.py b/tests/postgres_tests/test_search.py
index a7118e7c79..c206c69747 100644
--- a/tests/postgres_tests/test_search.py
+++ b/tests/postgres_tests/test_search.py
@@ -6,6 +6,7 @@ All text copyright Python (Monty) Pictures. Thanks to sacred-texts.com for the
transcript.
"""
+from django.db import connection
from django.db.models import F, Value
from . import PostgreSQLSimpleTestCase, PostgreSQLTestCase
@@ -13,11 +14,13 @@ from .models import Character, Line, LineSavedSearch, Scene
try:
from django.contrib.postgres.search import (
+ Lexeme,
SearchConfig,
SearchHeadline,
SearchQuery,
SearchRank,
SearchVector,
+ quote_lexeme,
)
except ImportError:
pass
@@ -769,3 +772,223 @@ class SearchHeadlineTests(GrailTestData, PostgreSQLTestCase):
"<b>Brave</b>, <b>brave</b>, <b>brave</b>...<br>"
"<b>brave</b> <b>Sir</b> <b>Robin</b>",
)
+
+
+class TestLexemes(GrailTestData, PostgreSQLTestCase):
+ def test_and(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue"),
+ ).filter(search=SearchQuery(Lexeme("bedemir") & Lexeme("scales")))
+ self.assertSequenceEqual(searched, [self.bedemir0])
+
+ def test_multiple_and(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue"),
+ ).filter(
+ search=SearchQuery(
+ Lexeme("bedemir") & Lexeme("scales") & Lexeme("nostrils")
+ )
+ )
+ self.assertSequenceEqual(searched, [])
+
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue"),
+ ).filter(search=SearchQuery(Lexeme("shall") & Lexeme("use") & Lexeme("larger")))
+ self.assertSequenceEqual(searched, [self.bedemir0])
+
+ def test_or(self):
+ searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
+ search=SearchQuery(Lexeme("kneecaps") | Lexeme("nostrils"))
+ )
+ self.assertCountEqual(searched, [self.verse1, self.verse2])
+
+ def test_multiple_or(self):
+ searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
+ search=SearchQuery(
+ Lexeme("kneecaps") | Lexeme("nostrils") | Lexeme("Sir Robin")
+ )
+ )
+ self.assertCountEqual(searched, [self.verse1, self.verse2, self.verse0])
+
+ def test_advanced(self):
+ """
+ Combination of & and |
+ This is mainly helpful for checking the test_advanced_invert below
+ """
+ searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
+ search=SearchQuery(
+ Lexeme("shall") & Lexeme("use") & Lexeme("larger") | Lexeme("nostrils")
+ )
+ )
+ self.assertCountEqual(searched, [self.bedemir0, self.verse2])
+
+ def test_invert(self):
+ searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
+ character=self.minstrel, search=SearchQuery(~Lexeme("kneecaps"))
+ )
+ self.assertCountEqual(searched, [self.verse0, self.verse2])
+
+ def test_advanced_invert(self):
+ """
+ Inverting a query that uses a combination of & and |
+ should return the opposite of test_advanced.
+ """
+ searched = Line.objects.annotate(search=SearchVector("dialogue")).filter(
+ search=SearchQuery(
+ ~(
+ Lexeme("shall") & Lexeme("use") & Lexeme("larger")
+ | Lexeme("nostrils")
+ )
+ )
+ )
+ expected_result = Line.objects.exclude(
+ id__in=[self.bedemir0.id, self.verse2.id]
+ )
+ self.assertCountEqual(searched, expected_result)
+
+ def test_as_sql(self):
+ query = Line.objects.all().query
+ compiler = query.get_compiler(connection.alias)
+
+ tests = (
+ (Lexeme("a"), ("'a'",)),
+ (Lexeme("a", invert=True), ("!'a'",)),
+ (~Lexeme("a"), ("!'a'",)),
+ (Lexeme("a", prefix=True), ("'a':*",)),
+ (Lexeme("a", weight="D"), ("'a':D",)),
+ (Lexeme("a", invert=True, prefix=True, weight="D"), ("!'a':*D",)),
+ (Lexeme("a") | Lexeme("b") & ~Lexeme("c"), ("('a' | ('b' & !'c'))",)),
+ (
+ ~(Lexeme("a") | Lexeme("b") & ~Lexeme("c")),
+ ("(!'a' & (!'b' | 'c'))",),
+ ),
+ )
+
+ for expression, expected_params in tests:
+ with self.subTest(expression=expression, expected_params=expected_params):
+ _, params = expression.as_sql(compiler, connection)
+ self.assertEqual(params, expected_params)
+
+ def test_quote_lexeme(self):
+ tests = (
+ ("L'amour piqué par une abeille", "'L amour piqué par une abeille'"),
+ ("'starting quote", "'starting quote'"),
+ ("ending quote'", "'ending quote'"),
+ ("double quo''te", "'double quo te'"),
+ ("triple quo'''te", "'triple quo te'"),
+ ("backslash\\", "'backslash'"),
+ ("exclamation!", "'exclamation'"),
+ ("ampers&nd", "'ampers nd'"),
+ )
+ for lexeme, quoted in tests:
+ with self.subTest(lexeme=lexeme):
+ self.assertEqual(quote_lexeme(lexeme), quoted)
+
+ def test_prefix_searching(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue"),
+ ).filter(search=SearchQuery(Lexeme("hear", prefix=True)))
+
+ self.assertSequenceEqual(searched, [self.verse2])
+
+ def test_inverse_prefix_searching(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue"),
+ ).filter(search=SearchQuery(Lexeme("Robi", prefix=True, invert=True)))
+ self.assertEqual(
+ set(searched),
+ {
+ self.verse2,
+ self.bedemir0,
+ self.bedemir1,
+ self.french,
+ self.crowd,
+ self.witch,
+ self.duck,
+ },
+ )
+
+ def test_lexemes_multiple_and(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue"),
+ ).filter(
+ search=SearchQuery(
+ Lexeme("Robi", prefix=True) & Lexeme("Camel", prefix=True)
+ )
+ )
+
+ self.assertSequenceEqual(searched, [self.verse0])
+
+ def test_lexemes_multiple_or(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue"),
+ ).filter(
+ search=SearchQuery(
+ Lexeme("kneecap", prefix=True) | Lexeme("afrai", prefix=True)
+ )
+ )
+
+ self.assertSequenceEqual(searched, [self.verse0, self.verse1])
+
+ def test_config_query_explicit(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue", config="french"),
+ ).filter(search=SearchQuery(Lexeme("cadeaux"), config="french"))
+
+ self.assertSequenceEqual(searched, [self.french])
+
+ def test_config_query_implicit(self):
+ searched = Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue", config="french"),
+ ).filter(search=Lexeme("cadeaux"))
+
+ self.assertSequenceEqual(searched, [self.french])
+
+ def test_config_from_field_explicit(self):
+ searched = Line.objects.annotate(
+ search=SearchVector(
+ "scene__setting", "dialogue", config=F("dialogue_config")
+ ),
+ ).filter(search=SearchQuery(Lexeme("cadeaux"), config=F("dialogue_config")))
+ self.assertSequenceEqual(searched, [self.french])
+
+ def test_config_from_field_implicit(self):
+ searched = Line.objects.annotate(
+ search=SearchVector(
+ "scene__setting", "dialogue", config=F("dialogue_config")
+ ),
+ ).filter(search=Lexeme("cadeaux"))
+ self.assertSequenceEqual(searched, [self.french])
+
+ def test_invalid_combinations(self):
+ msg = "A Lexeme can only be combined with another Lexeme, got NoneType."
+ with self.assertRaisesMessage(TypeError, msg):
+ Line.objects.filter(dialogue__search=None | Lexeme("kneecaps"))
+
+ with self.assertRaisesMessage(TypeError, msg):
+ Line.objects.filter(dialogue__search=None & Lexeme("kneecaps"))
+
+ def test_invalid_weights(self):
+ invalid_weights = ["E", "Drandom", "AB", "C ", 0, "", " ", [1, 2, 3]]
+ for weight in invalid_weights:
+ with self.subTest(weight=weight):
+ with self.assertRaisesMessage(
+ ValueError,
+ f"Weight must be one of 'A', 'B', 'C', and 'D', got {weight!r}.",
+ ):
+ Line.objects.filter(
+ dialogue__search=Lexeme("kneecaps", weight=weight)
+ )
+
+ def test_empty(self):
+ with self.assertRaisesMessage(ValueError, "Lexeme value cannot be empty."):
+ Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue")
+ ).filter(search=SearchQuery(Lexeme("")))
+
+ def test_non_string_values(self):
+ msg = "Lexeme value must be a string, got NoneType."
+ with self.assertRaisesMessage(TypeError, msg):
+ Line.objects.annotate(
+ search=SearchVector("scene__setting", "dialogue")
+ ).filter(search=SearchQuery(Lexeme(None)))