summaryrefslogtreecommitdiff
path: root/django/utils/text.py
diff options
context:
space:
mode:
authorChristopher Long <indirecthit@gmail.com>2007-06-17 22:18:54 +0000
committerChristopher Long <indirecthit@gmail.com>2007-06-17 22:18:54 +0000
commitae22b6d403dcf25098c77f0dfcf59ae58b186461 (patch)
treec37fc631e99a7e4d909d6b6d236f495003731ea7 /django/utils/text.py
parent0cf7bc439129c66df8d64601e885f83b256b4f25 (diff)
per-object-permissions: Merged to trunk [5486] NOTE: Not fully tested, will be working on this over the next few weeks.
git-svn-id: http://code.djangoproject.com/svn/django/branches/per-object-permissions@5488 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'django/utils/text.py')
-rw-r--r--django/utils/text.py67
1 files changed, 64 insertions, 3 deletions
diff --git a/django/utils/text.py b/django/utils/text.py
index 217f42491b..c73ab908f3 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -17,7 +17,7 @@ def wrap(text, width):
pos = len(word) - word.rfind('\n') - 1
for word in it:
if "\n" in word:
- lines = word.splitlines()
+ lines = word.split('\n')
else:
lines = (word,)
pos += len(lines[0]) + 1
@@ -41,6 +41,66 @@ def truncate_words(s, num):
words.append('...')
return ' '.join(words)
+def truncate_html_words(s, num):
+ """
+ Truncates html to a certain number of words (not counting tags and comments).
+ Closes opened tags if they were correctly closed in the given html.
+ """
+ length = int(num)
+ if length <= 0:
+ return ''
+ html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
+ # Set up regular expressions
+ re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)')
+ re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
+ # Count non-HTML words and keep note of open tags
+ pos = 0
+ ellipsis_pos = 0
+ words = 0
+ open_tags = []
+ while words <= length:
+ m = re_words.search(s, pos)
+ if not m:
+ # Checked through whole string
+ break
+ pos = m.end(0)
+ if m.group(1):
+ # It's an actual non-HTML word
+ words += 1
+ if words == length:
+ ellipsis_pos = pos
+ continue
+ # Check for tag
+ tag = re_tag.match(m.group(0))
+ if not tag or ellipsis_pos:
+ # Don't worry about non tags or tags after our truncate point
+ continue
+ closing_tag, tagname, self_closing = tag.groups()
+ tagname = tagname.lower() # Element names are always case-insensitive
+ if self_closing or tagname in html4_singlets:
+ pass
+ elif closing_tag:
+ # Check for match in open tags list
+ try:
+ i = open_tags.index(tagname)
+ except ValueError:
+ pass
+ else:
+ # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
+ open_tags = open_tags[i+1:]
+ else:
+ # Add it to the start of the open tags list
+ open_tags.insert(0, tagname)
+ if words <= length:
+ # Don't try to close tags if we don't need to truncate
+ return s
+ out = s[:ellipsis_pos] + ' ...'
+ # Close any tags still open
+ for tag in open_tags:
+ out += '</%s>' % tag
+ # Return string
+ return out
+
def get_valid_filename(s):
"""
Returns the given string converted to a string that can be used for a clean
@@ -131,14 +191,15 @@ def smart_split(text):
Supports both single and double quotes, and supports escaping quotes with
backslashes. In the output, strings will keep their initial and trailing
quote marks.
+
>>> list(smart_split('This is "a person\'s" test.'))
['This', 'is', '"a person\'s"', 'test.']
"""
for bit in smart_split_re.finditer(text):
bit = bit.group(0)
- if bit[0] == '"':
+ if bit[0] == '"' and bit[-1] == '"':
yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'
- elif bit[0] == "'":
+ elif bit[0] == "'" and bit[-1] == "'":
yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"
else:
yield bit