[Python-checkins] bpo-28660: Make TextWrapper break long words on hyphens (GH-22721)
Irit Katriel
webhook-mailer at python.org
Sun Oct 18 13:01:27 EDT 2020
https://github.com/python/cpython/commit/b81c833ab51fb7d7f0f8eaace37f60ef7455aa85
commit: b81c833ab51fb7d7f0f8eaace37f60ef7455aa85
branch: master
author: Irit Katriel <iritkatriel at yahoo.com>
committer: GitHub <noreply at github.com>
date: 2020-10-18T20:01:15+03:00
summary:
bpo-28660: Make TextWrapper break long words on hyphens (GH-22721)
files:
A Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst
M Lib/test/test_textwrap.py
M Lib/textwrap.py
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
index ed97f70ba1fa4..dfbc2b93dfc0d 100644
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -640,6 +640,78 @@ def test_max_lines_long(self):
max_lines=4)
+class LongWordWithHyphensTestCase(BaseTestCase):
+ def setUp(self):
+ self.wrapper = TextWrapper()
+ self.text1 = '''\
+We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase.
+'''
+ self.text2 = '''\
+1234567890-1234567890--this_is_a_very_long_option_indeed-good-bye"
+'''
+
+ def test_break_long_words_on_hyphen(self):
+ expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-',
+ 'cyclohexadiene-1-carboxylate synthase.']
+ self.check_wrap(self.text1, 50, expected)
+
+ expected = ['We used', 'enyzme 2-', 'succinyl-', '6-hydroxy-', '2,4-',
+ 'cyclohexad', 'iene-1-', 'carboxylat', 'e', 'synthase.']
+ self.check_wrap(self.text1, 10, expected)
+
+ expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo',
+ 'ng_option_', 'indeed-', 'good-bye"']
+ self.check_wrap(self.text2, 10, expected)
+
+ def test_break_long_words_not_on_hyphen(self):
+ expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexad',
+ 'iene-1-carboxylate synthase.']
+ self.check_wrap(self.text1, 50, expected, break_on_hyphens=False)
+
+ expected = ['We used', 'enyzme 2-s', 'uccinyl-6-', 'hydroxy-2,',
+ '4-cyclohex', 'adiene-1-c', 'arboxylate', 'synthase.']
+ self.check_wrap(self.text1, 10, expected, break_on_hyphens=False)
+
+ expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo',
+ 'ng_option_', 'indeed-', 'good-bye"']
+ self.check_wrap(self.text2, 10, expected)
+
+ def test_break_on_hyphen_but_not_long_words(self):
+ expected = ['We used enyzme',
+ '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+ 'synthase.']
+
+ self.check_wrap(self.text1, 50, expected, break_long_words=False)
+
+ expected = ['We used', 'enyzme',
+ '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+ 'synthase.']
+ self.check_wrap(self.text1, 10, expected, break_long_words=False)
+
+ expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo',
+ 'ng_option_', 'indeed-', 'good-bye"']
+ self.check_wrap(self.text2, 10, expected)
+
+
+ def test_do_not_break_long_words_or_on_hyphens(self):
+ expected = ['We used enyzme',
+ '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+ 'synthase.']
+ self.check_wrap(self.text1, 50, expected,
+ break_long_words=False,
+ break_on_hyphens=False)
+
+ expected = ['We used', 'enyzme',
+ '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
+ 'synthase.']
+ self.check_wrap(self.text1, 10, expected,
+ break_long_words=False,
+ break_on_hyphens=False)
+
+ expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo',
+ 'ng_option_', 'indeed-', 'good-bye"']
+ self.check_wrap(self.text2, 10, expected)
+
class IndentTestCases(BaseTestCase):
# called before each test method
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
index 30e693c8de035..841de9baecf5d 100644
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -215,8 +215,16 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
# If we're allowed to break long words, then do so: put as much
# of the next chunk onto the current line as will fit.
if self.break_long_words:
- cur_line.append(reversed_chunks[-1][:space_left])
- reversed_chunks[-1] = reversed_chunks[-1][space_left:]
+ end = space_left
+ chunk = reversed_chunks[-1]
+ if self.break_on_hyphens and len(chunk) > space_left:
+ # break after last hyphen, but only if there are
+ # non-hyphens before it
+ hyphen = chunk.rfind('-', 0, space_left)
+ if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
+ end = hyphen + 1
+ cur_line.append(chunk[:end])
+ reversed_chunks[-1] = chunk[end:]
# Otherwise, we have to preserve the long word intact. Only add
# it to the current line if there's nothing already there --
diff --git a/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst
new file mode 100644
index 0000000000000..d67993492f9ff
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-10-16-16-08-04.bpo-28660.eX9pvD.rst
@@ -0,0 +1 @@
+:func:`textwrap.wrap` now attempts to break long words after hyphens when ``break_long_words=True`` and ``break_on_hyphens=True``.
More information about the Python-checkins mailing list