1 """Text wrapping and filling.
4 # Copyright (C) 1999-2001 Gregory P. Ward.
5 # Copyright (C) 2002, 2003 Python Software Foundation.
6 # Written by Greg Ward <gward@python.net>
8 __revision__ = "$Id: textwrap.py,v 1.32.8.2 2004/05/13 01:48:15 gward Exp $"
18 # Do the right thing with boolean values for all known Python versions
19 # (so this module can be copied to projects that don't depend on Python
20 # 2.3, e.g. Optik and Docutils).
24 (True, False) = (1, 0)
26 __all__ = ['TextWrapper', 'wrap', 'fill']
28 # Hardcode the recognized whitespace characters to the US-ASCII
29 # whitespace characters. The main reason for doing this is that in
30 # ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
31 # that character winds up in string.whitespace. Respecting
32 # string.whitespace in those cases would 1) make textwrap treat 0xa0 the
33 # same as any other whitespace char, which is clearly wrong (it's a
34 # *non-breaking* space), 2) possibly cause problems with Unicode,
35 # since 0xa0 is not in range(128).
36 _whitespace = '\t\n\x0b\x0c\r '
40 Object for wrapping/filling text. The public interface consists of
41 the wrap() and fill() methods; the other methods are just there for
42 subclasses to override in order to tweak the default behaviour.
43 If you want to completely replace the main wrapping algorithm,
44 you'll probably have to override _wrap_chunks().
46 Several instance attributes control various aspects of wrapping:
width (default: 70)
the maximum width of wrapped lines (unless break_long_words
is false)
50 initial_indent (default: "")
51 string that will be prepended to the first line of wrapped
52 output. Counts towards the line's width.
53 subsequent_indent (default: "")
54 string that will be prepended to all lines save the first
55 of wrapped output; also counts towards each line's width.
56 expand_tabs (default: true)
57 Expand tabs in input text to spaces before further processing.
58 Each tab will become 1 .. 8 spaces, depending on its position in
59 its line. If false, each tab is treated as a single character.
60 replace_whitespace (default: true)
61 Replace all whitespace characters in the input text by spaces
62 after tab expansion. Note that if expand_tabs is false and
replace_whitespace is true, every tab will be converted to a
single space.
65 fix_sentence_endings (default: false)
66 Ensure that sentence-ending punctuation is always followed
67 by two spaces. Off by default because the algorithm is
68 (unavoidably) imperfect.
69 break_long_words (default: true)
70 Break words longer than 'width'. If false, those words will not
71 be broken, and some lines might be longer than 'width'.
74 whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))
76 unicode_whitespace_trans = {}
78 uspace = eval("ord(u' ')")
80 # Python1.5 doesn't understand u'' syntax, in which case we
81 # won't actually use the unicode translation below, so it
82 # doesn't matter what value we put in the table.
84 for x in map(ord, _whitespace):
85 unicode_whitespace_trans[x] = uspace
87 # This funky little regex is just the trick for splitting
88 # text up into word-wrappable chunks. E.g.
89 # "Hello there -- you goof-ball, use the -b option!"
91 # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
92 # (after stripping out empty strings).
94 wordsep_re = re.compile(r'(\s+|' # any whitespace
95 r'[^\s\w]*\w{2,}-(?=\w{2,})|' # hyphenated words
96 r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
# Pre-2.0 Python versions don't have the (?<= positive look-behind
# assertion.  It mostly doesn't matter for the simple input
# SCons is going to give it, so just leave it out.
101 wordsep_re = re.compile(r'(\s+|' # any whitespace
102 r'-*\w{2,}-(?=\w{2,}))') # hyphenated words
104 # XXX will there be a locale-or-charset-aware version of
105 # string.lowercase in 2.3?
106 sentence_end_re = re.compile(r'[%s]' # lowercase letter
107 r'[\.\!\?]' # sentence-ending punct.
108 r'[\"\']?' # optional end-of-quote
115 subsequent_indent="",
117 replace_whitespace=True,
118 fix_sentence_endings=False,
119 break_long_words=True):
121 self.initial_indent = initial_indent
122 self.subsequent_indent = subsequent_indent
123 self.expand_tabs = expand_tabs
124 self.replace_whitespace = replace_whitespace
125 self.fix_sentence_endings = fix_sentence_endings
126 self.break_long_words = break_long_words
129 # -- Private methods -----------------------------------------------
130 # (possibly useful for subclasses to override)
132 def _munge_whitespace(self, text):
133 """_munge_whitespace(text : string) -> string
135 Munge whitespace in text: expand tabs and convert all other
136 whitespace characters to spaces. Eg. " foo\tbar\n\nbaz"
137 becomes " foo bar baz".
140 text = string.expandtabs(text)
141 if self.replace_whitespace:
142 if type(text) == type(''):
143 text = string.translate(text, self.whitespace_trans)
144 elif isinstance(text, unicode):
145 text = string.translate(text, self.unicode_whitespace_trans)
149 def _split(self, text):
150 """_split(text : string) -> [string]
152 Split the text to wrap into indivisible chunks. Chunks are
153 not quite the same as words; see wrap_chunks() for full
154 details. As an example, the text
155 Look, goof-ball -- use the -b option!
156 breaks into the following chunks:
157 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
158 'use', ' ', 'the', ' ', '-b', ' ', 'option!'
160 chunks = self.wordsep_re.split(text)
161 chunks = filter(None, chunks)
164 def _fix_sentence_endings(self, chunks):
165 """_fix_sentence_endings(chunks : [string])
167 Correct for sentence endings buried in 'chunks'. Eg. when the
168 original text contains "... foo.\nBar ...", munge_whitespace()
169 and split() will convert that to [..., "foo.", " ", "Bar", ...]
170 which has one too few spaces; this method simply changes the one
174 pat = self.sentence_end_re
175 while i < len(chunks)-1:
176 if chunks[i+1] == " " and pat.search(chunks[i]):
182 def _handle_long_word(self, chunks, cur_line, cur_len, width):
183 """_handle_long_word(chunks : [string],
185 cur_len : int, width : int)
187 Handle a chunk of text (most likely a word, not whitespace) that
188 is too long to fit in any line.
190 space_left = max(width - cur_len, 1)
192 # If we're allowed to break long words, then do so: put as much
193 # of the next chunk onto the current line as will fit.
194 if self.break_long_words:
195 cur_line.append(chunks[0][0:space_left])
196 chunks[0] = chunks[0][space_left:]
198 # Otherwise, we have to preserve the long word intact. Only add
199 # it to the current line if there's nothing already there --
200 # that minimizes how much we violate the width constraint.
202 cur_line.append(chunks.pop(0))
204 # If we're not allowed to break long words, and there's already
205 # text on the current line, do nothing. Next time through the
206 # main loop of _wrap_chunks(), we'll wind up here again, but
207 # cur_len will be zero, so the next line will be entirely
208 # devoted to the long word that we can't handle right now.
210 def _wrap_chunks(self, chunks):
211 """_wrap_chunks(chunks : [string]) -> [string]
213 Wrap a sequence of text chunks and return a list of lines of
214 length 'self.width' or less. (If 'break_long_words' is false,
215 some lines may be longer than this.) Chunks correspond roughly
216 to words and the whitespace between them: each chunk is
217 indivisible (modulo 'break_long_words'), but a line break can
218 come between any two chunks. Chunks should not have internal
219 whitespace; ie. a chunk is either all whitespace or a "word".
220 Whitespace chunks will be removed from the beginning and end of
221 lines, but apart from that whitespace is preserved.
225 raise ValueError("invalid width %r (must be > 0)" % self.width)
229 # Start the list of chunks that will make up the current line.
230 # cur_len is just the length of all the chunks in cur_line.
234 # Figure out which static string will prefix this line.
236 indent = self.subsequent_indent
238 indent = self.initial_indent
240 # Maximum width for this line.
241 width = self.width - len(indent)
243 # First chunk on line is whitespace -- drop it, unless this
244 # is the very beginning of the text (ie. no lines started yet).
245 if string.strip(chunks[0]) == '' and lines:
251 # Can at least squeeze this chunk onto the current line.
252 if cur_len + l <= width:
253 cur_line.append(chunks.pop(0))
254 cur_len = cur_len + l
256 # Nope, this line is full.
260 # The current line is full, and the next chunk is too big to
261 # fit on *any* line (not just this one).
262 if chunks and len(chunks[0]) > width:
263 self._handle_long_word(chunks, cur_line, cur_len, width)
265 # If the last chunk on this line is all whitespace, drop it.
266 if cur_line and string.strip(cur_line[-1]) == '':
269 # Convert current line back to a string and store it in list
270 # of all lines (return value).
272 lines.append(indent + string.join(cur_line, ''))
277 # -- Public interface ----------------------------------------------
def wrap(self, text):
    """wrap(text : string) -> [string]

    Reformat the single paragraph in 'text' so it fits in lines of
    no more than 'self.width' columns, and return a list of wrapped
    lines.  Tabs in 'text' are expanded with string.expandtabs(),
    and all other whitespace characters (including newline) are
    converted to space.

    The work is delegated, in order, to _munge_whitespace(),
    _split(), _fix_sentence_endings() (only when
    'self.fix_sentence_endings' is true) and _wrap_chunks().
    """
    text = self._munge_whitespace(text)
    chunks = self._split(text)
    if self.fix_sentence_endings:
        self._fix_sentence_endings(chunks)
    return self._wrap_chunks(chunks)
def fill(self, text):
    """fill(text : string) -> string

    Reformat the single paragraph in 'text' to fit in lines of no
    more than 'self.width' columns, and return a new string
    containing the entire wrapped paragraph.

    The result is the lines returned by self.wrap(text) joined with
    newlines; there is no trailing newline.
    """
    return "\n".join(self.wrap(text))
305 # -- Convenience interface ---------------------------------------------
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    # 'width' is a named parameter, so it can never also turn up in
    # 'kwargs'; passing both straight through is unambiguous.
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    # 'width' is a named parameter, so it can never also turn up in
    # 'kwargs'; passing both straight through is unambiguous.
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)
337 # -- Loosely related functionality -------------------------------------
340 """dedent(text : string) -> string
Remove any whitespace that can be uniformly removed from the left
of every line in `text`.
345 This can be used e.g. to make triple-quoted strings line up with
346 the left edge of screen/whatever, while still presenting it in the
347 source code in indented form.
352 # end first line with \ to avoid the empty line!
357 print repr(s) # prints ' hello\n world\n '
358 print repr(dedent(s)) # prints 'hello\n world\n'
360 lines = text.expandtabs().split('\n')
363 content = line.lstrip()
366 indent = len(line) - len(content)
370 margin = min(margin, indent)
372 if margin is not None and margin > 0:
373 for i in range(len(lines)):
374 lines[i] = lines[i][margin:]
376 return string.join(lines, '\n')