Spaces:
Build error
Build error
File size: 12,483 Bytes
5bd179e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
"""text_file
provides the TextFile class, which gives an interface to text files
that (optionally) takes care of stripping comments, ignoring blank
lines, and joining lines with backslashes."""
import sys, io
class TextFile:
"""Provides a file-like object that takes care of all the things you
commonly want to do when processing a text file that has some
line-by-line syntax: strip comments (as long as "#" is your
comment character), skip blank lines, join adjacent lines by
escaping the newline (ie. backslash at end of line), strip
leading and/or trailing whitespace. All of these are optional
and independently controllable.
Provides a 'warn()' method so you can generate warning messages that
report physical line number, even if the logical line in question
spans multiple physical lines. Also provides 'unreadline()' for
implementing line-at-a-time lookahead.
Constructor is called as:
TextFile (filename=None, file=None, **options)
It bombs (RuntimeError) if both 'filename' and 'file' are None;
'filename' should be a string, and 'file' a file object (or
something that provides 'readline()' and 'close()' methods). It is
recommended that you supply at least 'filename', so that TextFile
can include it in warning messages. If 'file' is not supplied,
TextFile creates its own using 'io.open()'.
The options are all boolean, and affect the value returned by
'readline()':
strip_comments [default: true]
strip from "#" to end-of-line, as well as any whitespace
leading up to the "#" -- unless it is escaped by a backslash
lstrip_ws [default: false]
strip leading whitespace from each line before returning it
rstrip_ws [default: true]
strip trailing whitespace (including line terminator!) from
each line before returning it
skip_blanks [default: true}
skip lines that are empty *after* stripping comments and
whitespace. (If both lstrip_ws and rstrip_ws are false,
then some lines may consist of solely whitespace: these will
*not* be skipped, even if 'skip_blanks' is true.)
join_lines [default: false]
if a backslash is the last non-newline character on a line
after stripping comments and whitespace, join the following line
to it to form one "logical line"; if N consecutive lines end
with a backslash, then N+1 physical lines will be joined to
form one logical line.
collapse_join [default: false]
strip leading whitespace from lines that are joined to their
predecessor; only matters if (join_lines and not lstrip_ws)
errors [default: 'strict']
error handler used to decode the file content
Note that since 'rstrip_ws' can strip the trailing newline, the
semantics of 'readline()' must differ from those of the builtin file
object's 'readline()' method! In particular, 'readline()' returns
None for end-of-file: an empty string might just be a blank line (or
an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
not."""
default_options = { 'strip_comments': 1,
'skip_blanks': 1,
'lstrip_ws': 0,
'rstrip_ws': 1,
'join_lines': 0,
'collapse_join': 0,
'errors': 'strict',
}
def __init__(self, filename=None, file=None, **options):
"""Construct a new TextFile object. At least one of 'filename'
(a string) and 'file' (a file-like object) must be supplied.
They keyword argument options are described above and affect
the values returned by 'readline()'."""
if filename is None and file is None:
raise RuntimeError("you must supply either or both of 'filename' and 'file'")
# set values for all options -- either from client option hash
# or fallback to default_options
for opt in self.default_options.keys():
if opt in options:
setattr(self, opt, options[opt])
else:
setattr(self, opt, self.default_options[opt])
# sanity check client option hash
for opt in options.keys():
if opt not in self.default_options:
raise KeyError("invalid TextFile option '%s'" % opt)
if file is None:
self.open(filename)
else:
self.filename = filename
self.file = file
self.current_line = 0 # assuming that file is at BOF!
# 'linebuf' is a stack of lines that will be emptied before we
# actually read from the file; it's only populated by an
# 'unreadline()' operation
self.linebuf = []
def open(self, filename):
"""Open a new file named 'filename'. This overrides both the
'filename' and 'file' arguments to the constructor."""
self.filename = filename
self.file = io.open(self.filename, 'r', errors=self.errors)
self.current_line = 0
def close(self):
"""Close the current file and forget everything we know about it
(filename, current line number)."""
file = self.file
self.file = None
self.filename = None
self.current_line = None
file.close()
def gen_error(self, msg, line=None):
outmsg = []
if line is None:
line = self.current_line
outmsg.append(self.filename + ", ")
if isinstance(line, (list, tuple)):
outmsg.append("lines %d-%d: " % tuple(line))
else:
outmsg.append("line %d: " % line)
outmsg.append(str(msg))
return "".join(outmsg)
def error(self, msg, line=None):
raise ValueError("error: " + self.gen_error(msg, line))
def warn(self, msg, line=None):
"""Print (to stderr) a warning message tied to the current logical
line in the current file. If the current logical line in the
file spans multiple physical lines, the warning refers to the
whole range, eg. "lines 3-5". If 'line' supplied, it overrides
the current line number; it may be a list or tuple to indicate a
range of physical lines, or an integer for a single physical
line."""
sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")
def readline(self):
"""Read and return a single logical line from the current file (or
from an internal buffer if lines have previously been "unread"
with 'unreadline()'). If the 'join_lines' option is true, this
may involve reading multiple physical lines concatenated into a
single string. Updates the current line number, so calling
'warn()' after 'readline()' emits a warning about the physical
line(s) just read. Returns None on end-of-file, since the empty
string can occur if 'rstrip_ws' is true but 'strip_blanks' is
not."""
# If any "unread" lines waiting in 'linebuf', return the top
# one. (We don't actually buffer read-ahead data -- lines only
# get put in 'linebuf' if the client explicitly does an
# 'unreadline()'.
if self.linebuf:
line = self.linebuf[-1]
del self.linebuf[-1]
return line
buildup_line = ''
while True:
# read the line, make it None if EOF
line = self.file.readline()
if line == '':
line = None
if self.strip_comments and line:
# Look for the first "#" in the line. If none, never
# mind. If we find one and it's the first character, or
# is not preceded by "\", then it starts a comment --
# strip the comment, strip whitespace before it, and
# carry on. Otherwise, it's just an escaped "#", so
# unescape it (and any other escaped "#"'s that might be
# lurking in there) and otherwise leave the line alone.
pos = line.find("#")
if pos == -1: # no "#" -- no comments
pass
# It's definitely a comment -- either "#" is the first
# character, or it's elsewhere and unescaped.
elif pos == 0 or line[pos-1] != "\\":
# Have to preserve the trailing newline, because it's
# the job of a later step (rstrip_ws) to remove it --
# and if rstrip_ws is false, we'd better preserve it!
# (NB. this means that if the final line is all comment
# and has no trailing newline, we will think that it's
# EOF; I think that's OK.)
eol = (line[-1] == '\n') and '\n' or ''
line = line[0:pos] + eol
# If all that's left is whitespace, then skip line
# *now*, before we try to join it to 'buildup_line' --
# that way constructs like
# hello \\
# # comment that should be ignored
# there
# result in "hello there".
if line.strip() == "":
continue
else: # it's an escaped "#"
line = line.replace("\\#", "#")
# did previous line end with a backslash? then accumulate
if self.join_lines and buildup_line:
# oops: end of file
if line is None:
self.warn("continuation line immediately precedes "
"end-of-file")
return buildup_line
if self.collapse_join:
line = line.lstrip()
line = buildup_line + line
# careful: pay attention to line number when incrementing it
if isinstance(self.current_line, list):
self.current_line[1] = self.current_line[1] + 1
else:
self.current_line = [self.current_line,
self.current_line + 1]
# just an ordinary line, read it as usual
else:
if line is None: # eof
return None
# still have to be careful about incrementing the line number!
if isinstance(self.current_line, list):
self.current_line = self.current_line[1] + 1
else:
self.current_line = self.current_line + 1
# strip whitespace however the client wants (leading and
# trailing, or one or the other, or neither)
if self.lstrip_ws and self.rstrip_ws:
line = line.strip()
elif self.lstrip_ws:
line = line.lstrip()
elif self.rstrip_ws:
line = line.rstrip()
# blank line (whether we rstrip'ed or not)? skip to next line
# if appropriate
if (line == '' or line == '\n') and self.skip_blanks:
continue
if self.join_lines:
if line[-1] == '\\':
buildup_line = line[:-1]
continue
if line[-2:] == '\\\n':
buildup_line = line[0:-2] + '\n'
continue
# well, I guess there's some actual content there: return it
return line
def readlines(self):
"""Read and return the list of all logical lines remaining in the
current file."""
lines = []
while True:
line = self.readline()
if line is None:
return lines
lines.append(line)
def unreadline(self, line):
"""Push 'line' (a string) onto an internal buffer that will be
checked by future 'readline()' calls. Handy for implementing
a parser with line-at-a-time lookahead."""
self.linebuf.append(line)
|