Finding Blank Columns in CSV
Peter Otten
__peter__ at web.de
Wed Oct 7 03:49:39 EDT 2015
Peter Otten wrote:
> I really meant it when I asked you to post the code you actually ran, and
> the traceback it produces.
Anyway, here's a complete script that should work. It uses indices instead of
names, but the underlying logic is the same.
#!/usr/bin/env python
import csv
import sys
from contextlib import contextmanager
# File modes for streams handed to the csv module: text mode on Python 3,
# binary mode on Python 2.  Compare with ">=" rather than "==" so that any
# later major version keeps the text-mode behaviour instead of silently
# falling back to the Python 2 binary modes.
if sys.version_info[0] >= 3:
    READ_MODE = "r"
    WRITE_MODE = "w"
else:
    READ_MODE = "rb"
    WRITE_MODE = "wb"
@contextmanager
def ext_open(file, mode):
    """Open a file if passed a path, pass on everything else unchanged.

    "-", b"-", and None indicate stdin or stdout depending on `mode`.

    Args:
        file: What to open.  None, "-" or b"-" select a standard stream.
            A path (str, bytes, Python 2 unicode, or an object with an
            ``__fspath__`` method such as pathlib.Path) is opened with
            open() and closed again when the with-block is left.  Any other
            object (an already open file, a list of lines, ...) is yielded
            as is and is never closed here.
        mode: Mode string handed to open().  It also selects the standard
            stream: sys.stdout if it contains "w", sys.stdin otherwise.

    Yields:
        The opened stream, the standard stream, or `file` itself.

    >>> with ext_open(None, "w") as f:
    ...     print("foo", file=f)
    foo
    >>> with ext_open(["one\\n", "two\\n"], "r") as f:
    ...     for line in f: print(line, end="")
    one
    two
    """
    # type(u"") is `unicode` on Python 2 and plain `str` on Python 3, so text
    # paths are recognised on both major versions.
    path_types = (str, bytes, type(u""))

    if file is None or file == "-" or file == b"-":
        # The standard streams belong to the interpreter; never close them.
        yield sys.stdout if "w" in mode else sys.stdin
    elif isinstance(file, path_types) or hasattr(file, "__fspath__"):
        # Path-like objects would otherwise be passed through unopened and
        # fail later when something tries to read from or write to them.
        with open(file, mode) as stream:
            yield stream
    else:
        yield file
def non_empty_columns(infile):
    """Find indices of columns that contain data.

    The first row is assumed to hold the field names; it only determines
    the number of columns and is not itself checked for data.

    Args:
        infile: Anything accepted by ext_open(): a filename, "-" or None
            for stdin, or an iterable of CSV lines.

    Returns:
        A list with the zero-based indices, in ascending order, of all
        columns that have a non-empty cell in at least one data row.
        Empty input (not even a header row) yields an empty list.

    Raises:
        ValueError: If a data row that is examined does not have the same
            number of fields as the header row.
    """
    with ext_open(infile, READ_MODE) as instream:
        reader = csv.reader(instream)
        header = next(reader, None)
        if header is None:
            # No header row at all, hence no columns; avoid leaking the
            # StopIteration that a bare next() would raise.
            return []
        colcount = len(header)
        empty_columns = set(range(colcount))
        for row in reader:
            # Validate explicitly: an assert would vanish under "python -O"
            # and let a short row fail with an obscure IndexError instead.
            if len(row) != colcount:
                raise ValueError(
                    "line %d: expected %d fields, found %d"
                    % (reader.line_num, colcount, len(row)))
            # A column remains a candidate only while all its cells are empty.
            empty_columns = {i for i in empty_columns if not row[i]}
            if not empty_columns:
                # Every column has data; the remaining rows cannot change that.
                break
    return [i for i in range(colcount) if i not in empty_columns]
def copy_csv(infile, outfile, columns):
    """Write the selected columns of every row in infile to outfile.

    infile and outfile are handed to ext_open() for reading and writing
    respectively.  columns holds the zero-based indices of the cells to
    keep, in the order they should appear in the output.  Every row,
    including the first, is copied.
    """
    with ext_open(infile, READ_MODE) as source, \
            ext_open(outfile, WRITE_MODE) as target:
        records = csv.reader(source)
        emit = csv.writer(target).writerow
        for record in records:
            emit([record[index] for index in columns])
def main(argv=None):
    """Command-line entry point: copy a CSV file without its blank columns.

    Args:
        argv: Optional list of command-line arguments (without the program
            name).  None, the default, makes argparse use sys.argv[1:], so
            calling main() with no arguments behaves as before.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Copy a CSV file, omitting columns that contain no data.")
    parser.add_argument(
        "infile", nargs="?", default=sys.stdin,
        help="input CSV file; '-' or omitted means stdin")
    parser.add_argument(
        "outfile", nargs="?", default=sys.stdout,
        help="output CSV file; '-' or omitted means stdout")
    args = parser.parse_args(argv)

    if args.infile is sys.stdin or args.infile == "-":
        # The input is consumed twice (once to find the non-empty columns,
        # once to copy them), so stdin is buffered in a list first.
        args.infile = list(sys.stdin)

    copy_csv(
        args.infile, args.outfile,
        columns=non_empty_columns(args.infile))
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
More information about the Python-list
mailing list