Problem--IOError: [Errno 13] Permission denied

Sun Oct 28 17:25:04 EDT 2007

Hi all,

After sludging my way through many obstacles with this interesting
puzzle of a text parsing program, I found myself with one final error:

Traceback (most recent call last):
  File "C:\Python24\Lib\site-packages\pythonwin\pywin\framework
\scriptutils.py", line 310, in RunScript
    exec codeObject in __main__.__dict__
  File "C:\Documents and Settings\Patrick Waldo\My Documents\Python
\WORD\try5-2-file-1-all patterns.py", line 77, in ?
    input = codecs.open(input_text, 'r','utf8')
  File "C:\Python24\lib\codecs.py", line 666, in open
    file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 13] Permission denied: 'C:\\text_samples\\test\
\output'

The error doesn't stop the program from functioning as it should,
except the last line of every document gets split with | in between
the words, which is just strange.  I have no idea why either is
happening, but perhaps they are related.

Any ideas?

#For text files in a directory...
#Analyzes a randomly organized UTF8 document with EINECS, CAS,
Chemical, and Chemical Formula
#into a document structured as EINECS|CAS|Chemical|Chemical Formula.

import os
import codecs
import re

path = "C:\\text_samples\\test\\"
path2 = "C:\\text_samples\\test\\output\\"

EINECS = re.compile(r'^\d\d\d-\d\d\d-\d$')
FORMULA = re.compile(r'([A-Z][a-zA-Z0-9]*\.?[A-Za-z0-9]*/?[A-Za-
z0-9]*)')
FALSE_POS = re.compile(r'^[A-Z][a-z]{4,40}\)?\.?')
FALSE_POS1 = re.compile(r'C\.I\..*')
FALSE_POS2 = re.compile(r'vit.*')
FALSE_NEG = re.compile(r'C\d+\.')

def iter_elements(tokens):
    product = []
    for tok in tokens:
        if EINECS.match(tok) and len(product) >= 3:
            match = re.match(FORMULA,product[-1])
            match_false_pos = re.match(FALSE_POS,product[-1])
            match_false_pos1 = re.match(FALSE_POS1,product[-1])
            match_false_pos2 = re.match(FALSE_POS2,product[2])
            match_false_neg = re.match(FALSE_NEG,product[-1])
            if match_false_neg:
                product[2:-1] = [' '.join(product[2:])]
                del product[-1]
                yield product
                product = []
            elif match_false_pos:
                product[2:-1] = [' '.join(product[2:])]
                del product[-1]
                yield product
                product = []
            elif match:
                product[2:-1] = [' '.join(product[2:-1])]
                yield product
                product = []
            elif match_false_pos1 or match_false_pos2:
                product[2:-1] = [' '.join(product[2:])]
                del product[-1]
                yield product
                product = []
            else:
                product[2:-1] = [' '.join(product[2:])]
                del product[-1]
                yield product
                product = []
        product.append(tok)
    yield product

for text in os.listdir(path):
    input_text = os.path.join(path,text)
    output_text = os.path.join(path2,text)
    input = codecs.open(input_text, 'r','utf8')
    output = codecs.open(output_text, 'w', 'utf8')
    tokens = input.read().split()
    for element in iter_elements(tokens):
        output.write('|'.join(element))
        output.write("\r\n")
input.close()
output.close()