[Python-Dev] subprocess research - max limit for piped output

anatoly techtonik techtonik at gmail.com
Sun Jul 20 16:34:27 CEST 2014


I am trying to figure out what the maximum size is
for piped input in subprocess.check_output()

I hit a limit of about 500 MB, after which
Python exits with a MemoryError without any
additional details.

Only 2.76 GB of my 8 GB of memory is in use,
so which limit am I hitting?

1. subprocess output read buffer
2. Python limit on size of variable
3. some OS limit on output pipes

Testcase attached.


C:\discovery\interface\subprocess>py dead.py
Testing size: 520Mb
..truncating to 545259520
..
Traceback (most recent call last):
  File "dead.py", line 66, in <module>
    backticks(r'type largefile')
  File "dead.py", line 36, in backticks
    output = subprocess.check_output(command, shell=True)
  File "C:\Python27\lib\subprocess.py", line 567, in check_output
    output, unused_err = process.communicate()
  File "C:\Python27\lib\subprocess.py", line 791, in communicate
    stdout = _eintr_retry_call(self.stdout.read)
  File "C:\Python27\lib\subprocess.py", line 476, in _eintr_retry_call
    return func(*args)
MemoryError
The process tried to write to a nonexistent pipe.

-- 
anatoly t.
-------------- next part --------------
import subprocess

# --- replacing shell backticks ---
# https://docs.python.org/2/library/subprocess.html#replacing-bin-sh-shell-backquote
# The documentation suggests replacing
#   output=`mycmd myarg`
# with
#   output = check_output(["mycmd", "myarg"])
# which is not an exact equivalent, because mycmd is not passed to the shell.
#
# The call below is deliberately commented out, so the try body is a no-op:
# neither handler runs and `output` is never assigned here. The block only
# sketches the error handling a faithful replacement would need.
try:
    pass #output = subprocess.check_output(["mycmd", "myarg"], shell=True)
except OSError as ex:
    # command not found.
    # it is impossible to capture any output here; the shell writes its
    # message to stderr, which backticks do not capture either
    output = ''
except subprocess.CalledProcessError as ex:
    # the command failed: keep the output carried by the exception
    output = ex.output
# ^ information about the error condition is lost
# ^ output in case of OSError is lost

# ux notes:
# - `mycmd myarg` > ["mycmd", "myarg"]
# - `` is nearly invisible, while
#   subprocess.check_output is hard to remember
# - exception checking is excessive and not needed
#   (the common pattern is to check the return code)


def backticks(command):
    '''
    Run `command` through the shell and return what it wrote to stdout,
    the way shell backticks do.

    - no return code is reported
    - stderr is not captured

    A command that exits with an error still yields the output attached
    to the raised CalledProcessError; if the call fails with OSError, an
    empty string is returned instead.
    '''
    # NOTE: shell patterns in `command` are not escaped, such as:
    # ^ (windows cmd.exe shell)
    try:
        return subprocess.check_output(command, shell=True)
    except subprocess.CalledProcessError as failure:
        # The command ran but failed; hand back the output it produced.
        return failure.output
    except OSError:
        # command not found.
        # Nothing can be captured here: the shell reports the problem on
        # stderr, which backticks do not capture either.
        return ''


import os
# Resize "largefile" to each test size in turn (520Mb up to 598Mb in 2Mb
# steps) and read it back through a pipe with backticks(), to find the size
# at which capturing the piped output fails.

# One 100-byte line of filler. It is a bytes literal because the file is
# opened in binary mode; on Python 2 this is the same type as a plain str.
filler = b'1' * 99 + b'\n'

for size in range(520, 600, 2):
    print("Testing size: %sMb" % size)
    with open("largefile", "ab") as data:
        # Seek to the end of the file to learn its current size.
        data.seek(0, 2)
        cursize = data.tell()
        limit = size * 1024**2
        if cursize > limit:
            print('..truncating to %s' % limit)
            data.truncate(limit)
        else:
            print('..extending to %s' % limit)
            while cursize < limit:
                # Cut the final chunk short so the file ends exactly at
                # `limit` (1024**2 is not a multiple of the 100-byte line).
                toadd = min(len(filler), limit - cursize)
                data.write(filler[:toadd])
                cursize += toadd
    print("..")
    # `type` is the cmd.exe command that prints a file to stdout; the
    # captured output is discarded, only success or failure matters here.
    backticks(r'type largefile')



More information about the Python-Dev mailing list