multiprocessing.Process call blocks other processes from running

Rodrick Brown rodrick.brown at gmail.com
Sat Jan 14 13:46:19 EST 2017


I'm trying to implement a script that tracks how much space certain
applications are using and send a metric to a statsd service for realtime
analysis however when running this code it nots forking multiple processes
its still running sequential at some point the forking was working but I
can't seem to figure out where I went wrong please advise

I'm just trying to keep track of the growth rate of certain dirs these dirs
have hundreds of thousands of files and take sometime to run I use du as it
runs much faster than os.walk() and it also returns the correct compressed
size on the file system pythons getsize() does not.

Thanks.

from datadog import initialize
from datadog import api
from datadog import statsd
import os
import subprocess
from import Process, Queue
from datadog import ThreadStats
import time
import datetime
from hashlib import md5

options = {
  'api_key': 'xx',
  'app_key': 'xx'
}

def getWhispererLogsDirSize(clientcfg, queue):
  clientName, logPath = clientcfg.items()[0]
  totalSize = 0
  clientResult = {}
  for item in os.listdir(logPath):
    logDir = logPath + "/" + item
    try:
      totalSize = totalSize +
int(subprocess.check_output(["du","-s",logDir]).split('\t')[0])
    except subprocess.CalledProcessError:
      print("Error processing {0} skipping.....".format(logDir))
      continue
  clientResult[clientName] = [os.path.basename(logPath),totalSize]
  queue.put(clientResult)
  return

if __name__ == '__main__':

  title = 'Whisperer client marketdata usage'
  text = 'This simple utility sends whisperer logs into datadog based on
client usage on disk'
  tags = ['version:1']
  initialize(**options)
  #api.Event.create(title=title, text=text, tags=tags)
  #stats = ThreadStats()
  #stats.start()
  queue = Queue()
  jobs = []
  clients = [
    {'xx1':'/mnt/auto/glusterfs/app/NYC01-xx-PROD-01'},
    {'xx2':'/mnt/auto/glusterfs/app/NYC01-xx-PROD-01'},
    {'xx3':'/mnt/auto/glusterfs/app/NYC01-xx-PROD-01'},
    {'xx4':'/mnt/auto/glusterfs/app/NYC01-xx-PROD-01'}
  ]
  tags = []
  while True:
    for client in clients:
      stats = ThreadStats()
      stats.start()
      p = Process(target=getWhispererLogsDirSize, args=(client,queue,))
      jobs.append(p)
      p.start()
      p.join()
      clientinfo = queue.get()

      clientName = clientinfo.values()[0][0]
      clientPath = clientinfo.keys()[0]
      clientLogSize = clientinfo.values()[0][1]
      tags = [clientName,clientPath]
      aggregation_key = md5(clientName).hexdigest()

      print(clientName, clientPath, clientLogSize)
      with open('/tmp/dogstatd_out.log', 'a+') as fp:
        fp.write("{0} {1} {2}
{3}\n".format(str(datetime.datetime.now()),clientName, clientPath,
clientLogSize))

stats.gauge('whisperer.marketdata.clientlogsize',int(clientLogSize),tags=tags)



More information about the Python-list mailing list