how to debug this distributed program?

meInvent bbird jobmattcon at gmail.com
Tue Nov 1 03:40:06 EDT 2016


https://drive.google.com/open?id=0Bxs_ao6uuBDUSEc5S3U3Nko5ZjA


A. I am not sure whether the parameter indices start from 0 or 1:
should the compute function use n[0] or n[1] for the first value?

B. It runs for a very long time and there is nothing to see on the Amazon
Linux instance; no Python process shows up in the top command.

C. On the distributed programming web site, no authentication method using
a private key in the Python program is described. How does the distributed
program access the nodes in the Amazon cloud when the Python program below
is run locally on Windows?

import random, dispy

def compute(n): # executed on nodes
    """Search one slice of the parameter space and return the best value seen.

    ``n`` is the single argument given to ``cluster.submit``: a 4-item
    sequence ``[ii, jj, k_start, k_stop]``.  Python sequences are 0-based,
    so ``n[0]`` is ``ii`` and ``n[3]`` is ``k_stop``.  Unpacking raises
    ``ValueError`` if ``n`` does not hold exactly four items.

    Each job covers one ``(ii, jj)`` pair; only ``kk`` is iterated, over
    ``range(k_start, k_stop, 100)``.  (The previous ``range(n[0], n[0])`` /
    ``range(n[1], n[1])`` loops were always empty, so the body never ran.)

    Returns ``(hostname, best)`` where ``best`` is the smallest candidate
    seen, or the initial ``1`` when the kk range is empty.
    """
    # Imported inside the function because it runs on the remote node.
    import random, time, socket

    ii, jj, k_start, k_stop = n
    name = socket.gethostname()
    cur_best = 1
    for kk in range(k_start, k_stop, 100):
        # NOTE(review): 'r' was never assigned in the original (NameError).
        # This random draw is a placeholder candidate -- replace it with the
        # real computation on (ii, jj, kk) once the objective is known.
        r = random.uniform(0, 1)
        if r <= cur_best:
            # possible result: report it to the client right away;
            # dispy_provisional_result is supplied by dispy on the node
            dispy_provisional_result((name, r))
            cur_best = r
        time.sleep(0.1)
    # final result
    return (name, cur_best)

def job_callback(job): # executed at the client
    """Report a provisional result and cancel every job that is still active.

    Does nothing unless ``job.status`` is ``DispyJob.ProvisionalResult``.
    ``job.result`` is then the tuple passed to ``dispy_provisional_result``;
    its first item is printed as the node name and the remaining items are
    printed space-separated.  Formatting by length avoids the IndexError the
    old five-field format string would raise on the 2-tuples ``compute``
    sends.

    Relies on the module-level ``jobs`` and ``cluster`` created in
    ``__main__``.
    """
    if job.status != dispy.DispyJob.ProvisionalResult:
        return

    result = job.result
    values = ' '.join(str(value) for value in result[1:])
    print('%s computed: %s' % (result[0], values))

    # NOTE(review): the acceptance threshold ("job.result[1] < 0.005") was
    # commented out in the original, so the first provisional result of any
    # job cancels all outstanding jobs -- confirm this is intended.
    # 'jobs' and 'cluster' are created in '__main__' below
    active = (dispy.DispyJob.Created, dispy.DispyJob.Running,
              dispy.DispyJob.ProvisionalResult)
    for j in jobs:
        if j.status in active:
            cluster.cancel(j)

if __name__ == '__main__':
    # Client side: submit one job per (ii, jj, kk-slice), wait for all of
    # them, print cluster statistics, then shut the cluster down.
    # Alternative without an explicit node list:
    #   cluster = dispy.JobCluster(compute, callback=job_callback)
    cluster = dispy.JobCluster(compute, nodes=['ec2-100-162-137-237.us-west-2.compute.amazonaws.com'], callback=job_callback)
    jobs = []
    prevk = 1   # lower bound of the next kk slice handed to a job
    count = 0   # running id assigned to successfully created jobs
    for ii in range(1,2):
        for jj in range(1,2000):
            for kk in range(1,2000,100):
                # only strictly increasing triples are submitted
                if not (ii < jj < kk):
                    continue
                # the list is delivered to compute() as its single argument 'n'
                args = [ii, jj, prevk, kk]
                job = cluster.submit(args)
                prevk = kk
                if job is None:
                    # the original printed the undefined name 'n' here,
                    # raising NameError exactly when a submission failed
                    print('creating job %s failed!' % (args,))
                    continue
                job.id = count
                count = count + 1
                jobs.append(job)
    cluster.wait()
    cluster.print_status()
    cluster.close()



More information about the Python-list mailing list