[Tutor] script guidance

renukesh nk renukeshnk15 at gmail.com
Mon Oct 23 00:33:21 EDT 2017


I want to download zip files from a website. My script first lists all the
URL links to a text file, and then fetches each URL and tries to download
the zip files.


but i am getting error as below:
Running script..
https://sagamusix.dehttps://
sagamusix.de/other/Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/bass.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/bass_drums.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/drums.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/fx.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/pads_strings.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/powerchords.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/synths.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/tr-808.zip
/n
https://sagamusix.dehttps://sagamusix.de/sample_collection/tr-909.zip
/n
Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip

Trying to reach https://sagamusix.dehttps://
sagamusix.de/other/Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip

We failed to reach a server.https://sagamusix.dehttps://
sagamusix.de/other/Saga%20Musix%20-%20Colors%20of%20Synth1%20v1.0.zip

Reason:  [Errno 11001] getaddrinfo failed
bass.zip

Please help me fix it so that I can download all the zip files.

code:

import os
import urllib2
import urlparse
from urllib2 import Request, urlopen, URLError
#import urllib

from bs4 import BeautifulSoup

# import socket
# socket.getaddrinfo('localhost', 8080)

#Create a new directory to put the files into
#Get the current working directory and create a new directory in it named test
cwd = os.getcwd()
newdir = cwd +"\\test"
print "The current Working directory is " + cwd
os.mkdir( newdir);
print "Created new directory " + newdir
newfile = open('zipfiles.txt','w')
print newfile


print "Running script.. "
#Set variable for page to be open and url to be concatenated
url = "https://sagamusix.de"
page = urllib2.urlopen('https://sagamusix.de/en/samples/').read()

#File extension to be looked for.
extension = ".zip"

#Use BeautifulSoup to clean up the page
soup = BeautifulSoup(page, "html5lib")
soup.prettify()

#Find all the links on the page that end in .zip
for anchor in soup.findAll('a', href=True):
    links = url + anchor['href']
    if links.endswith(extension):
        newfile.write(links + '\n')
newfile.close()

#Read what is saved in zipfiles.txt and output it to the user
#This is done to create presistent data
newfile = open('zipfiles.txt', 'r')
for line in newfile:
    print line + '/n'
newfile.close()

#Read through the lines in the text file and download the zip files.
#Handle exceptions and print exceptions to the console
with open('zipfiles.txt', 'r') as url:
    for line in url:
        if line.find('/'):
            print line.rsplit('/', 1)[1]

            try:
                ziplink = line
                #Removes the first 48 characters of the url to get the
name of the file
                zipfile = line[24:]
                #Removes the last 4 characters to remove the .zip
                zipfile2 = zipfile[:3]
                print "Trying to reach " + ziplink
                response = urllib2.urlopen(ziplink)
            except URLError as e:

                    print 'We failed to reach a server.'+ziplink
                    if hasattr(e, 'reason'):
                     print 'Reason: ', e.reason
                     continue
                    elif hasattr(e, 'code'):
                     print 'The server couldnt fulfill the request.'
                    print 'Error code: ', e.code
                    continue
            else:
                zipcontent = response.read()
                completeName = os.path.join(newdir, zipfile2+ ".zip")
                with open (completeName, 'w') as f:
                    print "downloading.. " + zipfile
                    f.write(zipcontent)
                    f.close()
print "Script completed"


More information about the Tutor mailing list