[Tutor] Python Crawl problem help

CPECC张涛 sccdzt at foxmail.com
Tue May 25 08:47:34 EDT 2021


Dear sir :
Could you please help solve the below problem that can not run and the result is 
"E:\Python Data Visualization\Scripts\python.exe" "E:/Python Data Visualization/爬虫/wallpaper2.py"
None
Traceback (most recent call last):
  File "E:/Python Data Visualization/爬虫/wallpaper2.py", line 52, in <module>
    start()
  File "E:/Python Data Visualization/爬虫/wallpaper2.py", line 49, in start
    imgs=getimgdata(page)
  File "E:/Python Data Visualization/爬虫/wallpaper2.py", line 28, in getimgdata
    for item in content_list.find_all('figure'):
AttributeError: 'NoneType' object has no attribute 'find_all'


Process finished with exit code 1



import requests
import re
import time
import os
from bs4 import BeautifulSoup
import urllib


def getpage():
    url='https://www.zhihu.com/question/451014453/answer/1797338225'
    headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1;'
                       'Win64; x64) AppleWebKit/537.36 (KHTML, like'
                       'Gecko) Chrome/69.0.3497.100'
                       'Safari/537.36','Referer': "https://www.zhihu.com"
                       "/question/37787176"}
    try:
        r = requests.get(url, headers=headers).content
        return r
    except:
        return print('coneection error')

def getimgdata(data):
    soup=BeautifulSoup(data,'lxml')
    content_list=soup.find('div',attrs={'class':'list'})
    print(content_list)
    img_list=[]
    for item in content_list.find_all('figure'):
        img=item.find('img')['src']
        img_list.append(img)
    return img_list
def saveToDir(contents):
    img=40
    try:
        path=r'F:\Python crawler'
        if not os.path.isdir(path):
            os.makedirs(path)
        img=0
        for item in contents:
            paths=path+str(img)+'.jpg'
            time.sleep(1)
            urllib.request.urlretrieve(item,paths)
            img+=1
            print('%sDownloaded'%img)
    except Exception as e:
        print(e)
def start():
    page=getpage()
    imgs=getimgdata(page)
    saveToDir(imgs)

start()


More information about the Tutor mailing list