方法一:借助build_opener和addheaders完成
import urllib.request
import os,sys,io
# Work around console encoding problems: re-wrap stdout so that printed text
# is encoded as gb18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
url="https://www.jianshu.com/"
# NOTE: with an opener, each header is a (name, value) tuple and
# `addheaders` takes a list of such tuples.
# The value is a regular desktop-Chrome User-Agent string.
headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36")
opener=urllib.request.build_opener()
opener.addheaders=[headers]
# Use the response as a context manager so the connection is closed even if
# reading or decoding raises.
with opener.open(url) as response:
    print(response.read().decode("utf-8"))
方法二:用add_header()来添加headers
import urllib.request
import os,sys,io
import socket
# Work around console encoding problems: re-wrap stdout so that printed text
# is encoded as gb18030.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
url="https://www.jianshu.com/"
req=urllib.request.Request(url=url)
# Attach a regular desktop-Chrome User-Agent string to the request.
req.add_header("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36")
try:
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(req,timeout=10.1) as response:
        # Undecodable bytes are dropped rather than aborting the print.
        print(response.read().decode("utf-8",'ignore'))
except Exception as e:
    # Top-level boundary of the script: report the failure instead of
    # crashing, but only call it a timeout when it really is one.
    # A connect timeout arrives wrapped in URLError (exposed as `.reason`);
    # a read timeout is raised directly as socket.timeout.
    cause=getattr(e,"reason",e)
    if isinstance(cause,socket.timeout):
        print("时间超时",str(e))
    else:
        print("请求失败",str(e))