当前位置:博客首页 > Python > 正文

第19课 http协议及requests模块使用 time模块、get/post/headers/status_code/代理

作者: Jarvan 分类: Python 发布时间: 2019-06-19 13:12 百度已收录

requests模块快速上手:
https://2.python-requests.org/zh_CN/latest/user/quickstart.html

案例1:获得百度serp中url的真实地址

import requests

# Resolve the real target URL hidden behind a Baidu SERP redirect link.
# A HEAD request is enough here: the redirect target is delivered in the
# 'Location' response header, so the body never needs to be downloaded.

redirect_link = 'https://www.baidu.com/link?url=p2B5uRHw_3J30JpbLnkgwxMVZmSu2ddwbMNSadiRN4eQJykMztsMasoV7n_3jzL7'
response = requests.head(redirect_link)
print(response.headers.get('Location'))

案例2:requests的各种方法练习

import requests

def download(word):
    """Run a Baidu web search for *word* and dump response details.

    Prints, in order: the final request URL, the response headers,
    the HTTP status code, and the decoded body text.
    """
    base_url = 'http://www.baidu.com/s'
    # 'ie' tells Baidu how the query string is encoded.
    query = {
        'ie': 'utf-8',
        'word': word
    }
    # Desktop Chrome UA so Baidu serves the regular desktop SERP.
    ua_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit'
                      '/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari'
                      '/537.36'
    }
    resp = requests.get(base_url, params=query, headers=ua_headers)
    print(resp.url)          # full request URL after params are appended
    print(resp.headers)      # response headers
    print(resp.status_code)  # HTTP status code
    print(resp.text)         # response body decoded to text (unicode)

if __name__ == '__main__':
    download('防腐木')

案例3:图片下载

# -*- coding: utf-8 -*-
import requests

def download(url, path='pic.jpg'):
    """Download the resource at *url* and save it as binary to *path*.

    Args:
        url: Direct link to the file to fetch.
        path: Output filename; defaults to 'pic.jpg' to preserve the
            original behavior.
    """
    resp = requests.get(url)
    # Context manager guarantees the file handle is closed even if
    # write() raises (the original leaked the handle on error).
    # 'wb' because resp.content is raw bytes.
    with open(path, mode='wb') as f:
        f.write(resp.content)

if __name__ == '__main__':
    image_url = 'https://hbimg.huabanimg.com/437658f08a68e1d24ea512b48b4a8cb7d808f3de25148-uBjU7Q_fw658'
    download(image_url)

案例4:time时间戳转化

# -*- coding: utf-8 -*-
import time

# Convert a Unix timestamp into the machine's local time and render it
# as a formatted string.
STAMP = 1508840786
s = time.localtime(STAMP)  # broken-down struct_time in the local zone
print(s)
FMT = '%Y-%m-%d %H-%M-%S'
print(time.strftime(FMT, s))

# Expected output (for a CST/UTC+8 machine):
# time.struct_time(tm_year=2017, tm_mon=10, tm_mday=24,
# tm_hour=18, tm_min=26, tm_sec=26, tm_wday=1, tm_yday=297, tm_isdst=0)
# 2017-10-24 18-26-26

案例5:查询指定关键词百度SERP TOP10的title,url,time

# -*- coding: utf-8 -*-
import requests
import time

"""
查询某关键词百度TOP10
"""


def top10(word):
    url = 'https://www.baidu.com/s'
    params = {
        'ie': 'UTF-8',
        'wd': word,
        'tn': 'json'
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36'
    }

    resp = requests.get(url, params=params, headers=headers)
    content = resp.json()
    entry = content['feed']['entry']
    for item in entry[:-1]:
        title = item['title']
        link = item['url']
        snapshot = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(item['time']))
        print(title + '\n', link + '\n', snapshot)
        print('=' * 50)


if __name__ == '__main__':
    keyword = 'seo'
    top10(keyword)

案例6:使用代理访问并下载百度

# -*- coding: utf-8 -*-
import requests

# The proxy value is always written with the http:// scheme, even when
# the proxied request itself is https.
proxy = {'https': 'http://112.85.167.244:9999'}


def con_baidu(proxies=None):
    """Fetch the Baidu homepage, optionally through *proxies*.

    On any requests-level failure (timeout, DNS error, refused or bad
    proxy connection, ...) prints 'connect failed'; otherwise prints
    the response headers followed by the UTF-8 decoded body.
    """
    target = 'https://www.baidu.com'
    try:
        resp = requests.get(target, proxies=proxies, timeout=10)
    except requests.RequestException:
        print('connect failed')
        return
    # Force UTF-8 decoding before reading .text.
    resp.encoding = 'utf-8'
    print(resp.headers)
    print(resp.text)

if __name__ == '__main__':
    con_baidu(proxy)

发表评论