Requests 的中文文档写得很有趣,有兴趣的可以先看看。
# Fetch an article listing page and print the text of every matching <h2>.
import requests
from lxml import etree  # provides XPath queries over the parsed HTML

url = "https://www.xinpianchang.com/discover/article?from=navigator"
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
}

response = requests.get(url, headers=headers, timeout=10)  # send the request
print(response.status_code)  # HTTP status code of the reply

# response.text is decoded with whatever charset requests infers, so it may
# come out garbled. response.content is the raw bytes; decode it explicitly
# as UTF-8 once and reuse the result for both parsing and printing.
html = response.content.decode("utf-8")

tree = etree.HTML(html)
elements = tree.xpath("//h2[@class='truncate block']")
for element in elements:
    print(element.text)

print(html)
# Download one image and save its raw bytes to a local file.
import requests

url = "https://img2.woyaogexing.com/2022/06/24/783b56bc70a892a9!400x400.jpg"

# Without a timeout requests can wait on a stalled server indefinitely.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of saving an error page as "123.jpg".
response.raise_for_status()

content = response.content  # raw bytes of the response body
with open("123.jpg", "wb") as f:
    f.write(content)
也可以用 urlretrieve() 方法直接将远程数据下载到本地。
# Standard-library alternative: copy a remote resource straight to a local path.
from urllib.request import urlretrieve

image_url = "https://img2.woyaogexing.com/2022/06/24/783b56bc70a892a9!400x400.jpg"
local_path = "1.jpg"

urlretrieve(image_url, local_path)
# Two ways to download a video file: all at once, or streamed in chunks.
import requests

url = "https://video.pearvideo.com/mp4/adshort/20220622/cont-1765998-15899235_adpkg-ad_hd.mp4"

# Option 1: read the whole body into memory, then write it in one go.
response = requests.get(url, timeout=10)
response.raise_for_status()  # do not save an HTTP error page as a video
content = response.content
with open("1.mp4", "wb") as f:
    f.write(content)

# Option 2: chunked download, which uses less memory. The body must NOT be
# touched through response.content here, because that would load the whole
# file into memory and defeat the purpose of stream=True.
with requests.get(url, stream=True, timeout=10) as response:
    response.raise_for_status()
    with open("2.mp4", "wb") as f:
        for chunk in response.iter_content(1024 * 1024):  # 1 MiB per chunk
            f.write(chunk)
# Write a small table of rows to a CSV file.
import csv

data = [[1, 2, 3], [4, 5, 6]]

# newline="" hands line-ending control to the csv module; the explicit
# encoding keeps the output independent of the platform default.
with open("1.csv", "w", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerows(data)
# POST form fields together with a file upload and print the server's reply.
import requests

url = "http://httpbin.org/post"
data = {"username": "abc", "password": "123"}

# Open the upload inside a with-block so the file handle is closed once the
# request has been sent, instead of leaking for the life of the process.
with open("data/123.jpg", "rb") as img:
    files = {"img": img}
    resp = requests.post(url, data=data, files=files, timeout=10)

print(resp.text)
# Log in through a session and then fetch data with that same session.
import requests

# Form fields and URLs are empty strings in this example.
data = {
    'name': '',
    'password': '',
}

# The with-block closes the session when the work is done.
with requests.Session() as session:
    # 1. Log in
    url = ''
    session.post(url, data=data, timeout=10)

    # 2. Fetch the data through the same session
    res = session.get('', timeout=10)
    print(res.json())
# Resolve the real video URL for a pearvideo page and download the video.
import requests

url = 'https://www.pearvideo.com/video_1756378'
cont_id = url.split('_')[1]  # the part of the page URL after the underscore
video_status_url = f'https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}&mrd=0.6270606489702433'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
    # Anti-hotlinking: Referer names the page this request originated from.
    'Referer': url
}

res = requests.get(video_status_url, headers=headers, timeout=10)
res.raise_for_status()
dic = res.json()

src_url = dic['videoInfo']['videos']['srcUrl']
system_time = dic['systemTime']
# Swap the systemTime value embedded in the returned URL for "cont-<id>".
src_url = src_url.replace(system_time, f'cont-{cont_id}')

# Download the video
video = requests.get(src_url, timeout=10)
video.raise_for_status()  # do not save an HTTP error page as "a.mp4"
with open('a.mp4', 'wb') as f:
    f.write(video.content)
# Send a request through a proxy server and print the decoded page.
import requests

url = 'https://www.baidu.com/'
# Maps the target URL scheme to the proxy that should carry it.
# NOTE(review): an "https://" proxy URL makes the client speak TLS to the
# proxy itself; many plain HTTP proxies need "http://" here - confirm.
proxies = {
    'https': 'https://36.6.57.27:40257'
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
}

# Always bound the wait so an unresponsive proxy cannot hang the script.
resp = requests.get(url, headers=headers, proxies=proxies, timeout=10)
resp.encoding = 'utf-8'  # force UTF-8 decoding for resp.text
print(resp.text)
本文作者:a
本文链接:
版权声明:本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!