import requests
from bs4 import BeautifulSoup


## Send the request and return the raw page
def getHtml(page, reportTime="2021-06-30", timeout=30):
    """Fetch one page of the report listing and return the response body.

    Args:
        page: Page number, sent as the ``pageNum`` query parameter.
        reportTime: Value sent as the ``reportTime`` query parameter.
            Defaults to the date this script originally hard-coded.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, a single stalled connection would block the whole
            crawl indefinitely.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response status.
    """
    # NOTE(review): the URL and User-Agent are blank in this copy of the
    # script; fill them in before running.
    url = ""
    header = {"User-Agent": ""}
    r = requests.get(
        url,
        params={"reportTime": reportTime, "pageNum": page},
        headers=header,
        timeout=timeout,
    )
    # Fail here with a clear HTTP error instead of handing an error page to
    # the parser, where it would surface as an AttributeError on None.
    r.raise_for_status()
    return r.text


## Parse the table header
def parseTitle(soup):
    """Return the text of every ``<th>`` in the element with id ``myTable04``."""
    table = soup.find(id="myTable04")
    return [th.text for th in table.find_all("th")]


## Parse the table rows
def parseData(soup):
    """Return the table body as a list of rows, each a list of cell texts.

    Only ``<tr>`` elements inside the ``<tbody>`` of the element with id
    ``myTable04`` are read; each row contributes the text of its ``<td>``
    cells in document order.
    """
    tbody = soup.find(id="myTable04").find("tbody")
    return [[td.text for td in tr.find_all("td")] for tr in tbody.find_all("tr")]


## Fetch and parse every page
tableData = []
for page in range(1, 224):  # pages 1..223 inclusive
    html = getHtml(page)
    soup = BeautifulSoup(html, "lxml")
    if page == 1:
        # The header row is taken once, from the first page only.
        title = parseTitle(soup)
        tableData.append(title)
    pageData = parseData(soup)
    tableData.extend(pageData)

# These bare expressions have no effect when run as a script; they are
# inspection steps for an interactive session.
tableData[:5]  # look at the first five records
len(tableData)  # check the size of the data set
本文作者:a
本文链接:
版权声明:本博客所有文章除特别声明外,均采用 CC BY-NC-SA 许可协议。转载请注明出处!