Python获取各省IP地址(附带源码)
该代码使用 Python 爬取福建省的 IP 段数据,通过 requests 和 lxml 库获取网页内容并解析,提取省、市、区及 IP 段信息,保存到 Pandas DataFrame 中。接着根据 IP 段随机生成 IP 地址,并将结果写入 Excel 文件。
Python代码如下:
import ipaddress
import random

import pandas as pd
import requests
from lxml import etree
# URL of the index page listing the IP segments of Fujian province.
url = "http://ip.khcha.com/ipsection_view.aspx?fl=2&type=S_FJ"


def _fetch_html(session, page_url, timeout=10):
    """Download ``page_url`` and return it parsed as an lxml HTML tree.

    A timeout is always applied so a stalled server cannot hang the crawl
    forever, and 4xx/5xx responses raise ``requests.HTTPError`` instead of
    being parsed as if they were data pages.
    """
    response = session.get(url=page_url, timeout=timeout)
    response.raise_for_status()
    return etree.HTML(response.text)


def _child_urls(html):
    """Return the absolute URL of the first link inside each navigation <li>.

    List items that contain no ``href`` attribute are skipped.
    """
    urls = []
    for item in html.xpath('//*[@class="navr"]/ul/li'):
        hrefs = item.xpath('.//@href')  # evaluate the XPath only once per item
        if hrefs:
            urls.append("http://ip.khcha.com/" + hrefs[0])
    return urls


# Rows of [province, city, area, start_ip, end_ip], one per scraped IP segment.
data_lst = []

# A single Session reuses the TCP connection across the many page requests.
with requests.Session() as session:
    # First level: links found in the navigation list of the province page.
    for city_url in _child_urls(_fetch_html(session, url)):
        # Second level: links found in the navigation list of each of those pages.
        for area_url in _child_urls(_fetch_html(session, city_url)):
            area_html = _fetch_html(session, area_url)
            segments = area_html.xpath('//*[@class="mainright"]/ul/li')
            if not segments:
                continue

            # City and area are read from the values of the 2nd and 3rd query
            # parameters of the page URL; they are the same for every segment
            # on the page, so parse them once instead of once per row.
            # NOTE(review): values are used exactly as they appear in the href
            # (no URL-decoding) -- confirm against the live site.
            params = area_url.split('&')
            city = params[1].split('=')[1]
            area = params[2].split('=')[1]

            for segment in segments:
                l_ip = segment.xpath('.//span[@class = "l"]/text()')
                r_ip = segment.xpath('.//span[@class = "r"]/text()')
                # Skip list items that lack either span rather than crashing
                # with IndexError.
                if not l_ip or not r_ip:
                    continue
                data_lst.append(['福建省', city, area, l_ip[0], r_ip[0]])
# Generate random IP addresses from the scraped IP segments and export them.


def _ip_to_int(ip_text):
    """Return the integer value of a dotted-quad IPv4 string.

    Surrounding whitespace is ignored. Raises ``ipaddress.AddressValueError``
    if the text is not a valid IPv4 address.
    """
    return int(ipaddress.IPv4Address(ip_text.strip()))


# Load the scraped segments into a DataFrame.
columns = ['province', 'city', 'area', 'start_ip', 'end_ip']
df = pd.DataFrame(data_lst, columns=columns)

# Generated rows are collected in a plain list and turned into a DataFrame
# once at the end; growing a DataFrame one row at a time with ``.loc`` gets
# slower as the frame grows.
columns_new = ["province", "city", "area", "ip"]
generated_rows = []

print(len(df))
print("++++++++++++++++++++++++++++++++++++++")
for _, rows in df.iterrows():
    # Work on the addresses as integers so the segment size and the random
    # choice are correct for any range, including ranges whose last octet is
    # lower at the end than at the start, or that span several values of the
    # second octet.
    start_int = _ip_to_int(rows['start_ip'])
    end_int = _ip_to_int(rows['end_ip'])

    # Number of addresses in the inclusive range [start_ip, end_ip].
    ip_num = end_int - start_int + 1
    print(ip_num)
    print("**********************************")

    # Sample one tenth of each segment (with replacement). A reversed or tiny
    # range gives ``ip_num // 10 <= 0`` and therefore produces no rows.
    for i in range(ip_num // 10):
        new_ip = str(ipaddress.IPv4Address(random.randint(start_int, end_int)))
        # Progress counter: number of rows generated so far.
        print(len(generated_rows))
        generated_rows.append([rows['province'], rows['city'], rows['area'], new_ip])

data_frame = pd.DataFrame(generated_rows, columns=columns_new)
print(data_frame.head())
print("===================================================")

# Write the generated data to an Excel file.
excel_path = "ip_福建.xlsx"
data_frame.to_excel(excel_path, index=False)
ICP备案:
公安联网备案: