创建类
class Gongjao8684(object):
    """Crawl every bus route of one city site and dump the result as JSON.

    The crawl walks three page levels: the city landing page (links to
    routes grouped by leading digit/letter), each group page (links to the
    individual routes) and each route page (schedule, fare, operator and
    stop lists). One dict per route is accumulated in ``all_gj_list``.
    """

    # Seconds to wait for the server before a request is abandoned; without
    # a timeout ``requests.get`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, url):
        """Remember the city base URL and derive the output name from it.

        Args:
            url: Base URL of the city site, e.g. ``https://liuzhou.8684.cn/``.
        """
        self.url = url
        # One dict per crawled route, filled by get_message().
        self.all_gj_list = []
        # First label of the host name ("liuzhou" for liuzhou.8684.cn); used
        # in progress messages and in the output file name.
        self.filename = (urlsplit(self.url)[1]).split('.')[0]

    def _absolute(self, href):
        """Resolve a link found on a page against the city base URL."""
        # Imported locally so the class does not depend on an import line
        # outside of this block.
        from urllib.parse import urljoin

        # urljoin avoids the doubled slash that plain concatenation produces
        # when the base ends with "/" and the href starts with "/".
        return urljoin(self.url, href)

    @staticmethod
    def _strip_label(nodes):
        """Return the first text node without its first five characters.

        The five characters are presumably the field label (for example
        "运行时间:") -- confirm against the live page. An empty result list
        yields an empty string instead of raising IndexError.
        """
        return nodes[0][5:] if nodes else ''

    def get_tree(self, url):
        """Download ``url`` and return it parsed as an lxml HTML tree."""
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
        }
        r = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        return etree.HTML(r.text)

    def get_gj_route(self, url):
        """Return the hrefs of all route-group pages linked from ``url``.

        The groups starting with a digit come first, followed by the groups
        starting with a letter.
        """
        tree = self.get_tree(url)
        find_gj_number = tree.xpath("//div[@class='bus_kt_r1']/a/@href")
        find_gj_zimu = tree.xpath("//div[@class='bus_kt_r2']/a/@href")
        # The original computed both lists but returned nothing, so run()
        # ended up iterating over None.
        return list(find_gj_number) + list(find_gj_zimu)

    def get_zhitou_gj(self, gj_list):
        """Crawl every route-group page whose href is in ``gj_list``."""
        for g_url in gj_list:
            self.get_all_zhitou(self._absolute(g_url))
        print("{} 市公交线路爬取成功!\n".format(self.filename))

    def get_all_zhitou(self, url):
        """Crawl every route linked from the route-group page at ``url``."""
        tree = self.get_tree(url)
        sub_url = tree.xpath("//div[@id='con_site_1']/a/@href")
        sub_zhitou = tree.xpath("//div[@id='con_site_1']/a/text()")
        for i, href in enumerate(sub_url):
            # A link without text must not abort the crawl; fall back to the
            # href itself as the label used in the progress messages.
            label = sub_zhitou[i] if i < len(sub_zhitou) else href
            print("开始爬取 {} 市 {} 公交线站点".format(self.filename, label))
            self.get_message(href)
            print("结束爬取 {} 市 {} 公交线站点".format(self.filename, label))
            # Pause between route pages to keep the request rate low.
            time.sleep(2)

    def get_message(self, url):
        """Scrape one route page and append its details to ``all_gj_list``.

        Args:
            url: Href of the route page, resolved against the base URL.

        Fields missing from the page are stored as empty strings/lists
        rather than raising.
        """
        tree = self.get_tree(self._absolute(url))

        mess_time = tree.xpath('//div[@class="bus_i_content"]/p[1]/text()')
        mess_price = tree.xpath('//div[@class="bus_i_content"]/p[2]/text()')
        mess_gongsi = tree.xpath('//*[@id="bus_line"]/div[1]/div/p[3]/a/text()')
        mess_uptime = tree.xpath('//div[@class="bus_i_content"]/p[4]/text()')

        # First direction: heading text plus the stop-count text.
        try:
            up_n = tree.xpath('//*[@id="bus_line"]//strong')
            up_num = tree.xpath('//*[@id="bus_line"]/div/span/text()')
            up_list = tree.xpath('//div[@class="bus_line_site "][1]//a/text()')
            up_name = up_n[0].text + up_num[0]
        except (IndexError, TypeError):
            # IndexError: node missing; TypeError: heading without text.
            up_name = ''
            up_list = []

        # Second direction: absent when the page lists only one direction.
        try:
            down_name = tree.xpath('//*[@id="bus_line"]//strong')
            down_num = tree.xpath('//span[@class="bus_line_no"]')
            down_list = tree.xpath('//div[@class="bus_line_site "][2]//a/text()')
            down_gx_name = down_name[1].text + down_num[1].text
        except (IndexError, TypeError):
            down_gx_name = ''
            # Placeholder kept from the original ("loop route").
            down_list = ['环型线路']

        item = {
            '运行时间': self._strip_label(mess_time),
            '票价信息': self._strip_label(mess_price),
            # Kept as the string form of the list so the output format of
            # this field stays what existing consumers already receive.
            '公交公司': str(mess_gongsi),
            '最后更新': self._strip_label(mess_uptime),
            # Full label, matching the down direction; the original stored
            # only its first character.
            '上行站数': up_name,
            '上行站点': up_list,
            '下行站数': down_gx_name,
            '下行站点': down_list,
        }
        self.all_gj_list.append(item)

    def run(self):
        """Crawl the whole city and write the routes to ``<name>_json.txt``."""
        gj_list = self.get_gj_route(self.url)
        self.get_zhitou_gj(gj_list)
        # The context manager guarantees the file is flushed and closed.
        with open('%s_json.txt' % self.filename, 'w', encoding='utf-8') as f:
            # The file is UTF-8, so write the Chinese text as-is instead of
            # \uXXXX escapes; the parsed JSON is identical either way.
            json.dump(self.all_gj_list, f, ensure_ascii=False)
主函数调用:
from Gongjao8684 import Gongjao8684
def main(url='https://liuzhou.8684.cn/'):
    """Crawl the bus routes of one city site and save them to disk.

    Args:
        url: Base URL of the city site to crawl. Defaults to the Liuzhou
            site, so calling ``main()`` without arguments behaves as before.
    """
    bus = Gongjao8684(url)
    bus.run()
if __name__ == '__main__':
    main()
结果如下
![在这里插入图片描述](https://img-blog.csdnimg.cn/20190908122017865.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQxMDUzNDg3,size_16,color_FFFFFF,t_70)
转载自原文链接, 如需删除请联系管理员。
原文链接:爬取某市的公交路线站点,转载请注明来源!