首页 » 技术分享 » 爬取某市的公交路线站点

爬取某市的公交路线站点

 

创建类

class Gongjao8684(object):
	"""Crawler state for the bus-route pages of one city site."""

	def __init__(self, url):
		"""Store the site root and derive the output-file prefix from it.

		Args:
			url: Root URL of the city site, e.g. ``https://liuzhou.8684.cn/``.
		"""
		self.url = url
		# get_message() appends one dict per scraped bus line to this list.
		self.all_gj_list = []
		# Element 1 of the urlsplit() result is the host part (assuming
		# urllib.parse.urlsplit); its first dot-separated label, e.g.
		# "liuzhou", is used to name the output file and in progress messages.
		self.filename = urlsplit(self.url)[1].split('.')[0]

定义 get_tree(url) 函数:请求页面并返回解析后的 HTML 树,便于其他方法调用

def get_tree(self, url, timeout=10):
	"""Download *url* and return it parsed as an lxml HTML tree.

	Args:
		url: Absolute URL of the page to fetch.
		timeout: Seconds to wait for the server before giving up. Without
			a timeout ``requests.get`` may block indefinitely.

	Returns:
		The result of ``etree.HTML`` applied to the response text.

	Raises:
		Whatever ``requests.get`` raises on network failure or timeout.
	"""
	# A desktop-browser user agent is sent with every request.
	headers = {
		'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
	}
	response = requests.get(url, headers=headers, timeout=timeout)
	return etree.HTML(response.text)

获取城市的总线路

def get_gj_route(self, url):
	"""Return the hrefs of the index pages linked from the city front page.

	Args:
		url: URL of the city front page.

	Returns:
		A list of href strings: the links inside the ``bus_kt_r1`` block
		followed by those inside the ``bus_kt_r2`` block (the routes
		grouped by leading digit and by leading letter, going by the
		original variable names).
	"""
	tree = self.get_tree(url)
	by_number = tree.xpath("//div[@class='bus_kt_r1']/a/@href")
	by_letter = tree.xpath("//div[@class='bus_kt_r2']/a/@href")
	# run() iterates over this result, so it must be returned; the original
	# computed both lists and then implicitly returned None.
	return list(by_number) + list(by_letter)

获取各数字或字母开头的线路总链接

def get_zhitou_gj(self, gj_list):
	"""Crawl every index page in *gj_list*, then print a completion message.

	Args:
		gj_list: Iterable of hrefs to index pages, resolved against
			``self.url``.
	"""
	# Local import: the article shows no module-level import section.
	from urllib.parse import urljoin

	for href in gj_list:
		# urljoin avoids the doubled slash that plain concatenation gives
		# when the root ends with "/" and the href starts with "/", and it
		# also copes with absolute hrefs.
		self.get_all_zhitou(urljoin(self.url, href))
	print("{} 市公交线路爬取成功!\n".format(self.filename))

获取各数字或字母开头的所有线路的详细链接

def get_all_zhitou(self, url, delay=2):
	"""Scrape every bus line linked from one index page.

	Args:
		url: Absolute URL of the index page.
		delay: Seconds to sleep after each line, to throttle requests.
	"""
	tree = self.get_tree(url)
	route_hrefs = tree.xpath("//div[@id='con_site_1']/a/@href")
	route_names = tree.xpath("//div[@id='con_site_1']/a/text()")

	for index, route_href in enumerate(route_hrefs):
		# The names are only used for progress output. A link without text
		# would make the two lists differ in length, so fall back to the
		# href instead of raising IndexError.
		route_name = route_names[index] if index < len(route_names) else route_href
		print("开始爬取 {} 市 {} 公交线站点".format(self.filename, route_name))
		self.get_message(route_href)
		print("结束爬取 {} 市 {} 公交线站点".format(self.filename, route_name))
		time.sleep(delay)

获取每条线路的详细信息(运行时间、票价、公交公司、上下行站点等),最后以字典的形式保存到列表当中

def get_message(self, url):
	"""Scrape one bus-line detail page and append its record to ``self.all_gj_list``.

	Args:
		url: href of the detail page, resolved against ``self.url``.

	The appended dict has the keys 运行时间, 票价信息, 公交公司, 最后更新,
	上行站数, 上行站点, 下行站数 and 下行站点. Data missing from the page
	becomes an empty string or empty list instead of raising. When the
	second direction block cannot be read, its name is ``''`` and its stop
	list is ``['环型线路']``.
	"""
	# Local import: the article shows no module-level import section.
	from urllib.parse import urljoin

	def strip_label(texts):
		# Use the first text node and drop its first five characters
		# (presumably the "label:" prefix -- confirm against the page).
		# An empty xpath result yields '' rather than IndexError.
		return texts[0][5:] if texts else ''

	# Errors expected when the page lacks the nodes being read.
	missing = (AttributeError, IndexError, TypeError)

	tree = self.get_tree(urljoin(self.url, url))

	try:
		mess_time = tree.xpath('//div[@class="bus_i_content"]/p[1]/text()')
		mess_price = tree.xpath('//div[@class="bus_i_content"]/p[2]/text()')
		mess_gongsi = tree.xpath('//*[@id="bus_line"]/div[1]/div/p[3]/a/text()')
		mess_uptime = tree.xpath('//div[@class="bus_i_content"]/p[4]/text()')
	except missing:
		mess_time, mess_price, mess_gongsi, mess_uptime = [], [], [], []

	try:
		up_titles = tree.xpath('//*[@id="bus_line"]//strong')
		up_counts = tree.xpath('//*[@id="bus_line"]/div/span/text()')
		up_list = tree.xpath('//div[@class="bus_line_site "][1]//a/text()')
		up_name = up_titles[0].text + up_counts[0]
	except missing:
		# The original reset up_num here and left up_name undefined.
		up_name = ''
		up_list = []

	try:
		down_titles = tree.xpath('//*[@id="bus_line"]//strong')
		down_counts = tree.xpath('//span[@class="bus_line_no"]')
		down_list = tree.xpath('//div[@class="bus_line_site "][2]//a/text()')
		down_gx_name = down_titles[1].text + down_counts[1].text
	except missing:
		# No second direction block: recorded as a loop line.
		down_gx_name = ''
		down_list = ['环型线路']

	item = {
		'运行时间': strip_label(mess_time),
		'票价信息': strip_label(mess_price),
		# Kept as the string form of the list, as in the original.
		'公交公司': str(mess_gongsi),
		'最后更新': strip_label(mess_uptime),
		# Full name, matching 下行站数 (the original kept only up_name[0]).
		'上行站数': up_name,
		'上行站点': up_list,
		'下行站数': down_gx_name,
		'下行站点': down_list,
	}
	self.all_gj_list.append(item)

运行模块

def run(self):
	"""Crawl the whole site and dump the collected records as JSON.

	The records in ``self.all_gj_list`` are written to
	``<self.filename>_json.txt`` (UTF-8) in the current working directory.
	"""
	gj_list = self.get_gj_route(self.url)
	self.get_zhitou_gj(gj_list)
	# The context manager closes the file even if serialisation fails; the
	# original never closed the handle it opened.
	with open('%s_json.txt'%self.filename, 'w', encoding='utf-8') as out_file:
		json.dump(self.all_gj_list, out_file)

主函数调用:

from Gongjao8684 import Gongjao8684
def main():
	"""Build a Gongjao8684 crawler for the Liuzhou site and run it."""
	crawler = Gongjao8684('https://liuzhou.8684.cn/')
	crawler.run()
if __name__ == '__main__':
	# The article fused this image link onto the `if` line, which made it a
	# syntax error; it is kept here as a comment:
	# https://img-blog.csdnimg.cn/20190908122017865.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQxMDUzNDg3,size_16,color_FFFFFF,t_70
	main()

结果如下
在这里插入图片描述

转载自原文链接, 如需删除请联系管理员。

原文链接:爬取某市的公交路线站点,转载请注明来源!

0