运行此脚本前，需要提前安装并配置好 Google Chrome 的 WebDriver（chromedriver）。
抓取到的信息会保存为 xls 表格（zhipin.xls）。
可以通过替换脚本中的 url 变量来查询不同的岗位和地区：先打开 BOSS 直聘网站，搜索想要查询的岗位，再复制搜索结果页的 URL 链接。
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Scrape job postings from a zhipin.com search page into an .xls workbook.

The script opens the search page at ``url`` in Chrome, collects the title
text and detail link of every listed posting, then opens each detail link to
read the job description, and finally writes everything to ``zhipin.xls``.

A Chrome WebDriver (chromedriver) that Selenium can locate is required.
"""
from time import sleep
import os
from urllib.parse import urljoin

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
import xlwt

# Search-results page to scrape. The query parameter decodes to "运维"
# (operations); the original note says the city code is Nanjing.
# Replace this with the URL of any other search on the site.
url = "https://www.zhipin.com/job_detail/?query=%E8%BF%90%E7%BB%B4&city=101190100&industry=&position="

# Separator printed after each posting.
splite = "*" * 5

# Base used to resolve detail links that come back as relative paths.
SITE_ROOT = "http://www.zhipin.com"
SHEET_NAME = "运维"
OUTPUT_FILE = "zhipin.xls"

# Each posting occupies five rows: title, link, description, two blank rows.
ROWS_PER_JOB = 5

# Polling of the detail page: wait POLL_SECONDS between attempts and give up
# after MAX_POLLS attempts instead of looping forever on a blocked page.
POLL_SECONDS = 3
MAX_POLLS = 20


def collect_job_listings(search_url):
    """Return ``(job_urls, job_infos)`` scraped from the search page.

    ``job_urls`` holds the detail-page link of each posting and ``job_infos``
    holds the posting's title text split on whitespace; both lists have the
    same length and order. If the browser raises while scraping, a warning is
    printed and whatever was collected so far is returned.
    """
    job_url_all = []
    job_info_all = []
    browser = webdriver.Chrome()
    try:
        browser.get(search_url)
        for card in browser.find_elements(By.CLASS_NAME, "primary-wrapper"):
            job_info = card.text
            href = card.find_element(By.CLASS_NAME, "primary-box").get_attribute("href")
            if not href:
                # Nothing to visit for this card.
                continue
            # urljoin leaves an absolute href untouched and resolves a
            # relative one against the site root, so the link is valid
            # either way (plain concatenation doubled the domain whenever
            # the href was already absolute).
            job_url_all.append(urljoin(SITE_ROOT, href))
            job_info_all.append(str(job_info).split())
    except WebDriverException as err:
        print("******** 有异常,退出,请查看是否被限制访问!********")
        print(err)
    finally:
        # quit() ends the driver session; it runs exactly once on every path.
        browser.quit()
    return job_url_all, job_info_all


def fetch_job_description(job_url, max_polls=MAX_POLLS, poll_seconds=POLL_SECONDS):
    """Return the text of the first ``job-sec`` element on ``job_url``.

    A fresh Chrome instance is used for each posting, as in the original
    script. The page is polled up to ``max_polls`` times, ``poll_seconds``
    apart; an empty string is returned if the element never shows up.
    """
    browser_job = webdriver.Chrome()
    try:
        browser_job.get(job_url)
        for _ in range(max_polls):
            job_sec = browser_job.find_elements(By.CLASS_NAME, "job-sec")
            if job_sec:
                return job_sec[0].text
            sleep(poll_seconds)
        return ""
    finally:
        browser_job.quit()


def write_job_rows(sheet, first_row, job_info, job_url, job_describe):
    """Write one posting into column 0 of ``sheet`` starting at ``first_row``."""
    # The title tokens are joined with spaces so the cell receives a plain,
    # readable string rather than a list.
    sheet.write(first_row, 0, " ".join(job_info))
    sheet.write(first_row + 1, 0, job_url)
    sheet.write(first_row + 2, 0, job_describe)
    # Two blank rows separate consecutive postings.
    sheet.write(first_row + 3, 0, "")
    sheet.write(first_row + 4, 0, "")


def main():
    """Scrape the search page and every posting, then save the workbook."""
    job_book = xlwt.Workbook(encoding="utf-8")
    job_sheet = job_book.add_sheet(SHEET_NAME)

    job_url_all, job_info_all = collect_job_listings(url)
    print(job_url_all)
    print(job_info_all)

    try:
        for index, (job_url, job_info) in enumerate(zip(job_url_all, job_info_all)):
            job_describe = fetch_job_description(job_url)
            print(job_info)
            print(job_describe)
            # Row 0 stays empty; the first posting starts at row 1.
            write_job_rows(job_sheet, 1 + index * ROWS_PER_JOB, job_info, job_url, job_describe)
            print(splite)
    finally:
        # Save even if a posting fails midway so earlier results are kept.
        job_book.save(OUTPUT_FILE)


if __name__ == "__main__":
    main()