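# This script requires the Chrome WebDriver (chromedriver) to be installed and configured beforehand.
# The scraped information is saved to an .xls spreadsheet.
# To search for a different position or region, replace the value of `url`: open the Boss Zhipin (zhipin.com) site, search for the position you are interested in, then copy the URL of the results page.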
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
from time import sleep
from urllib.parse import urljoin

import xlwt
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
# Workbook that accumulates the scraped postings (written out at the end of
# the script).
job_book = xlwt.Workbook(encoding="utf-8")
# Single worksheet; its name "运维" means "operations".
job_sheet = job_book.add_sheet("运维")
# Boss Zhipin (zhipin.com) job search.  The `query` parameter is the
# URL-encoded keyword "运维"; `city` selects the region (Nanjing, per the
# original note).  Replace this URL to search other positions or regions.
url = "https://www.zhipin.com/job_detail/?query=%E8%BF%90%E7%BB%B4&city=101190100&industry=&position="
# Browser session used for the search-results page.
browser = webdriver.Chrome()
# Separator line printed between postings.
splite = "*" * 5
browser.get(url)
# One element per posting card on the results page.  The locator-based
# find_elements(By..., ...) form is used because the find_elements_by_*
# helpers no longer exist in current Selenium releases.
job_title = browser.find_elements(By.CLASS_NAME, "primary-wrapper")
# Detail-page URL of every posting, in page order.
job_url_all = []
# Whitespace-split title text of every posting, parallel to job_url_all.
job_info_all = []
# Collect the detail-page link and the title tokens of every posting card.
# The listing browser is always shut down afterwards, whether or not the
# collection succeeded.
try:
    for card in job_title:
        href = card.find_element(By.CLASS_NAME, "primary-box").get_attribute("href")
        if not href:
            # A card without a link cannot be visited later; skip it so the
            # two result lists stay parallel.
            continue
        # The driver normally hands back an already-absolute href, so blindly
        # prefixing the host would yield a malformed address.  urljoin leaves
        # absolute URLs untouched and resolves relative ones against the host.
        job_url_all.append(urljoin("http://www.zhipin.com", href))
        # Title text split on whitespace into its individual fields.
        job_info_all.append(card.text.split())
except WebDriverException:
    # Any driver failure here most likely means the site is blocking us;
    # report it and stop, as the message announces.
    print("******** 有异常,退出,请查看是否被限制访问!********")
    raise SystemExit(1)
finally:
    # quit() (rather than close()) also terminates the driver process, and
    # running it here guarantees the session is ended exactly once.
    browser.quit()

# Dump everything that was collected.
print(job_url_all)
print(job_info_all)
def _read_job_description(driver, attempts=10, delay=3):
    """Return the text of the first "job-sec" element on the current page.

    The page is polled up to *attempts* times, sleeping *delay* seconds after
    each miss.  An empty string is returned if the element never shows up, so
    a blocked or changed page cannot stall the script forever.
    """
    for _ in range(attempts):
        sections = driver.find_elements(By.CLASS_NAME, "job-sec")
        if sections:
            return sections[0].text
        sleep(delay)
    return ""


# Visit every collected posting and record it in column 0 of the worksheet.
# Each posting takes five consecutive rows: title, URL, description and two
# empty spacer rows.  The counter is advanced before each write, so row 0
# stays unused.
x_num = 0
try:
    if job_url_all:
        # One browser is reused for all detail pages instead of launching a
        # fresh Chrome per posting.
        browser_job = webdriver.Chrome()
        try:
            for job_info, job_url in zip(job_info_all, job_url_all):
                browser_job.get(job_url)
                job_describe = _read_job_description(browser_job)
                if not job_describe:
                    # Message reads "job description not found".
                    print("******** 未找到职位描述: " + job_url + " ********")
                print(job_info)
                print(job_describe)
                # The title tokens are joined into one space-separated string
                # rather than handing the worksheet a list.
                for cell in (" ".join(job_info), job_url, job_describe, "", ""):
                    x_num += 1
                    job_sheet.write(x_num, 0, cell)
                print(splite)
        finally:
            # quit() ends the session and the driver process.
            browser_job.quit()
finally:
    # Save whatever was gathered, even if a page failed part-way through.
    job_book.save("zhipin.xls")