import requests
from bs4 import BeautifulSoup
# Value sent as the `limit` query parameter of the search URL below.
LIMIT = 50
# Must be an f-string: without the prefix the literal text "{LIMIT}" would be
# sent in the query string instead of the number.
INDEED_URL = f"http://www.indeed.com/jobs?q=python&limit={LIMIT}"
def extract_indeed_pages():
    """Return the highest page number linked from the search pagination bar.

    Fetches ``INDEED_URL``, locates the ``div`` with class ``pagination`` and
    reads the numeric labels of the anchors inside it.

    Returns:
        int: The largest page number found among the pagination links, or 1
        when the page has no pagination element or no numeric page links.
    """
    response = requests.get(INDEED_URL)
    soup = BeautifulSoup(response.text, "html.parser")
    pagination = soup.find("div", {"class": "pagination"})
    if pagination is None:
        # No pagination element in the markup; nothing to count.
        return 1
    labels = (link.get_text(strip=True) for link in pagination.find_all("a"))
    # Keep only purely numeric labels so navigation links (e.g. a "next"
    # anchor) are ignored wherever they appear, not just in the last position.
    page_numbers = [int(label) for label in labels if label.isdecimal()]
    return max(page_numbers, default=1)
def extract_indeed_jobs(last_page):
    """Collect the title and company name of every job card on the search page.

    Fetches ``INDEED_URL`` once and parses each ``div`` with class
    ``jobsearch-SerpJobCard``.

    Args:
        last_page: Accepted for interface compatibility; not used by this
            function (only the page at ``INDEED_URL`` is fetched).

    Returns:
        list[dict]: One ``{"title": ..., "company": ...}`` entry per job card
        that has a titled link. ``company`` is an empty string when the card
        has no company element.
    """
    jobs = []
    response = requests.get(INDEED_URL)
    soup = BeautifulSoup(response.text, "html.parser")
    # find_all, not find: find returns only the first matching tag, and
    # iterating a single tag walks its children instead of the job cards.
    cards = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})
    for card in cards:
        title_box = card.find("div", {"class": "title"})
        title_anchor = title_box.find("a") if title_box is not None else None
        if title_anchor is None or not title_anchor.has_attr("title"):
            # Card without a titled link: nothing usable to record.
            continue
        title = title_anchor["title"]

        company_tag = card.find("span", {"class": "company"})
        if company_tag is None:
            company = ""
        else:
            # The company name is sometimes wrapped in a link; prefer the
            # link text when it exists, otherwise use the span's own text.
            company_anchor = company_tag.find("a")
            name_source = company_anchor if company_anchor is not None else company_tag
            company = name_source.get_text(strip=True)

        # Retained from the original implementation, which printed each company.
        print(company)
        jobs.append({"title": title, "company": company})
    return jobs
'호그와트' 카테고리의 다른 글
대나무숲 1 돌파 (0) | 2022.02.28 |
---|---|
Dreamhack login-1 (0) | 2022.02.24 |
extract title jobs (0) | 2022.02.24 |
extract last page (0) | 2022.02.24 |
Dreamhack web PATCH-1 (0) | 2022.02.21 |