호그와트

extract company

영웅*^%&$ 2022. 2. 24. 12:16
728x90

import requests

from bs4 import BeautifulSoup

# Value sent as the `limit` query parameter of the Indeed search URL.
LIMIT = 50

# The f-prefix is required: without it the literal text "{LIMIT}" would be
# sent in the query string instead of the number above.
INDEED_URL = f"http://www.indeed.com/jobs?q=python&limit={LIMIT}"

def extract_indeed_pages():
    """Return the last page number listed in the search page's pagination.

    Fetches ``INDEED_URL``, locates the ``div`` with class ``pagination`` and
    converts the text of every anchor except the final one into an integer.
    The last collected number is returned.

    Returns:
        int: The last page number found, or ``1`` when the page has no
        pagination block or no numeric page links.
    """
    result = requests.get(INDEED_URL)
    soup = BeautifulSoup(result.text, "html.parser")

    pagination = soup.find("div", {"class": "pagination"})
    if pagination is None:
        # No pagination block on the page: treat the results as one page
        # instead of failing with AttributeError on None.
        return 1

    pages = []
    # The final anchor is dropped, as in the original code
    # (presumably a "Next" link — confirm against the live markup).
    for link in pagination.find_all("a")[:-1]:
        text = link.string
        if text is None:
            # .string is None when the anchor has several children.
            continue
        text = text.strip()
        if text.isdigit():
            pages.append(int(text))

    if not pages:
        return 1
    return pages[-1]

def extract_indeed_jobs(last_page):
    """Print the company name of each job card on the search page.

    Fetches ``INDEED_URL``, collects every ``div`` with class
    ``jobsearch-SerpJobCard`` and, for each card, prints the stripped company
    name. The name is taken from the anchor inside the ``company`` span when
    there is one, otherwise from the span itself.

    Args:
        last_page: Currently unused; only the single page at ``INDEED_URL``
            is fetched.

    Returns:
        list: The ``jobs`` list. Nothing is appended to it yet, so it is
        always empty.
    """
    jobs = []

    result = requests.get(INDEED_URL)
    soup = BeautifulSoup(result.text, "html.parser")

    # find_all() yields every matching card. find() returns a single Tag, and
    # iterating a Tag walks its children instead of the job cards.
    cards = soup.find_all("div", {"class": "jobsearch-SerpJobCard"})

    for card in cards:
        # Extracted but not used yet; kept so a card without a title link
        # still fails here as it did before.
        title = card.find("div", {"class": "title"}).find("a")["title"]

        company = card.find("span", {"class": "company"})
        if company is None:
            # Card has no company span; skip it rather than crash on None.
            continue

        company_anchor = company.find("a")
        if company_anchor is not None:
            company = str(company_anchor.string)
        else:
            company = str(company.string)
        company = company.strip()

        print(company)

    return jobs

728x90

'호그와트' 카테고리의 다른 글

대나무숲 1 돌파  (0) 2022.02.28
Dreamhack login-1  (0) 2022.02.24
extract title jobs  (0) 2022.02.24
extract last page  (0) 2022.02.24
Dreamhack web PATCH-1  (0) 2022.02.21