오늘은 파이썬으로 데이터 스크래핑을 해보았다.
MAIN PAGE
from players import extract_player_info

# Scrape the club's squad page and dump one CSV row per player.
keyword = "squad"
players_info = extract_player_info(keyword)

# "with" guarantees the file is closed even if a write raises.
# Header columns must not carry spaces after the commas — the data rows
# below have none, so a reader would otherwise get keys like " Position".
with open(f"{keyword}.csv", "w", encoding="utf-8") as file:
    file.write("Name,Position,Number,Link\n")
    for each in players_info:
        file.write(f"{each['name']},{each['position']},{each['number']},{each['link']}\n")
# keyword = "squad"
# player = extract_player_info(keyword)
# players= player
# print(players)
FILE PAGE
# from players import extract_player_info
# keyword = "squad"
# player = extract_player_info(keyword)
# def save_to_file(file_name, players):
# keyword = "squad"
# player = extract_player_info(keyword)
# file = open(f"{keyword}.csv", "w", encoding="utf-8")
# file.close()
# file.write("name, position, number, link\n")
# for each in player:
# file.write(
# f"{player['name']},{player['position']},{player['number']},{player['link']}\n")
# file.close()
PLAYERS PAGE
from requests import get
from bs4 import BeautifulSoup
def extract_player_info(keyword):
    """Scrape the Premier League Manchester United page named by *keyword*.

    Returns a list of dicts with keys "name", "position", "number", "link"
    (one per player card found). Returns an empty list when the HTTP
    request does not succeed, so callers can always iterate the result.
    """
    base_url = "https://www.premierleague.com/clubs/12/Manchester-United/"
    # NOTE: the original re-assigned keyword = "squad" here, which silently
    # ignored the caller's argument — removed so the parameter is honored.
    response = get(f"{base_url}{keyword}")
    if response.status_code != 200:
        print("can't request the page.")
        # Explicit empty result instead of an implicit None, which would
        # crash any caller that iterates the return value.
        return []
    results = []
    soup = BeautifulSoup(response.text, "html.parser")
    for player_section in soup.find_all("ul", class_="squadListContainer"):
        # recursive=False: only direct <li> children, one per player card.
        for player in player_section.find_all("li", recursive=False):
            anchor = player.find("a")
            link = anchor["href"]
            number = player.find("span", class_="number")
            position = player.find("span", class_="position")
            name = player.find("h4", class_="name")
            results.append({
                "name": name.string,
                "position": position.string,
                "number": number.string,
                "link": f"https://www.premierleague.com/{link}",
            })
    return results
'Programming > python' 카테고리의 다른 글
[python] Day 28 (0) | 2023.03.07 |
---|---|
[python] Day 26 (0) | 2023.03.05 |
[Python, web scrapper] 데이터 수집 연습 (0) | 2023.03.03 |
[Python] Beautifulsoup 을 사용해서 웹사이트 정보 가져오는 연습 (0) | 2023.03.02 |
[python] Day 23 (1) | 2023.03.01 |