mirror of
https://github.com/SocksPls/hltv-api
synced 2025-04-30 15:43:07 +00:00
add selenium for solve cloudflare bug
This commit is contained in:
parent
42fcd13ab1
commit
9e32b42ab7
1 changed files with 55 additions and 20 deletions
75
main.py
75
main.py
|
@ -12,12 +12,17 @@ HLTV_ZONEINFO=zoneinfo.ZoneInfo(HLTV_COOKIE_TIMEZONE)
|
|||
LOCAL_TIMEZONE_NAME = tzlocal.get_localzone_name()
|
||||
LOCAL_ZONEINFO = zoneinfo.ZoneInfo(LOCAL_TIMEZONE_NAME)
|
||||
|
||||
YEAR = datetime.datetime.now().year
|
||||
START_DATE = str(YEAR) + "-01-01"
|
||||
END_DATE = str(YEAR) + "-12-31"
|
||||
|
||||
TEAM_MAP_FOR_RESULTS = []
|
||||
def _get_all_teams():
|
||||
if not TEAM_MAP_FOR_RESULTS:
|
||||
teams = get_parsed_page("https://www.hltv.org/stats/teams?minMapCount=0")
|
||||
teams = get_parsed_page(f"https://www.hltv.org/stats/teams?startDate={START_DATE}&endDate={END_DATE}&minMapCount=1")
|
||||
print(teams.find_all("td", {"class": ["teamCol-teams-overview"], }))
|
||||
for team in teams.find_all("td", {"class": ["teamCol-teams-overview"], }):
|
||||
team = {'id': converters.to_int(team.find("a")["href"].split("/")[-2]), 'name': team.find("a").text, 'url': "https://hltv.org" + team.find("a")["href"]}
|
||||
team = {'flag': team.find("img")["alt"], 'id': converters.to_int(team.find("a")["href"].split("/")[-2]), 'name': team.find("a").text, 'url': "https://hltv.org" + team.find("a")["href"]}
|
||||
TEAM_MAP_FOR_RESULTS.append(team)
|
||||
|
||||
def _findTeamId(teamName: str):
|
||||
|
@ -27,6 +32,13 @@ def _findTeamId(teamName: str):
|
|||
return team['id']
|
||||
return None
|
||||
|
||||
def _findTeamFlag(teamName: str):
|
||||
_get_all_teams()
|
||||
for team in TEAM_MAP_FOR_RESULTS:
|
||||
if team['name'] == teamName:
|
||||
return team['flag']
|
||||
return None
|
||||
|
||||
def _padIfNeeded(numberStr: str):
|
||||
if int(numberStr) < 10:
|
||||
return str(numberStr).zfill(2)
|
||||
|
@ -34,27 +46,46 @@ def _padIfNeeded(numberStr: str):
|
|||
return str(numberStr)
|
||||
|
||||
def _monthNameToNumber(monthName: str):
|
||||
# Check for the input "Augu" and convert it to "August"
|
||||
# This is necessary because the input string may have been sanitized
|
||||
# by removing the "st" from the day numbers, such as "21st" -> "21"
|
||||
if monthName == "Augu":
|
||||
monthName = "August"
|
||||
return datetime.datetime.strptime(monthName, '%B').month
|
||||
|
||||
def get_parsed_page(url, delay=0.5):
|
||||
# This fixes a blocked by cloudflare error i've encountered
|
||||
headers = {
|
||||
"referer": "https://www.hltv.org/stats",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
|
||||
months = {
|
||||
'January': 1,
|
||||
'February': 2,
|
||||
'March': 3,
|
||||
'April': 4,
|
||||
'May': 5,
|
||||
'June': 6,
|
||||
'July': 7,
|
||||
'August': 8,
|
||||
'Augu': 8,
|
||||
'September': 9,
|
||||
'October': 10,
|
||||
'November': 11,
|
||||
'December': 12
|
||||
}
|
||||
return months[monthName]
|
||||
|
||||
cookies = {
|
||||
"hltvTimeZone": HLTV_COOKIE_TIMEZONE
|
||||
}
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from bs4 import BeautifulSoup
|
||||
import time
|
||||
|
||||
def get_parsed_page(url, delay=3):
|
||||
options = webdriver.ChromeOptions()
|
||||
options.add_argument("--headless")
|
||||
options.add_argument("--disable-gpu")
|
||||
options.add_argument("--window-size=1920x1080")
|
||||
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
|
||||
driver = webdriver.Chrome(options=options)
|
||||
|
||||
driver.get(url)
|
||||
|
||||
time.sleep(delay)
|
||||
|
||||
return BeautifulSoup(requests.get(url, headers=headers, cookies=cookies).text, "lxml")
|
||||
page_source = driver.page_source
|
||||
|
||||
driver.quit()
|
||||
|
||||
return BeautifulSoup(page_source, "html.parser")
|
||||
|
||||
def top5teams():
|
||||
home = get_parsed_page("https://hltv.org/")
|
||||
|
@ -222,7 +253,7 @@ def get_matches():
|
|||
|
||||
for match in matchdays:
|
||||
matchDetails = match.find_all("div", {"class": "upcomingMatch"})
|
||||
date = match.find({'div': {'class': 'matchDayHeadline'}}).text.split()[-1]
|
||||
date = match.find({'span': {'class': 'matchDayHeadline'}}).text.split()[-1]
|
||||
for getMatch in matchDetails:
|
||||
matchObj = {}
|
||||
|
||||
|
@ -302,16 +333,20 @@ def get_results():
|
|||
resultObj['team1'] = res.find_all("td", {"class": "team-cell"})[0].text.lstrip().rstrip()
|
||||
resultObj['team1score'] = converters.to_int(res.find("td", {"class": "result-score"}).find_all("span")[0].text.lstrip().rstrip())
|
||||
resultObj['team1-id'] = _findTeamId(res.find_all("td", {"class": "team-cell"})[0].text.lstrip().rstrip())
|
||||
resultObj['team1-flag'] = _findTeamFlag(res.find_all("td", {"class": "team-cell"})[0].text.lstrip().rstrip())
|
||||
resultObj['team2'] = res.find_all("td", {"class": "team-cell"})[1].text.lstrip().rstrip()
|
||||
resultObj['team2-id'] = _findTeamId(res.find_all("td", {"class": "team-cell"})[1].text.lstrip().rstrip())
|
||||
resultObj['team2score'] = converters.to_int(res.find("td", {"class": "result-score"}).find_all("span")[1].text.lstrip().rstrip())
|
||||
resultObj['team2-flag'] = _findTeamFlag(res.find_all("td", {"class": "team-cell"})[1].text.lstrip().rstrip())
|
||||
else:
|
||||
resultObj['team1'] = None
|
||||
resultObj['team1-id'] = None
|
||||
resultObj['team1score'] = None
|
||||
resultObj['team1-flag'] = None
|
||||
resultObj['team2'] = None
|
||||
resultObj['team2-id'] = None
|
||||
resultObj['team2score'] = None
|
||||
resultObj['team2-flag'] = None
|
||||
|
||||
results_list.append(resultObj)
|
||||
|
||||
|
@ -472,4 +507,4 @@ if __name__ == "__main__":
|
|||
pp.pprint(get_results_by_date(today_iso, today_iso))
|
||||
|
||||
pp.pprint('get_match_countdown')
|
||||
pp.pprint(get_match_countdown(MATCH_WITH_COUNTDOWN))
|
||||
pp.pprint(get_match_countdown(MATCH_WITH_COUNTDOWN))
|
Loading…
Add table
Reference in a new issue