diff --git a/README.md b/README.md index 7b8632f..cf52ee8 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Provides an API for HLTV ## `get_players` ```python ->>> hltv.get_players("6667") +>>> hltv.get_players("6665") ['karrigan', 'olofmeister', 'GuardiaN', 'NiKo', 'rain'] ``` @@ -51,3 +51,10 @@ Provides an API for HLTV >>> hltv.get_results() [{'team2score': 16, 'team1': 'AGO', 'team2': 'G2', 'team1score': 8, 'date': '15/2/2018', 'event': 'ESL Pro League Season 7 Europe'}, ... ] ``` + +## `get_results_by_date` + +```python +>>> hltv.get_results_by_date('2018-02-14', '2018-02-15') +[{'team2score': 16, 'team1': 'AGO', 'team2': 'G2', 'team1score': 8, 'map': 'Inferno', 'date': '2018-2-15', 'event': 'ESL Pro League Season 7 Europe'}, ... ] +``` diff --git a/main.py b/main.py index 3a97424..7d60836 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,13 @@ from python_utils import converters def get_parsed_page(url): - return BeautifulSoup(requests.get(url).text, "lxml") + # This fixes a blocked by Cloudflare error I've encountered + headers = { + "referer": "https://www.hltv.org/stats", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" + } + + return BeautifulSoup(requests.get(url, headers=headers).text, "lxml") def top5teams(): @@ -25,15 +31,15 @@ def top30teams(): teams = page.find("div", {"class": "ranking"}) teamlist = [] for team in teams.find_all("div", {"class": "ranked-team standard-box"}): - newteam = {'name': team.find('div', {"class": "header"}).select('.name')[0].text.strip(), + newteam = {'name': team.find('div', {"class": "ranking-header"}).select('.name')[0].text.strip(), 'rank': converters.to_int(team.select('.position')[0].text.strip(), regexp=True), 'rank-points': converters.to_int(team.find('span', {'class': 'points'}).text, regexp=True), - 'team-id': converters.to_int(team.select('.name')[0]["data-url"].split("/")[2], regexp=True), + 'team-id': converters.to_int(team.find('a', {'class': 'details moreLink'})['href'].split('/')[-1]), 'team-players': []} for 
player_div in team.find_all("td", {"class": "player-holder"}): player = {} player['name'] = player_div.find('img', {'class': 'playerPicture'})['title'] - player['player-id'] = converters.to_int(player_div.find('span', {"class": "js-link"})['data-url'].split("/")[2]) + player['player-id'] = converters.to_int(player_div.select('.pointer')[0]['href'].split("/")[-2]) newteam['team-players'].append(player) teamlist.append(newteam) return teamlist @@ -45,8 +51,8 @@ def top_players(): playersArray = [] for player in players.find_all("div", {"class": "top-x-box standard-box"}): playerObj = {} - playerObj['country'] = player.find('img', {'class': 'flag country gtSmartphone-only'})['alt'].encode('utf8') - buildName = player.find('img', {'class': 'img'})['alt'].encode('utf8').split('\'') + playerObj['country'] = player.find_all('img')[1]['alt'].encode('utf8') + buildName = player.find('img', {'class': 'img'})['alt'].split("'") playerObj['name'] = buildName[0].rstrip() + buildName[2] playerObj['nickname'] = player.find('a', {'class': 'name'}).text.encode('utf8') playerObj['rating'] = player.find('div', {'class': 'rating'}).find('span', {'class': 'bold'}).text.encode('utf8') @@ -57,12 +63,12 @@ def top_players(): def get_players(teamid): - page = get_parsed_page("http://www.hltv.org/?pageid=362&teamid=" + teamid) - titlebox = page.find("div", {"class": "teamProfile"}) + page = get_parsed_page("http://www.hltv.org/?pageid=362&teamid=" + str(teamid)) + titlebox = page.find("div", {"class": "bodyshot-team"}) players = [] - for player in titlebox.find_all("div", {"class": "bodyshot-team"}): - players = player.text.strip().encode('utf8').split('\n\n\n\n') - players = [x.strip(' ') for x in players] + for player_link in titlebox.find_all("a"): + players.append(player_link['title']) + return players @@ -108,11 +114,11 @@ def _get_current_lineup(player_anchors): players = [] for player_anchor in player_anchors[0:5]: player = {} - buildName = player_anchor.find("img", {"class": 
"container-width"})["alt"].encode('utf8').split('\'') - player['country'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("img", {"class": "flag"})["alt"].encode('utf8') + buildName = player_anchor.find("img", {"class": "container-width"})["alt"].split('\'') + player['country'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("img", {"class": "flag"})["alt"] player['name'] = buildName[0].rstrip() + buildName[2] - player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text.encode('utf8') - player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": "teammate-info standard-box"}).find("span").text.encode('utf8')).group()) + player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text + player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": "teammate-info standard-box"}).find("span").text).group()) players.append(player) return players @@ -124,11 +130,11 @@ def _get_historical_lineup(player_anchors): players = [] for player_anchor in player_anchors[5::]: player = {} - buildName = player_anchor.find("img", {"class": "container-width"})["alt"].encode('utf8').split('\'') + buildName = player_anchor.find("img", {"class": "container-width"})["alt"].split('\'') player['country'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("img", {"class": "flag"})["alt"].encode('utf8') player['name'] = buildName[0].rstrip() + buildName[2] player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text.encode('utf8') - player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": "teammate-info standard-box"}).find("span").text.encode('utf8')).group()) + player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": 
"teammate-info standard-box"}).find("span").text).group()) players.append(player) return players @@ -208,28 +214,73 @@ def get_results(): return results_list +def get_results_by_date(start_date, end_date): + # Dates like yyyy-mm-dd (iso) + results_list = [] + offset = 0 + # Loop through all stats pages + while True: + url = "https://www.hltv.org/stats/matches?startDate="+start_date+"&endDate="+end_date+"&offset="+str(offset) + + results = get_parsed_page(url) + + # Total amount of results of the query + amount = int(results.find("span", attrs={"class": "pagination-data"}).text.split("of")[1].strip()) + + # All rows (s) of the match table + pastresults = results.find("tbody").find_all("tr") + + # Parse each element to a result dictionary + for result in pastresults: + team_cols = result.find_all("td", {"class": "team-col"}) + t1 = team_cols[0].find("a").text + t2 = team_cols[1].find("a").text + t1_score = int(team_cols[0].find_all(attrs={"class": "score"})[0].text.strip()[1:-1]) + t2_score = int(team_cols[1].find_all(attrs={"class": "score"})[0].text.strip()[1:-1]) + map = result.find(attrs={"class": "statsDetail"}).find(attrs={"class": "dynamic-map-name-full"}).text + event = result.find(attrs={"class": "event-col"}).text + date = result.find(attrs={"class": "date-col"}).find("a").find("div").text + + result_dict = {"team1": t1, "team2": t2, "team1score": t1_score, + "team2score": t2_score, "date": date, "map": map, "event": event} + + # Add this pages results to the result list + results_list.append(result_dict) + + # Get the next 50 results (next page) or break + if offset < amount: + offset += 50 + else: + break + + return results_list if __name__ == "__main__": import pprint pp = pprint.PrettyPrinter() - + pp.pprint('top5') pp.pprint(top5teams()) pp.pprint('top30') pp.pprint(top30teams()) - + pp.pprint('top_players') pp.pprint(top_players()) - + pp.pprint('get_players') - pp.pprint(get_players('6137')) - + pp.pprint(get_players('6665')) + 
pp.pprint('get_team_info') - pp.pprint(get_team_info('6137')) + pp.pprint(get_team_info('6665')) pp.pprint('get_matches') pp.pprint(get_matches()) pp.pprint('get_results') pp.pprint(get_results()) + + pp.pprint('get_results_by_date') + import datetime; today_iso = datetime.datetime.today().isoformat().split('T')[0] + pp.pprint(get_results_by_date(today_iso, today_iso)) + \ No newline at end of file