mirror of
https://github.com/SocksPls/hltv-api
synced 2025-05-01 15:59:36 +00:00
fix broken features due to changes on HLTV
This commit is contained in:
parent
32db613206
commit
992334fe56
2 changed files with 143 additions and 116 deletions
13
README.md
13
README.md
|
@ -7,9 +7,10 @@ It is being updated though, slowly.
|
||||||
|
|
||||||
`top5teams`
|
`top5teams`
|
||||||
`top30teams`
|
`top30teams`
|
||||||
~~`top_players`~~
|
`top_players`
|
||||||
~~`get_players`~~
|
`get_players`
|
||||||
~~`get_team_info`~~
|
`get_team_info`
|
||||||
~~`_get_lineup`~~
|
`_get_current_lineup`
|
||||||
~~`get_matches`~~
|
`_get_historical_lineup`
|
||||||
~~`get_results`~~
|
`get_matches`
|
||||||
|
`get_results`
|
||||||
|
|
234
main.py
234
main.py
|
@ -1,3 +1,4 @@
|
||||||
|
import re
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from python_utils import converters
|
from python_utils import converters
|
||||||
|
@ -38,33 +39,29 @@ def top30teams():
|
||||||
|
|
||||||
|
|
||||||
def top_players():
|
def top_players():
|
||||||
page = get_parsed_page("http://www.hltv.org/?pageid=348&statsfilter=10&mapid=0")
|
page = get_parsed_page("https://www.hltv.org/stats")
|
||||||
boxes = page.find_all("div", {"class": "framedBox"})
|
players = page.find_all("div", {"class": "col"})[0]
|
||||||
top_player_categories = []
|
playersArray = []
|
||||||
for box in boxes:
|
for player in players.find_all("div", {"class": "top-x-box standard-box"}):
|
||||||
category_obj = {'category': box.find("h2").text}
|
playerObj = {}
|
||||||
players = []
|
playerObj['country'] = player.find('img', {'class': 'flag country gtSmartphone-only'})['alt'].encode('utf8')
|
||||||
for player_elem in box.select("> div"):
|
buildName = player.find('img', {'class': 'img'})['alt'].encode('utf8').split('\'')
|
||||||
player = {}
|
playerObj['name'] = buildName[0].rstrip() + buildName[2]
|
||||||
player_link = player_elem.find('a')
|
playerObj['nickname'] = player.find('a', {'class': 'name'}).text.encode('utf8')
|
||||||
player['name'] = player_link.text
|
playerObj['rating'] = player.find('div', {'class': 'rating'}).find('span', {'class': 'bold'}).text.encode('utf8')
|
||||||
player['team'] = player_elem.text.split("(")[1].split(")")[0]
|
playerObj['maps-played'] = player.find('div', {'class': 'average gtSmartphone-only'}).find('span', {'class': 'bold'}).text.encode('utf8')
|
||||||
p_url = player_link['href']
|
|
||||||
player['player-id'] = converters.to_int(p_url[p_url.index('playerid=')+9:p_url.index('&statsfilter')])
|
playersArray.append(playerObj)
|
||||||
player['stat'] = player_elem.select('div:nth-of-type(2)')[0].text
|
return playersArray
|
||||||
players.append(player)
|
|
||||||
category_obj['players'] = players
|
|
||||||
top_player_categories.append(category_obj)
|
|
||||||
return top_player_categories
|
|
||||||
|
|
||||||
|
|
||||||
def get_players(teamid):
|
def get_players(teamid):
|
||||||
page = get_parsed_page("http://www.hltv.org/?pageid=362&teamid=" + teamid)
|
page = get_parsed_page("http://www.hltv.org/?pageid=362&teamid=" + teamid)
|
||||||
titlebox = page.find("div", {"class": "centerFade"})
|
titlebox = page.find("div", {"class": "teamProfile"})
|
||||||
players = []
|
players = []
|
||||||
for player in titlebox.find_all("div")[5:25]:
|
for player in titlebox.find_all("div", {"class": "standard-box overlayImageFrame"}):
|
||||||
players.append(player.text.strip())
|
players.append(player.text.strip().encode('utf8'))
|
||||||
print([x for x in set(players) if x is not u''])
|
return players
|
||||||
|
|
||||||
|
|
||||||
def get_team_info(teamid):
|
def get_team_info(teamid):
|
||||||
|
@ -77,127 +74,156 @@ def get_team_info(teamid):
|
||||||
page = get_parsed_page("http://www.hltv.org/?pageid=179&teamid=" + str(teamid))
|
page = get_parsed_page("http://www.hltv.org/?pageid=179&teamid=" + str(teamid))
|
||||||
|
|
||||||
team_info = {}
|
team_info = {}
|
||||||
|
team_info['team-name']=page.find("div", {"class": "context-item"}).text.encode('utf8')
|
||||||
|
|
||||||
content_boxes = page.select('div.centerFade .covGroupBoxContent')
|
current_lineup = _get_current_lineup(page.find_all("div", {"class": "col teammate"}))
|
||||||
team_info['team-name']=content_boxes[0].select('> div')[0].text.strip()
|
|
||||||
team_info['region'] = content_boxes[0].select('> div')[4].select('.covSmallHeadline')[1].text.strip()
|
|
||||||
|
|
||||||
current_lineup_div = content_boxes[1]
|
|
||||||
current_lineup = _get_lineup(current_lineup_div.select('a'))
|
|
||||||
team_info['current-lineup'] = current_lineup
|
team_info['current-lineup'] = current_lineup
|
||||||
|
|
||||||
historical_players_div = content_boxes[2]
|
historical_players = _get_historical_lineup(page.find_all("div", {"class": "col teammate"}))
|
||||||
historical_players = _get_lineup(historical_players_div.select('a'))
|
|
||||||
team_info['historical-players'] = historical_players
|
team_info['historical-players'] = historical_players
|
||||||
|
|
||||||
team_stats_div = content_boxes[3]
|
team_stats_columns = page.find_all("div", {"class": "columns"})
|
||||||
team_stats = {}
|
team_stats = {}
|
||||||
for index, stat_div in enumerate(team_stats_div.select('> div')[3:]):
|
|
||||||
if (index%2):
|
for columns in team_stats_columns:
|
||||||
stat_title = stat_div.select('.covSmallHeadline')[0].text.strip()
|
stats = columns.find_all("div", {"class": "col standard-box big-padding"})
|
||||||
stat_value = stat_div.select('.covSmallHeadline')[1].text.strip()
|
|
||||||
|
for stat in stats:
|
||||||
|
stat_value = stat.find("div", {"class": "large-strong"}).text.encode('utf8')
|
||||||
|
stat_title = stat.find("div", {"class": "small-label-below"}).text.encode('utf8')
|
||||||
team_stats[stat_title] = stat_value
|
team_stats[stat_title] = stat_value
|
||||||
|
|
||||||
team_info['stats'] = team_stats
|
team_info['stats'] = team_stats
|
||||||
|
|
||||||
return team_info
|
return team_info
|
||||||
|
|
||||||
|
|
||||||
def _get_lineup(player_anchors):
|
def _get_current_lineup(player_anchors):
|
||||||
"""
|
"""
|
||||||
helper function for function above
|
helper function for function above
|
||||||
:return: list of players
|
:return: list of players
|
||||||
"""
|
"""
|
||||||
players = []
|
players = []
|
||||||
for player_anchor in player_anchors:
|
for player_anchor in player_anchors[0:5]:
|
||||||
player = {}
|
player = {}
|
||||||
player_link = player_anchor.get('href')
|
buildName = player_anchor.find("img", {"class": "container-width"})["alt"].encode('utf8').split('\'')
|
||||||
player['player-id'] = converters.to_int(player_link[player_link.index('playerid'):], regexp=True)
|
player['country'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("img", {"class": "flag"})["alt"].encode('utf8')
|
||||||
player_text = player_anchor.text
|
player['name'] = buildName[0].rstrip() + buildName[2]
|
||||||
player['name'] = player_text[0:player_text.index("(")].strip()
|
player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text.encode('utf8')
|
||||||
player['maps-played'] = converters.to_int(player_text[player_text.index("("):], regexp=True)
|
player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": "teammate-info standard-box"}).find("span").text.encode('utf8')).group())
|
||||||
|
players.append(player)
|
||||||
|
return players
|
||||||
|
|
||||||
|
def _get_historical_lineup(player_anchors):
|
||||||
|
"""
|
||||||
|
helper function for function above
|
||||||
|
:return: list of players
|
||||||
|
"""
|
||||||
|
players = []
|
||||||
|
for player_anchor in player_anchors[5::]:
|
||||||
|
player = {}
|
||||||
|
buildName = player_anchor.find("img", {"class": "container-width"})["alt"].encode('utf8').split('\'')
|
||||||
|
player['country'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("img", {"class": "flag"})["alt"].encode('utf8')
|
||||||
|
player['name'] = buildName[0].rstrip() + buildName[2]
|
||||||
|
player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text.encode('utf8')
|
||||||
|
player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": "teammate-info standard-box"}).find("span").text.encode('utf8')).group())
|
||||||
players.append(player)
|
players.append(player)
|
||||||
return players
|
return players
|
||||||
|
|
||||||
|
|
||||||
def get_matches():
|
def get_matches():
|
||||||
matches = get_parsed_page("http://www.hltv.org/matches/")
|
matches = get_parsed_page("http://www.hltv.org/matches/")
|
||||||
matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
|
|
||||||
datestring = ""
|
|
||||||
matches_list = []
|
matches_list = []
|
||||||
for match in matchlist:
|
upcomingmatches = matches.find("div", {"class": "upcoming-matches"})
|
||||||
if match['class'][0] == "matchListDateBox":
|
|
||||||
# TODO possibly change this into real date object
|
matchdays = upcomingmatches.find_all("div", {"class": "match-day"})
|
||||||
datestring = match.text.strip()
|
|
||||||
|
for match in matchdays:
|
||||||
|
matchDetails = match.find_all("table", {"class": "table"})
|
||||||
|
|
||||||
|
for getMatch in matchDetails:
|
||||||
|
matchObj = {}
|
||||||
|
|
||||||
|
matchObj['date'] = match.find("span", {"class": "standard-headline"}).text.encode('utf8')
|
||||||
|
matchObj['time'] = getMatch.find("td", {"class": "time"}).text.encode('utf8').lstrip().rstrip()
|
||||||
|
|
||||||
|
if (getMatch.find("td", {"class": "placeholder-text-cell"})):
|
||||||
|
matchObj['event'] = getMatch.find("td", {"class": "placeholder-text-cell"}).text.encode('utf8')
|
||||||
|
elif (getMatch.find("td", {"class": "event"})):
|
||||||
|
matchObj['event'] = getMatch.find("td", {"class": "event"}).text.encode('utf8')
|
||||||
else:
|
else:
|
||||||
try:
|
matchObj['event'] = None
|
||||||
#What does matchd mean?
|
|
||||||
matchd = {}
|
|
||||||
matchd['date'] = datestring + " - " + match.find("div", {"class": "matchTimeCell"}).text.strip()
|
|
||||||
team1div = match.find("div", {"class": "matchTeam1Cell"})
|
|
||||||
team1 = {}
|
|
||||||
team1["name"] = team1div.text.strip()
|
|
||||||
team1href = team1div.select('a')[0].get('href')
|
|
||||||
team1["id"] = converters.to_int(team1href[team1href.index('teamid'):], regexp=True)
|
|
||||||
matchd['team1'] = team1
|
|
||||||
team2div = match.find("div", {"class": "matchTeam2Cell"})
|
|
||||||
team2 = {}
|
|
||||||
team2["name"] = team2div.text.strip()
|
|
||||||
team2href = team2div.select('a')[0].get('href')
|
|
||||||
team2["id"] = converters.to_int(team2href[team2href.index('teamid'):], regexp=True)
|
|
||||||
matchd['team2'] = team2
|
|
||||||
|
|
||||||
# include link (id) to match page
|
if (getMatch.find_all("td", {"class": "team-cell"})):
|
||||||
matchd['matchid'] = match.find("div", {"class": "matchActionCell"}).find("a").get('href') #What a fucking mess lmao
|
matchObj['team1'] = getMatch.find_all("td", {"class": "team-cell"})[0].text.encode('utf8').lstrip().rstrip()
|
||||||
|
matchObj['team2'] = getMatch.find_all("td", {"class": "team-cell"})[1].text.encode('utf8').lstrip().rstrip()
|
||||||
|
else:
|
||||||
|
matchObj['team1'] = None
|
||||||
|
matchObj['team2'] = None
|
||||||
|
|
||||||
|
matches_list.append(matchObj)
|
||||||
|
|
||||||
matches_list.append(matchd)
|
|
||||||
except:
|
|
||||||
# what does this do man?
|
|
||||||
print(match.text[:7].strip(), match.text[7:-7].strip())
|
|
||||||
return matches_list
|
return matches_list
|
||||||
|
|
||||||
def get_results():
|
def get_results():
|
||||||
results = get_parsed_page("http://www.hltv.org/results/")
|
results = get_parsed_page("http://www.hltv.org/results/")
|
||||||
resultslist = results.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
|
|
||||||
datestring = ""
|
|
||||||
results_list = []
|
results_list = []
|
||||||
for result in resultslist:
|
|
||||||
if result['class'][0] == "matchListDateBox":
|
pastresults = results.find("div", {"class": "results-holder"}).find("div", {"class": "results-all"}).find_all("div", {"class": "results-sublist"})
|
||||||
# TODO possibly change this into a real date object
|
|
||||||
datestring = result.text.strip()
|
for result in pastresults:
|
||||||
|
resultDiv = result.find_all("div", {"class": "result-con"})
|
||||||
|
|
||||||
|
for res in resultDiv:
|
||||||
|
getRes = res.find("div", {"class": "result"}).find("table")
|
||||||
|
|
||||||
|
resultObj = {}
|
||||||
|
|
||||||
|
resultObj['date'] = result.find("span", {"class": "standard-headline"}).text.encode('utf8')
|
||||||
|
|
||||||
|
if (res.find("td", {"class": "placeholder-text-cell"})):
|
||||||
|
resultObj['event'] = res.find("td", {"class": "placeholder-text-cell"}).text.encode('utf8')
|
||||||
|
elif (res.find("td", {"class": "event"})):
|
||||||
|
resultObj['event'] = res.find("td", {"class": "event"}).text.encode('utf8')
|
||||||
else:
|
else:
|
||||||
#What does resultd mean?
|
resultObj['event'] = None
|
||||||
resultd = {}
|
|
||||||
#This page uses the time box to show map played
|
|
||||||
resultd['date'] = datestring
|
|
||||||
resultd['map'] = result.find("div", {"class": "matchTimeCell"}).text.strip()
|
|
||||||
scores = result.find("div", {"class": "matchScoreCell"}).text.strip()
|
|
||||||
|
|
||||||
#Team 1 info
|
if (res.find_all("td", {"class": "team-cell"})):
|
||||||
team1div = result.find("div", {"class": "matchTeam1Cell"})
|
resultObj['team1'] = res.find_all("td", {"class": "team-cell"})[0].text.encode('utf8').lstrip().rstrip()
|
||||||
team1 = {}
|
resultObj['team1score'] = res.find("td", {"class": "result-score"}).find_all("span")[0].text.encode('utf8').lstrip().rstrip()
|
||||||
team1['name'] = team1div.text.strip()
|
resultObj['team2'] = res.find_all("td", {"class": "team-cell"})[1].text.encode('utf8').lstrip().rstrip()
|
||||||
#I seem to get the ID slightly differently, still works fine though
|
resultObj['team2score'] = res.find("td", {"class": "result-score"}).find_all("span")[1].text.encode('utf8').lstrip().rstrip()
|
||||||
team1href = team1div.select('a')[0].get('href')
|
else:
|
||||||
team1['id'] = converters.to_int(team1href.split("=")[-1], regexp=True)
|
resultObj['team1'] = None
|
||||||
team1['score'] = converters.to_int(scores.split("-")[0].strip(), regexp=True)
|
resultObj['team2'] = None
|
||||||
resultd['team1'] = team1
|
|
||||||
|
|
||||||
#Team 2 info
|
results_list.append(resultObj)
|
||||||
team2div = result.find("div", {"class": "matchTeam2Cell"})
|
|
||||||
team2 = {}
|
|
||||||
team2['name'] = team2div.text.strip()
|
|
||||||
team2href = team2div.select('a')[0].get('href')
|
|
||||||
team2['id'] = converters.to_int(team2href.split("=")[-1], regexp=True)
|
|
||||||
team2['score'] = converters.to_int(scores.split("-")[1].strip(), regexp=True)
|
|
||||||
resultd['team2'] = team2
|
|
||||||
|
|
||||||
resultd['matchid'] = result.find("div", {"class": "matchActionCell"}).find("a").get('href') #What a fucking mess lmao
|
return results_list
|
||||||
|
|
||||||
results_list.append(resultd)
|
|
||||||
return(results_list)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import pprint
|
import pprint
|
||||||
pp = pprint.PrettyPrinter()
|
pp = pprint.PrettyPrinter()
|
||||||
pp.pprint(top30teams())
|
|
||||||
|
# pp.pprint('top5')
|
||||||
|
# pp.pprint(top5teams())
|
||||||
|
#
|
||||||
|
# pp.pprint('top30')
|
||||||
|
# pp.pprint(top30teams())
|
||||||
|
|
||||||
|
# pp.pprint('top_players')
|
||||||
|
# pp.pprint(top_players())
|
||||||
|
|
||||||
|
# pp.pprint('get_players')
|
||||||
|
# pp.pprint(get_players('6137'))
|
||||||
|
|
||||||
|
# pp.pprint('get_team_info')
|
||||||
|
# pp.pprint(get_team_info('6137'))
|
||||||
|
|
||||||
|
# pp.pprint('get_matches')
|
||||||
|
# pp.pprint(get_matches())
|
||||||
|
|
||||||
|
# pp.pprint('get_results')
|
||||||
|
# pp.pprint(get_results())
|
||||||
|
|
Loading…
Add table
Reference in a new issue