1
0
Fork 0
mirror of https://github.com/SocksPls/hltv-api synced 2025-04-30 07:33:09 +00:00

Merge pull request #1 from flannelJesus/top20teams

Add a top 20 teams function (grabs a lot more data as well), a function to parse ints, and refactored webpage parsing
This commit is contained in:
SecretlyTaco 2016-09-12 01:08:13 +01:00 committed by GitHub
commit c4c6632ddd

39
main.py
View file

@ -2,24 +2,49 @@ import requests
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
home = requests.get("http://hltv.org/").text
home = BeautifulSoup(home, "lxml")
matches = requests.get("http://www.hltv.org/matches/").text def get_parsed_page(url):
matches = BeautifulSoup(matches, "lxml") return BeautifulSoup(requests.get(url).text, "lxml")
def parseint(string):
    """Return the integer formed by the decimal digits found in *string*.

    Every character that is not a decimal digit (whitespace, ``#``, letters,
    punctuation, a minus sign, ...) is dropped, and the remaining digits are
    joined in their original order before conversion. For example ``"#12"``
    gives ``12`` and ``"(1,000 points)"`` gives ``1000``.

    Raises:
        ValueError: if *string* contains no decimal digit at all.
    """
    # str.isdecimal() rather than str.isdigit(): isdigit() also accepts
    # characters such as superscript two that int() refuses to parse.
    digits = ''.join(ch for ch in string if ch.isdecimal())
    if not digits:
        raise ValueError("no digits found in {!r}".format(string))
    return int(digits)
def top5teams():
    """Return the names read from the first five ``div.vsbox`` elements.

    Fetches and parses http://hltv.org/ via ``get_parsed_page``, takes the
    first five ``div`` elements whose class is ``vsbox``, and for each one
    reads the stripped text of its third nested ``div``.

    Returns:
        A list of strings, one per box; shorter than five when the page
        contains fewer than five matching boxes.

    Raises:
        IndexError: if a box contains fewer than three nested ``div``s.
    """
    home = get_parsed_page("http://hltv.org/")
    return [
        box.find_all("div")[2].text.strip()
        for box in home.find_all("div", {"class": "vsbox"})[:5]
    ]
def top20teams():
    """Scrape the HLTV team ranking page into a list of team dicts.

    One dict is produced for every ``div.ranking-box`` found on
    http://www.hltv.org/ranking/teams/, in page order, with the keys:

    * ``'name'``         - stripped text of the team-name anchor
    * ``'rank'``         - digits of the ``.ranking-number`` text, as int
    * ``'rank-points'``  - digits of the team-name ``span`` text, as int
    * ``'team-id'``      - digits of the ``.ranking-delta`` ``id`` attribute
    * ``'team-players'`` - list of ``{'name', 'player-id'}`` dicts, one per
      child ``div`` of ``.ranking-lineup``

    Raises:
        IndexError: if an expected element is missing from a box.
    """
    ranking_page = get_parsed_page("http://www.hltv.org/ranking/teams/")
    ranked = []
    for box in ranking_page.select("div.ranking-box"):
        # Team-level fields are read first, in the same order as the keys.
        entry = {}
        entry['name'] = box.select('.ranking-teamName > a')[0].text.strip()
        entry['rank'] = parseint(box.select('.ranking-number')[0].text.strip())
        entry['rank-points'] = parseint(
            box.select('.ranking-teamName > span')[0].text)
        entry['team-id'] = parseint(box.select('.ranking-delta')[0].get('id'))

        roster = []
        for slot in box.select('.ranking-lineup > div'):
            nick_link = slot.select('.ranking-playerNick > a')[0]
            # NOTE(review): parseint joins *every* digit in the href, so this
            # assumes the link holds no digits besides the id - confirm.
            roster.append({
                'name': nick_link.text.strip(),
                'player-id': parseint(nick_link.get('href')),
            })
        entry['team-players'] = roster

        ranked.append(entry)
    return ranked
def getmatches(): def getmatches():
match_data = [] matches = get_parsed_page("http://www.hltv.org/matches/")
matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]}) matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
for match in matchlist: for match in matchlist:
if match['class'][0] == "matchListDateBox": if match['class'][0] == "matchListDateBox":
@ -34,4 +59,4 @@ def getmatches():
print(match.text[:7].strip(), match.text[7:-7].strip()) print(match.text[:7].strip(), match.text[7:-7].strip())
if __name__ == "__main__": if __name__ == "__main__":
getmatches() print(top20teams())