From a87b1ef06858d6d9888c531a035b7fb28fb4931c Mon Sep 17 00:00:00 2001
From: flannel jesus
Date: Sun, 11 Sep 2016 23:42:36 +0100
Subject: [PATCH] top20teams function

I'm using dictionaries because dictionaries are basically JSON already.
We can talk about this decision if you don't like it. I'm just assuming
we'll be exporting JSON at the end of this all.
---
 main.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index 4937e0b..269e1f4 100644
--- a/main.py
+++ b/main.py
@@ -7,6 +7,10 @@ def get_parsed_page(url):
     return BeautifulSoup(requests.get(url).text, "lxml")
 
 
+def parseint(string):
+    return int(''.join([x for x in string if x.isdigit()]))
+
+
 def top5teams():
     home = get_parsed_page("http://hltv.org/")
     count = 0
@@ -18,6 +22,27 @@ def top5teams():
     return teams
 
 
+def top20teams():
+    page = get_parsed_page("http://www.hltv.org/ranking/teams/")
+    teams = page.select("div.ranking-box")
+    teamlist = []
+    for team in teams:
+        newteam = {'name': team.select('.ranking-teamName > a')[0].text.strip(),
+                   'rank': parseint(team.select('.ranking-number')[0].text.strip()),
+                   'rank-points': parseint(team.select('.ranking-teamName > span')[0].text),
+                   'team-id': parseint(team.select('.ranking-delta')[0].get('id')),
+                   'team-players': []}
+        for player_div in team.select('.ranking-lineup > div'):
+            player = {}
+            player_anchor = player_div.select('.ranking-playerNick > a')[0]
+            player['name'] = player_anchor.text.strip()
+            player['player-id'] = parseint(player_anchor.get('href'))
+            newteam['team-players'].append(player)
+        teamlist.append(newteam)
+    return teamlist
+
+
+
 def getmatches():
     matches = get_parsed_page("http://www.hltv.org/matches/")
     matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
@@ -33,6 +58,5 @@ def getmatches():
         except:
             print(match.text[:7].strip(), match.text[7:-7].strip())
 
-
 if __name__ == "__main__":
-    getmatches()
+    print(top20teams())