Merge pull request #1 from flannelJesus/top20teams

Add a top 20 teams function (which also grabs a lot more data), add a function to parse ints, and refactor webpage parsing
2025-07-12 03:59:15 +00:00 · 2016-09-12 01:08:13 +01:00 · 2016-09-12 01:08:13 +01:00 · c4c6632ddd
commit c4c6632ddd
parent b49abe92fa a87b1ef068
1 changed files with 34 additions and 9 deletions
--- a/main.py
+++ b/main.py
@ -2,24 +2,49 @@ import requests
 import json
 from bs4 import BeautifulSoup
 home = requests.get("http://hltv.org/").text
 home = BeautifulSoup(home, "lxml")
-matches = requests.get("http://www.hltv.org/matches/").text
+def get_parsed_page(url):
-matches = BeautifulSoup(matches, "lxml")
+    return BeautifulSoup(requests.get(url).text, "lxml")
 def parseint(string):
     """Return the integer formed by the decimal digits found in *string*.

     Every character that is not a decimal digit is discarded and the
     remaining digits are read, in order, as one base-10 number, so
     ``"#12"`` gives ``12``.

     Raises:
         ValueError: if *string* contains no decimal digit at all.
     """
     # str.isdecimal() is used rather than str.isdigit(): isdigit() also
     # accepts characters such as superscript digits, which int() rejects.
     digits = ''.join(ch for ch in string if ch.isdecimal())
     if not digits:
         raise ValueError("no digits to parse in {!r}".format(string))
     return int(digits)
 def top5teams():
    home = get_parsed_page("http://hltv.org/")
    count = 0
    teams = []
-    for team in home.find_all("div", {"class": "vsbox",})[:5]:
+    for team in home.find_all("div", {"class": "vsbox", })[:5]:
        count += 1
        teamname = team.find_all("div")[2].text.strip()
        teams.append(teamname)
    return teams
 def top20teams():
     """Scrape the HLTV team ranking page into a list of team dicts.

     Fetches ``http://www.hltv.org/ranking/teams/`` through
     ``get_parsed_page`` and builds one dict per ``div.ranking-box``
     element, in page order, with the keys ``'name'``, ``'rank'``,
     ``'rank-points'``, ``'team-id'`` and ``'team-players'``. Each entry
     of ``'team-players'`` is a dict with the keys ``'name'`` and
     ``'player-id'``. Numeric values are extracted with ``parseint``.
     """
     ranking_page = get_parsed_page("http://www.hltv.org/ranking/teams/")
     ranking = []
     for box in ranking_page.select("div.ranking-box"):
         team_name = box.select('.ranking-teamName > a')[0].text.strip()
         position = parseint(box.select('.ranking-number')[0].text.strip())
         points = parseint(box.select('.ranking-teamName > span')[0].text)
         # The team id comes from the digits in the delta element's id attribute.
         team_id = parseint(box.select('.ranking-delta')[0].get('id'))
         # One nickname anchor per lineup slot; the player id is read from
         # the digits in the anchor's href.
         nick_links = (slot.select('.ranking-playerNick > a')[0]
                       for slot in box.select('.ranking-lineup > div'))
         roster = [{'name': link.text.strip(),
                    'player-id': parseint(link.get('href'))}
                   for link in nick_links]
         ranking.append({'name': team_name,
                         'rank': position,
                         'rank-points': points,
                         'team-id': team_id,
                         'team-players': roster})
     return ranking
 def getmatches():
-    match_data = []
+    matches = get_parsed_page("http://www.hltv.org/matches/")
    matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
    for match in matchlist:
        if match['class'][0] == "matchListDateBox":
@ -34,4 +59,4 @@ def getmatches():
                print(match.text[:7].strip(), match.text[7:-7].strip())
 if __name__ == "__main__":
-    getmatches()
+    print(top20teams())