Refactored bs4 page parsing

2025-04-30 23:43:08 +00:00 · 2016-09-11 23:40:54 +01:00 · 2016-09-11 23:40:54 +01:00 · 3fc083b5e3
commit 3fc083b5e3
parent b49abe92fa
1 changed file with 9 additions and 8 deletions
--- a/main.py
+++ b/main.py
@@ -2,28 +2,28 @@ import requests
 import json
 from bs4 import BeautifulSoup
 home = requests.get("http://hltv.org/").text
 home = BeautifulSoup(home, "lxml")
-matches = requests.get("http://www.hltv.org/matches/").text
+def get_parsed_page(url):
-matches = BeautifulSoup(matches, "lxml")
+    return BeautifulSoup(requests.get(url).text, "lxml")
 def top5teams():
    home = get_parsed_page("http://hltv.org/")
    count = 0
    teams = []
-    for team in home.find_all("div", {"class": "vsbox",})[:5]:
+    for team in home.find_all("div", {"class": "vsbox", })[:5]:
        count += 1
        teamname = team.find_all("div")[2].text.strip()
        teams.append(teamname)
    return teams
 def getmatches():
-    match_data = []
+    matches = get_parsed_page("http://www.hltv.org/matches/")
    matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
    for match in matchlist:
        if match['class'][0] == "matchListDateBox":
-            print("* " +  match.text)
+            print("* " + match.text)
        else:
            try:
                time = match.find("div", {"class": "matchTimeCell"}).text.strip()
@@ -33,5 +33,6 @@ def getmatches():
            except:
                print(match.text[:7].strip(), match.text[7:-7].strip())
 if __name__ == "__main__":
    getmatches()