Refactored bs4 page fetching/parsing into a get_parsed_page helper

2025-04-30 15:43:07 +00:00 · 2016-09-11 23:40:54 +01:00 · 2016-09-11 23:40:54 +01:00 · 3fc083b5e3
commit 3fc083b5e3
parent b49abe92fa
1 changed file with 9 additions and 8 deletions
--- a/main.py
+++ b/main.py
@ -2,24 +2,24 @@ import requests
 import json
 from bs4 import BeautifulSoup

-home = requests.get("http://hltv.org/").text
-home = BeautifulSoup(home, "lxml")

-matches = requests.get("http://www.hltv.org/matches/").text
-matches = BeautifulSoup(matches, "lxml")
+def get_parsed_page(url):
+    return BeautifulSoup(requests.get(url).text, "lxml")


 def top5teams():
+    home = get_parsed_page("http://hltv.org/")
    count = 0
    teams = []
-    for team in home.find_all("div", {"class": "vsbox",})[:5]:
+    for team in home.find_all("div", {"class": "vsbox", })[:5]:
        count += 1
        teamname = team.find_all("div")[2].text.strip()
        teams.append(teamname)
    return teams

+
 def getmatches():
-    match_data = []
+    matches = get_parsed_page("http://www.hltv.org/matches/")
    matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
    for match in matchlist:
        if match['class'][0] == "matchListDateBox":
@ -33,5 +33,6 @@ def getmatches():
            except:
                print(match.text[:7].strip(), match.text[7:-7].strip())

+
 if __name__ == "__main__":
    getmatches()