1
0
Fork 0
mirror of https://github.com/SocksPls/hltv-api synced 2025-04-30 23:43:08 +00:00

refactored bs4 parsing pages

This commit is contained in:
flannel jesus 2016-09-11 23:40:54 +01:00
parent b49abe92fa
commit 3fc083b5e3

15
main.py
View file

@@ -2,28 +2,28 @@ import requests
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
home = requests.get("http://hltv.org/").text
home = BeautifulSoup(home, "lxml")
matches = requests.get("http://www.hltv.org/matches/").text def get_parsed_page(url):
matches = BeautifulSoup(matches, "lxml") return BeautifulSoup(requests.get(url).text, "lxml")
def top5teams(): def top5teams():
home = get_parsed_page("http://hltv.org/")
count = 0 count = 0
teams = [] teams = []
for team in home.find_all("div", {"class": "vsbox",})[:5]: for team in home.find_all("div", {"class": "vsbox", })[:5]:
count += 1 count += 1
teamname = team.find_all("div")[2].text.strip() teamname = team.find_all("div")[2].text.strip()
teams.append(teamname) teams.append(teamname)
return teams return teams
def getmatches(): def getmatches():
match_data = [] matches = get_parsed_page("http://www.hltv.org/matches/")
matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]}) matchlist = matches.find_all("div", {"class": ["matchListBox", "matchListDateBox"]})
for match in matchlist: for match in matchlist:
if match['class'][0] == "matchListDateBox": if match['class'][0] == "matchListDateBox":
print("* " + match.text) print("* " + match.text)
else: else:
try: try:
time = match.find("div", {"class": "matchTimeCell"}).text.strip() time = match.find("div", {"class": "matchTimeCell"}).text.strip()
@@ -33,5 +33,6 @@ def getmatches():
except: except:
print(match.text[:7].strip(), match.text[7:-7].strip()) print(match.text[:7].strip(), match.text[7:-7].strip())
if __name__ == "__main__": if __name__ == "__main__":
getmatches() getmatches()