Merge pull request #30 from SocksPls/date-formatting

fix(app): housekeeping/dateformat
2025-07-11 03:29:14 +00:00 · 2022-07-14 13:45:25 +02:00 · 2022-07-14 13:45:25 +02:00 · 16eec2314f
commit 16eec2314f
parent ab66ebf62b 2889936ebb
2 changed files with 110 additions and 117 deletions
--- a/main.py
+++ b/main.py
@ -4,6 +4,36 @@ import datetime
 from bs4 import BeautifulSoup
 from python_utils import converters
 import time
+import zoneinfo
+import tzlocal
+
+HLTV_COOKIE_TIMEZONE = "Europe/Copenhagen"
+HLTV_ZONEINFO=zoneinfo.ZoneInfo(HLTV_COOKIE_TIMEZONE)
+LOCAL_TIMEZONE_NAME = tzlocal.get_localzone_name()
+LOCAL_ZONEINFO = zoneinfo.ZoneInfo(LOCAL_TIMEZONE_NAME)
+
+def padIfNeeded(numberStr: str):
+    if int(numberStr) < 10:
+        return str(numberStr).zfill(2)
+    else:
+        return str(numberStr)
+
+def monthNameToNumber(monthName: str):
+    months = {
+        'January': 1,
+        'February': 2,
+        'March': 3,
+        'April': 4,
+        'May': 5,
+        'June': 6,
+        'July': 7,
+        'August': 8,
+        'September': 9,
+        'October': 10,
+        'November': 11,
+        'December': 12
+    }
+    return months[monthName]

 def get_parsed_page(url, delay=0.5):
    # This fixes a blocked by cloudflare error i've encountered
@ -12,24 +42,26 @@ def get_parsed_page(url, delay=0.5):
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }

+    cookies = {
+        "hltvTimeZone": HLTV_COOKIE_TIMEZONE
+    }
+
    time.sleep(delay)

-    return BeautifulSoup(requests.get(url, headers=headers).text, "lxml")
+    return BeautifulSoup(requests.get(url, headers=headers, cookies=cookies).text, "lxml")


 def top5teams():
-    home = get_parsed_page("http://hltv.org/")
-    count = 0
+    home = get_parsed_page("https://hltv.org/")
    teams = []
    for team in home.find_all("div", {"class": ["col-box rank"], }):
-        count += 1
-        teamname = team.text[3:]
-        teams.append(teamname)
+        team = {'name': team.text[3:], 'url': "https://hltv.org" + team.find_all("a")[1]["href"]}
+        teams.append(team)
    return teams


 def top30teams():
-    page = get_parsed_page("http://www.hltv.org/ranking/teams/")
+    page = get_parsed_page("https://www.hltv.org/ranking/teams/")
    teams = page.find("div", {"class": "ranking"})
    teamlist = []
    for team in teams.find_all("div", {"class": "ranked-team standard-box"}):
@ -37,11 +69,14 @@ def top30teams():
                   'rank': converters.to_int(team.select('.position')[0].text.strip(), regexp=True),
                   'rank-points': converters.to_int(team.find('span', {'class': 'points'}).text, regexp=True),
                   'team-id': converters.to_int(team.find('a', {'class': 'details moreLink'})['href'].split('/')[-1]),
+                   'team-url': "https://hltv.org/team/" + team.find('a', {'class': 'details moreLink'})['href'].split('/')[-1] + "/" + team.find('div', {"class": "ranking-header"}).select('.name')[0].text.strip(),
+                   'stats-url': "https://www.hltv.org" + team.find('a', {'class': 'details moreLink'})['href'],
                   'team-players': []}
        for player_div in team.find_all("td", {"class": "player-holder"}):
            player = {}
            player['name'] = player_div.find('img', {'class': 'playerPicture'})['title']
            player['player-id'] = converters.to_int(player_div.select('.pointer')[0]['href'].split("/")[-2])
+            player['url'] = "https://www.hltv.org" + player_div.select('.pointer')[0]['href']
            newteam['team-players'].append(player)
        teamlist.append(newteam)
    return teamlist
@ -53,66 +88,31 @@ def top_players():
    playersArray = []
    for player in players.find_all("div", {"class": "top-x-box standard-box"}):
        playerObj = {}
-        playerObj['country'] = player.find_all('img')[1]['alt'].encode('utf8')
+        playerObj['country'] = player.find_all('img')[1]['alt']
        buildName = player.find('img', {'class': 'img'})['alt'].split("'")
        playerObj['name'] = buildName[0].rstrip() + buildName[2]
-        playerObj['nickname'] = player.find('a', {'class': 'name'}).text.encode('utf8')
-        playerObj['rating'] = player.find('div', {'class': 'rating'}).find('span', {'class': 'bold'}).text.encode('utf8')
-        playerObj['maps-played'] = player.find('div', {'class': 'average gtSmartphone-only'}).find('span', {'class': 'bold'}).text.encode('utf8')
-
+        playerObj['nickname'] = player.find('a', {'class': 'name'}).text
+        playerObj['rating'] = player.find('div', {'class': 'rating'}).find('span', {'class': 'bold'}).text
+        playerObj['maps-played'] = player.find('div', {'class': 'average gtSmartphone-only'}).find('span', {'class': 'bold'}).text
+        playerObj['url'] = "https://hltv.org" + player.find('a', {'class': 'name'}).get('href')
        playersArray.append(playerObj)
    return playersArray


 def get_players(teamid):
-    page = get_parsed_page("http://www.hltv.org/?pageid=362&teamid=" + str(teamid))
+    page = get_parsed_page("https://www.hltv.org/?pageid=362&teamid=" + str(teamid))
    titlebox = page.find("div", {"class": "bodyshot-team"})
    players = []
    for player_link in titlebox.find_all("a"):
        players.append({
            'id': player_link["href"].split("/")[2],
            'nickname': player_link["title"],
-            'name': player_link.find("img")['title']
+            'name': player_link.find("img")['title'],
+            'url': "https://hltv.org" + player_link["href"]
        })

    return players

-def get_player_info(player_id):
-    """
-    :param player_id: integer (or string consisting of integers)
-    :return: dictionary of player
-
-    example player id: 7398 (dupreeh)
-    """
-
-    page = get_parsed_page(f"https://www.hltv.org/stats/players/{player_id}/a")
-    statistics = page.find("div", {"class": "statistics"}).find_all("div", {"class": "stats-row"})
-    player_info = {
-        'nickname': page.find("h1", {"class": "summaryNickname text-ellipsis"}).text.encode('utf8'),
-        'name': page.find("div", {"class": "text-ellipsis"}).text[1:-1].encode('utf8'),
-        'country': page.find("img", {"class": "flag"})["alt"],
-        'team': page.find("div", {"class": "SummaryTeamname text-ellipsis"}).text.encode('utf8'),
-        'age': page.find("div", {"class": "summaryPlayerAge"}).text[:2],
-        'stats': {
-            'total_kills': statistics[0].find_all("span")[1].text,
-            'headshot_percent': statistics[1].find_all("span")[1].text,
-            'total_deaths': statistics[2].find_all("span")[1].text,
-            'kd_ratio': statistics[3].find_all("span")[1].text,
-            'dmg_per_round': statistics[4].find_all("span")[1].text,
-            'grenade_dmg_per_round': statistics[5].find_all("span")[1].text,
-            'maps_played': statistics[6].find_all("span")[1].text,
-            'rounds_played': statistics[7].find_all("span")[1].text,
-            'kills_per_round': statistics[8].find_all("span")[1].text,
-            'assists_per_round': statistics[9].find_all("span")[1].text,
-            'deaths_per_round': statistics[10].find_all("span")[1].text,
-            'saved_by_teammate_per_round': statistics[11].find_all("span")[1].text,
-            'saved_teammates_per_round': statistics[12].find_all("span")[1].text,
-            'rating_1': statistics[13].find_all("span")[1].text,
-        }
-    }
-
-    return player_info
-
 def get_team_info(teamid):
    """
    :param teamid: integer (or string consisting of integers)
@ -120,10 +120,10 @@ def get_team_info(teamid):

    example team id: 5378 (virtus pro)
    """
-    page = get_parsed_page("http://www.hltv.org/?pageid=179&teamid=" + str(teamid))
+    page = get_parsed_page("https://www.hltv.org/?pageid=179&teamid=" + str(teamid))

    team_info = {}
-    team_info['team-name']=page.find("div", {"class": "context-item"}).text.encode('utf8')
+    team_info['team-name']=page.find("div", {"class": "context-item"}).text

    current_lineup = _get_current_lineup(page.find_all("div", {"class": "col teammate"}))
    team_info['current-lineup'] = current_lineup
@ -138,55 +138,16 @@ def get_team_info(teamid):
        stats = columns.find_all("div", {"class": "col standard-box big-padding"})

        for stat in stats:
-            stat_value = stat.find("div", {"class": "large-strong"}).text.encode('utf8')
-            stat_title = stat.find("div", {"class": "small-label-below"}).text.encode('utf8')
+            stat_value = stat.find("div", {"class": "large-strong"}).text
+            stat_title = stat.find("div", {"class": "small-label-below"}).text
            team_stats[stat_title] = stat_value

    team_info['stats'] = team_stats

+    team_info['url'] = "https://hltv.org/stats/team/" + str(teamid) + "/" + str(team_info['team-name'])
+
    return team_info

-
-def get_match_info(match_id):
-    """
-    Will get match data, NOT map data
-    :param match_id: integer (or string consisting of integers)
-    :return dictionary of match
-
-    example match id: 65090 (fnatic-vs-nip)
-    """
-    page = get_parsed_page(f"https://www.hltv.org/stats/matches/{match_id}/a")
-
-    match_info = {
-        "team1": {
-            "name": page.find_all("table", {"class": "stats-table"})[0].find("th", {"class": "st-teamname"}).text
-        },
-        "team2": {
-            "name": page.find_all("table", {"class": "stats-table"})[1].find("th", {"class": "st-teamname"}).text
-        }
-    }
-
-    match_info["team1"]["players"] = [player.text for player in page.find_all("table", {"class": "stats-table"})[0].find_all("td", {"class": "st-player"})]
-    match_info["team2"]["players"] = [player.text for player in page.find_all("table", {"class": "stats-table"})[1].find_all("td", {"class": "st-player"})]
-
-    for team in ["team1", "team2"]:
-        for count, player in enumerate(match_info[team]["players"]):
-            stats_table = page.find_all("table", {"class": "stats-table"})[0]
-            match_info[team][player] = {
-                "kills": stats_table.find_all("td", {"class": "st-kills"})[count].text.split()[0],
-                "headshots": stats_table.find_all("td", {"class": "st-kills"})[count].text.split("(")[-1][:-1],
-                "assists": stats_table.find_all("td", {"class": "st-assists"})[count].text.split()[0],
-                "flash_assists": stats_table.find_all("td", {"class": "st-assists"})[count].text.split("(")[-1][:-1],
-                "deaths": stats_table.find_all("td", {"class": "st-deaths"})[count].text,
-                "kast": stats_table.find_all("td", {"class": "st-kdratio"})[count].text,
-                "kd_diff": stats_table.find_all("td", {"class": "st-kddiff"})[count].text,
-                "adr": stats_table.find_all("td", {"class": "st-adr"})[count].text,
-                "fk_diff": stats_table.find_all("td", {"class": "st-fkdiff"})[count].text,
-                "rating": stats_table.find_all("td", {"class": "st-rating"})[count].text,
-            }
-
-    return match_info
-
 def _get_current_lineup(player_anchors):
    """
    helper function for function above
@ -200,6 +161,7 @@ def _get_current_lineup(player_anchors):
        player['name'] = buildName[0].rstrip() + buildName[2]
        player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text
        player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": "teammate-info standard-box"}).find("span").text).group())
+        player['url'] = "https://hltv.org" + player_anchor.find("div", {"class": "teammate-info standard-box"}).find("a").get("href")
        players.append(player)
    return players

@ -212,18 +174,18 @@ def _get_historical_lineup(player_anchors):
    for player_anchor in player_anchors[5::]:
        player = {}
        buildName = player_anchor.find("img", {"class": "container-width"})["alt"].split('\'')
-        player['country'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("img", {"class": "flag"})["alt"].encode('utf8')
+        player['country'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("img", {"class": "flag"})["alt"]
        player['name'] = buildName[0].rstrip() + buildName[2]
-        player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text.encode('utf8')
+        player['nickname'] = player_anchor.find("div", {"class": "teammate-info standard-box"}).find("div", {"class": "text-ellipsis"}).text
        player['maps-played'] = int(re.search(r'\d+', player_anchor.find("div", {"class": "teammate-info standard-box"}).find("span").text).group())
+        player['url'] = "https://hltv.org" + player_anchor.find("div", {"class": "teammate-info standard-box"}).find("a").get("href")
        players.append(player)
    return players


 def get_matches():
-    matches = get_parsed_page("http://www.hltv.org/matches/")
+    matches = get_parsed_page("https://www.hltv.org/matches/")
    matches_list = []
-    upcomingmatches = matches.find("div", {"class": "upcomingMatchesSection"})

    matchdays = matches.find_all("div", {"class": "upcomingMatchesSection"})

@ -233,17 +195,29 @@ def get_matches():
        for getMatch in matchDetails:
            matchObj = {}

-            matchObj['date'] = date
            matchObj['url'] = "https://hltv.org" + getMatch.find("a", {"class": "match a-reset"}).get("href")
-            matchObj['time'] = getMatch.find("div", {"class": "matchTime"}).text
+            
+            if (date and getMatch.find("div", {"class": "matchTime"})):
+                timeFromHLTV = datetime.datetime.strptime(date + " " + getMatch.find("div", {"class": "matchTime"}).text,'%Y-%m-%d %H:%M').replace(tzinfo=HLTV_ZONEINFO)
+                timeFromHLTV = timeFromHLTV.astimezone(LOCAL_ZONEINFO)
+                matchObj['date'] = timeFromHLTV.strftime('%Y-%m-%d')
+                matchObj['time'] = timeFromHLTV.strftime('%H:%M')
+                timenow = datetime.datetime.now().astimezone(LOCAL_ZONEINFO).strftime('%Y-%m-%d %H:%M')
+                deadline = date + " " + getMatch.find("div", {"class": "matchTime"}).text
+                currentTime = datetime.datetime.strptime(timenow,'%Y-%m-%d %H:%M')
+                ends = datetime.datetime.strptime(deadline, '%Y-%m-%d %H:%M')
+
+                if currentTime < ends:
+                    matchObj['countdown'] = str(ends - currentTime)
+
            if getMatch.find("div", {"class": "matchEvent"}):
-                matchObj['event'] = getMatch.find("div", {"class": "matchEvent"}).text.encode('utf8').strip()
+                matchObj['event'] = getMatch.find("div", {"class": "matchEvent"}).text.strip()
            else:
-                matchObj['event'] = getMatch.find("div", {"class": "matchInfoEmpty"}).text.encode('utf8').strip()
+                matchObj['event'] = getMatch.find("div", {"class": "matchInfoEmpty"}).text.strip()

            if (getMatch.find_all("div", {"class": "matchTeams"})):
-                matchObj['team1'] = getMatch.find_all("div", {"class": "matchTeam"})[0].text.encode('utf8').lstrip().rstrip()
-                matchObj['team2'] = getMatch.find_all("div", {"class": "matchTeam"})[1].text.encode('utf8').lstrip().rstrip()
+                matchObj['team1'] = getMatch.find_all("div", {"class": "matchTeam"})[0].text.lstrip().rstrip()
+                matchObj['team2'] = getMatch.find_all("div", {"class": "matchTeam"})[1].text.lstrip().rstrip()
            else:
                matchObj['team1'] = None
                matchObj['team2'] = None
@ -253,7 +227,7 @@ def get_matches():
    return matches_list

 def get_results():
-    results = get_parsed_page("http://www.hltv.org/results/")
+    results = get_parsed_page("https://www.hltv.org/results/")

    results_list = []

@ -263,28 +237,36 @@ def get_results():
        resultDiv = result.find_all("div", {"class": "result-con"})

        for res in resultDiv:
-            getRes = res.find("div", {"class": "result"}).find("table")
-
            resultObj = {}

+            resultObj['url'] = "https://hltv.org" + res.find("a", {"class": "a-reset"}).get("href")
+
            if (res.parent.find("span", {"class": "standard-headline"})):
-                resultObj['date'] = res.parent.find("span", {"class": "standard-headline"}).text.encode('utf8')
+                dateText = res.parent.find("span", {"class": "standard-headline"}).text.replace("Results for ", "").replace("th", "")
+
+                dateArr = dateText.split()
+
+                dateTextFromArrPadded = padIfNeeded(dateArr[2]) + "-" + padIfNeeded(monthNameToNumber(dateArr[0])) + "-" + padIfNeeded(dateArr[1])
+                dateFromHLTV = datetime.datetime.strptime(dateTextFromArrPadded,'%Y-%m-%d').replace(tzinfo=HLTV_ZONEINFO)
+                dateFromHLTV = dateFromHLTV.astimezone(LOCAL_ZONEINFO)
+
+                resultObj['date'] = dateFromHLTV.strftime('%Y-%m-%d')
            else:
                dt = datetime.date.today()
                resultObj['date'] = str(dt.day) + '/' + str(dt.month) + '/' + str(dt.year)

            if (res.find("td", {"class": "placeholder-text-cell"})):
-                resultObj['event'] = res.find("td", {"class": "placeholder-text-cell"}).text.encode('utf8')
+                resultObj['event'] = res.find("td", {"class": "placeholder-text-cell"}).text
            elif (res.find("td", {"class": "event"})):
-                resultObj['event'] = res.find("td", {"class": "event"}).text.encode('utf8')
+                resultObj['event'] = res.find("td", {"class": "event"}).text
            else:
                resultObj['event'] = None

            if (res.find_all("td", {"class": "team-cell"})):
-                resultObj['team1'] = res.find_all("td", {"class": "team-cell"})[0].text.encode('utf8').lstrip().rstrip()
-                resultObj['team1score'] = converters.to_int(res.find("td", {"class": "result-score"}).find_all("span")[0].text.encode('utf8').lstrip().rstrip())
-                resultObj['team2'] = res.find_all("td", {"class": "team-cell"})[1].text.encode('utf8').lstrip().rstrip()
-                resultObj['team2score'] = converters.to_int(res.find("td", {"class": "result-score"}).find_all("span")[1].text.encode('utf8').lstrip().rstrip())
+                resultObj['team1'] = res.find_all("td", {"class": "team-cell"})[0].text.lstrip().rstrip()
+                resultObj['team1score'] = converters.to_int(res.find("td", {"class": "result-score"}).find_all("span")[0].text.lstrip().rstrip())
+                resultObj['team2'] = res.find_all("td", {"class": "team-cell"})[1].text.lstrip().rstrip()
+                resultObj['team2score'] = converters.to_int(res.find("td", {"class": "result-score"}).find_all("span")[1].text.lstrip().rstrip())
            else:
                resultObj['team1'] = None
                resultObj['team2'] = None
@ -318,10 +300,20 @@ def get_results_by_date(start_date, end_date):
            t2_score = int(team_cols[1].find_all(attrs={"class": "score"})[0].text.strip()[1:-1])
            map = result.find(attrs={"class": "statsDetail"}).find(attrs={"class": "dynamic-map-name-full"}).text
            event = result.find(attrs={"class": "event-col"}).text
-            date = result.find(attrs={"class": "date-col"}).find("a").find("div").text
+            dateText = result.find(attrs={"class": "date-col"}).find("a").find("div").text
+            url = "https://hltv.org" + result.find(attrs={"class": "date-col"}).find("a").get("href")
+            dateArr = dateText.split("/")
+            # TODO: yes, this shouldn't be hardcoded, but I'll be very surprised if this API is still a thing in 21XX
+            startingTwoDigitsOfYear = "20"
+            dateTextFromArrPadded = startingTwoDigitsOfYear + padIfNeeded(dateArr[2]) + "-" + padIfNeeded(dateArr[1]) + "-" + padIfNeeded(dateArr[0])
+
+            dateFromHLTV = datetime.datetime.strptime(dateTextFromArrPadded,'%Y-%m-%d').replace(tzinfo=HLTV_ZONEINFO)
+            dateFromHLTV = dateFromHLTV.astimezone(LOCAL_ZONEINFO)
+
+            date = dateFromHLTV.strftime('%Y-%m-%d')

            result_dict = {"team1": t1, "team2": t2, "team1score": t1_score,
-                           "team2score": t2_score, "date": date, "map": map, "event": event}
+                           "team2score": t2_score, "date": date, "map": map, "event": event, "url": url}

            # Add this pages results to the result list
            results_list.append(result_dict)
--- a/requirements.txt
+++ b/requirements.txt
@ -4,3 +4,4 @@ lxml==4.9.1
 python-utils==2.0.0
 requests==2.20.0
 six==1.10.0
+tzlocal==4.2