From 883f396794fde44eba810fd9555fd6da3ed5960b Mon Sep 17 00:00:00 2001 From: Kate Date: Thu, 8 Jun 2023 09:26:28 +0100 Subject: [PATCH] clean up code. infinite random crawl. --- Pipfile | 1 + main.py | 170 ++++++++++++++++++-------- player_info.py | 30 ----- group_info.py => steam_api_wrapper.py | 36 ++++-- 4 files changed, 146 insertions(+), 91 deletions(-) delete mode 100644 player_info.py rename group_info.py => steam_api_wrapper.py (75%) diff --git a/Pipfile b/Pipfile index 6a4b2f9..2e1b01d 100644 --- a/Pipfile +++ b/Pipfile @@ -9,6 +9,7 @@ beautifulsoup4 = "*" lxml = "*" ratelimit = "*" mongoengine = "*" +black = "*" [dev-packages] diff --git a/main.py b/main.py index 1bea2fe..d7237d1 100644 --- a/main.py +++ b/main.py @@ -1,67 +1,129 @@ from models.group import Group from models.player import Player -import group_info, player_info -import datetime +from steam_api_wrapper import get_group_details, get_players_groups +import datetime, random -# Set the starting group -starting_group = "ilovebloop" - -# Get the members of the starting group -print("[*] Getting members of starting group...") -starting_group_details = group_info.get_group_details(starting_group) -starting_group_members = starting_group_details["members"] - -# Check if the starting group already exists in the database -if not Group.objects(id64=starting_group_details["id64"]).first(): - # If not, create it - Group( - id64=starting_group_details["id64"], - name=starting_group_details["name"], - tag=starting_group_details["tag"], - members=starting_group_members, - link=starting_group, - last_updated=datetime.datetime.now() - ).save() - -# Get the groups of the starting group members -groups_of_starting_group_members = [] -for member in starting_group_members: - print(f"[*] Getting groups for member {member}...") - member_group_links = player_info.get_group_links(member) - - # Check if the member already exists in the database - if not Player.objects(id64=member).first(): - # If not, create it - Player( - id64=member, - groups=member_group_links, - link=f"https://steamcommunity.com/profiles/{member}", - last_updated=datetime.datetime.now() - ).save() - - groups_of_starting_group_members.extend(member_group_links) - -# Remove duplicates -groups_of_starting_group_members = list(set(groups_of_starting_group_members)) - -print(groups_of_starting_group_members) - -# Update or create each group in the database -for group_link in groups_of_starting_group_members: - print(f"[*] Getting group details for group {group_link}...") +def save_group(group_details: dict): # Check if the group already exists in the database - if not Group.objects(link=group_link).first(): + if not Group.objects(id64=group_details["id64"]).first(): # If not, create it - group_details = group_info.get_group_details(group_link) Group( id64=group_details["id64"], name=group_details["name"], tag=group_details["tag"], members=group_details["members"], - link=group_link, - last_updated=datetime.datetime.now() + link=group_details["link"], + last_updated=datetime.datetime.now(), ).save() - print(f"[*] Got group details for group {group_details['name']}") + print(f"\r[*] Got group details for group {group_details['name']}", end="") else: - print(f"[*] Group {group_link} already exists in the database. Skipping...") + print( + f"\r[*] Group {group_details['link']} already exists in the database. Skipping...", + end="", + ) + + +def save_player(player_details: dict): + # Check if the player already exists in the database + if not Player.objects(id64=player_details["id64"]).first(): + # If not, create it + Player( + id64=player_details["id64"], + groups=player_details["groups"], + link=player_details["link"], + last_updated=datetime.datetime.now(), + ).save() + print(f"\r[*] Got player details for player {player_details['id64']}", end="") + else: + print( + f"\r[*] Player {player_details['id64']} already exists in the database. Skipping...", + end="", + ) + + +def scan_group(group_link: str): + if not Group.objects( + link=str("https://steamcommunity.com/groups/" + group_link) + ).first(): + # If the starting group does not exist in the database, get its details + starting_group_details = get_group_details(group_link) + save_group(starting_group_details) + print(f"\r\033[K[+] Added group {group_link} to the database.") + else: + print( + f"\r\033[K[ ] Group {group_link} already exists in the database. Skipping..." + ) + + +def scan_player(player_id: str): + if not Player.objects(id64=player_id).first(): + member_group_links = get_players_groups(player_id) + + # Save player to database + save_player( + { + "id64": player_id, + "groups": member_group_links, + "link": f"https://steamcommunity.com/profiles/{player_id}", + } + ) + print(f"\r\033[K[+] Added player {player_id} to the database.") + else: + print( + f"\r\033[K[ ] Player {player_id} already exists in the database. Skipping..." + ) + + +def get_random_group(max_size=100): + # First, we find the groups that satisfy the size condition + suitable_groups = Group.objects.filter( + __raw__={"$where": f"this.members.length < {max_size}"} + ) + + # If there are no suitable groups, return None + if not suitable_groups: + return None + + # Then, we randomly select one from the suitable groups + random_index = random.randint(0, len(suitable_groups) - 1) + + return suitable_groups[random_index] + + +def get_random_player(): + # First, we find the players that satisfy the size condition + suitable_players = Player.objects.all() + + # If there are no suitable players, return None + if not suitable_players: + return None + + # Then, we randomly select one from the suitable players + random_index = random.randint(0, len(suitable_players) - 1) + + return suitable_players[random_index] + + +if __name__ == "__main__": + # Crawl starting group + scan_group("ilovebloop") + + flippy = 0 + while True: + try: + if flippy % 2: + # Get a random group + random_group = random.choice(get_random_player().groups) + if random_group: + print(f"[*] Crawling group {random_group}...", end="") + scan_group(random_group) + else: + # Get a random player + random_player_id = random.choice(get_random_group().members) + print(f"[*] Crawling player {random_player_id}...", end="") + scan_player(random_player_id) + flippy += 1 + except IndexError as e: + print("[E] IndexError: ", e) + continue diff --git a/player_info.py b/player_info.py deleted file mode 100644 index 210c526..0000000 --- a/player_info.py +++ /dev/null @@ -1,30 +0,0 @@ -import requests -from bs4 import BeautifulSoup -from ratelimit import limits, sleep_and_retry - -# Set up rate limiter, one request per second -CALLS = 1 -RATE_LIMIT = 10 - - -@sleep_and_retry -@limits(calls=CALLS, period=RATE_LIMIT) -def get_group_links(user_id): - url = f"https://steamcommunity.com/profiles/{user_id}/groups/" - response = requests.get(url) - - soup = BeautifulSoup(response.text, "lxml") - - group_blocks = soup.find_all("div", class_="group_block") - group_links = [] - for block in group_blocks: - link_element = block.find("a", class_="linkTitle") - if link_element: - group_links.append(link_element["href"].split("/")[-1]) - - return group_links - - -if __name__ == "__main__": - group_ids = get_group_links("76561198084483014") - print(group_ids) diff --git a/group_info.py b/steam_api_wrapper.py similarity index 75% rename from group_info.py rename to steam_api_wrapper.py index 3f8c41a..56b5161 100644 --- a/group_info.py +++ b/steam_api_wrapper.py @@ -1,10 +1,11 @@ -import requests, time +import requests from bs4 import BeautifulSoup from ratelimit import limits, sleep_and_retry +import time # Set up rate limiter, one request per second CALLS = 1 -RATE_LIMIT = 10 +RATE_LIMIT = 15 @sleep_and_retry @@ -13,7 +14,10 @@ def make_request(url): try: response = requests.get(url) if response.status_code == 429: - print("HTTP 429 Too Many Requests received. Pausing for 30 seconds.") + print( + f"\r[*]HTTP 429 Too Many Requests received. Pausing for 30 seconds.", + end="", + ) time.sleep(30) return make_request(url) response.raise_for_status() @@ -62,7 +66,7 @@ def get_group_details(group_url_name): return try: - print(f"[*] Getting page {next_page_url}...") + print(f"\r[*] Getting page {next_page_url}...", end="") group_details_soup = BeautifulSoup( group_details_response.content, "lxml-xml" ) @@ -92,12 +96,30 @@ def get_group_details(group_url_name): return { "id64": group_id64, "name": group_name, - "url": group_url, + "link": group_url, "tag": tag, "members": all_members, } +@sleep_and_retry +@limits(calls=CALLS, period=RATE_LIMIT) +def get_players_groups(user_id): + url = f"https://steamcommunity.com/profiles/{user_id}/groups/" + response = make_request(url) + + soup = BeautifulSoup(response.text, "lxml") + + group_blocks = soup.find_all("div", class_="group_block") + group_links = [] + for block in group_blocks: + link_element = block.find("a", class_="linkTitle") + if link_element: + group_links.append(link_element["href"].split("/")[-1]) + + return group_links + + if __name__ == "__main__": - # Replace 'ilovebloop' with the desired group URL name - print(get_group_details("steamworks")) + group_ids = get_players_groups("76561198084483014") + print(group_ids)