# steam-group-crawler/steam_api_wrapper.py

import requests
from bs4 import BeautifulSoup
from ratelimit import limits, sleep_and_retry
import time

# Rate limiter: allow at most CALLS requests per RATE_LIMIT seconds
# (one request every 15 seconds)
CALLS = 1
RATE_LIMIT = 15


@sleep_and_retry
@limits(calls=CALLS, period=RATE_LIMIT)
def make_request(url):
    try:
        response = requests.get(url)
        if response.status_code == 429:
            print(
                "\r[*] HTTP 429 Too Many Requests received. Pausing for 30 seconds.",
                end="",
            )
            time.sleep(30)
            return make_request(url)
        response.raise_for_status()
        return response
    except requests.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        return None
    except Exception as err:
        print(f"Other error occurred: {err}")
        return None


def get_group_details(group_url_name):
    # Regular group page URL
    group_url = f"https://steamcommunity.com/groups/{group_url_name}"
    group_page_response = make_request(group_url)
    if not group_page_response:
        return
    group_page_soup = BeautifulSoup(group_page_response.text, "lxml")
    # Group tag
    tag_span = group_page_soup.find("span", {"class": "grouppage_header_abbrev"})
    tag = tag_span.text.strip() if tag_span else "No tag"
    # Initialize an empty list to store all members
    all_members = []
    # Start with the first page of the members list XML
    next_page_url = (
        f"https://steamcommunity.com/groups/{group_url_name}/memberslistxml/?xml=1"
    )
    # Initialize a counter for pages
    page_counter = 0
    while next_page_url and page_counter < 500:
        # Group details XML page URL
        group_details_url = next_page_url
        group_details_response = make_request(group_details_url)
        if not group_details_response:
            return
        try:
            print(f"\r[*] Getting page {next_page_url}...", end="")
            group_details_soup = BeautifulSoup(
                group_details_response.content, "lxml-xml"
            )
            # Group name
            group_name = group_details_soup.find("groupName").text
            # Group ID64
            group_id64 = group_details_soup.find("groupID64").text
            # Member list
            members = [
                member.text for member in group_details_soup.find_all("steamID64")
            ]
            all_members.extend(members)
            # Get the URL for the next page, if there is one
            next_page_link = group_details_soup.find("nextPageLink")
            next_page_url = next_page_link.text if next_page_link else None
            # Increment page counter
            page_counter += 1
        except Exception as err:
            print(f"Error occurred while parsing group details XML page: {err}")
            # Bail out instead of re-requesting the same page forever
            return None
    return {
        "id64": group_id64,
        "name": group_name,
        "link": group_url,
        "tag": tag,
        "members": all_members,
    }


@sleep_and_retry
@limits(calls=CALLS, period=RATE_LIMIT)
def get_players_groups(user_id):
    url = f"https://steamcommunity.com/profiles/{user_id}/groups/"
    response = make_request(url)
    # make_request returns None on failure, so guard before parsing
    if not response:
        return []
    soup = BeautifulSoup(response.text, "lxml")
    group_blocks = soup.find_all("div", class_="group_block")
    group_links = []
    for block in group_blocks:
        link_element = block.find("a", class_="linkTitle")
        if link_element:
            group_links.append(link_element["href"].split("/")[-1])
    return group_links


if __name__ == "__main__":
    group_ids = get_players_groups("76561198084483014")
    print(group_ids)
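    # Minimal usage sketch for get_group_details as well; "valve" is an assumed
    # example group URL name for illustration, not one used by the original script.
    group_info = get_group_details("valve")
    if group_info:
        print(
            f"{group_info['name']} [{group_info['tag']}]: "
            f"{len(group_info['members'])} members"
        )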