steam-group-crawler/player_info.py
Kate bcd06fb05d stop after 500 pages of player IDs
-this only applies to the 14 biggest groups
- is needed to stay under the 16MB document limit in mongodb
2023-06-07 04:47:20 +01:00

30 lines
797 B
Python

import requests
from bs4 import BeautifulSoup
from ratelimit import limits, sleep_and_retry
# Rate limiter settings: allow at most CALLS request(s) per RATE_LIMIT seconds.
# With the values below that is one request every 10 seconds.
# NOTE(review): this comment used to say "one request per second", which does
# not match period=10 -- confirm which rate is actually intended.
CALLS = 1
RATE_LIMIT = 10
@sleep_and_retry
@limits(calls=CALLS, period=RATE_LIMIT)
def get_group_links(user_id, *, timeout=30):
    """Return the identifiers of the groups listed on a Steam profile.

    Fetches ``https://steamcommunity.com/profiles/<user_id>/groups/`` and, for
    every ``div.group_block`` that contains an ``a.linkTitle`` anchor, collects
    the last path segment of that anchor's ``href``.

    Calls are throttled by the ``ratelimit`` decorators: at most ``CALLS``
    invocations per ``RATE_LIMIT`` seconds, sleeping and retrying when the
    limit is hit.

    Args:
        user_id: Value interpolated into the profile URL
            (e.g. ``"76561198084483014"``).
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of strings, one per group link found, in page order. The HTTP
        status code is not checked, so a response without any ``group_block``
        elements (for example an error page) yields an empty list.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    url = f"https://steamcommunity.com/profiles/{user_id}/groups/"
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "lxml")

    group_links = []
    for block in soup.find_all("div", class_="group_block"):
        link_element = block.find("a", class_="linkTitle")
        # Skip blocks with no title link, or a link that lacks an href,
        # instead of raising KeyError on malformed markup.
        href = link_element.get("href") if link_element else None
        if not href:
            continue
        # Strip a trailing slash so the last path segment is never empty.
        group_links.append(href.rstrip("/").split("/")[-1])
    return group_links
if __name__ == "__main__":
    # Manual smoke test: print the group identifiers scraped for one
    # hard-coded profile ID.
    print(get_group_links("76561198084483014"))