Add pagination and auth headers

This commit is contained in:
Marta Sienkiewicz 2026-01-18 13:20:57 +01:00
parent 32516d4b74
commit c023427bf6
2 changed files with 36 additions and 8 deletions

View file

@@ -12,19 +12,46 @@ CATEGORY_ID = 9 # Database category ID
class DiscourseDatabase():
def __init__(self):
    """Load environment settings, fetch the category topics, and export them.

    Runs the whole pipeline on construction: the category data returned by
    ``get_category_data`` is handed straight to ``category_topics_csv``.
    """
    # Load the .env file before any request is made, so that values read
    # through os.getenv (see get_headers) are already in the environment.
    # NOTE(review): assumes the credentials live in a .env file - confirm.
    load_dotenv()
    data = self.get_category_data()
    self.category_topics_csv(data)
def get_headers(self, auth=False):
    """Build the HTTP headers for a request.

    Args:
        auth: When true, ``Api-Key`` and ``Api-Username`` are added, taken
            from the ``DISCOURSE_PAT`` and ``DISCOURSE_USERNAME``
            environment variables (``None`` when a variable is unset).

    Returns:
        A new dict mapping header names to values.
    """
    base = {"content-type": "application/json"}
    if not auth:
        return base
    credentials = {
        "Api-Key": os.getenv("DISCOURSE_PAT"),
        "Api-Username": os.getenv("DISCOURSE_USERNAME"),
    }
    return {**base, **credentials}
def get_category_data(self) -> dict:
    """Get all topics from a Discourse category with pagination.

    Requests ``{DISCOURSE_URL}/c/{CATEGORY_ID}.json`` page by page,
    starting at page 0, until a page comes back with no topics. Only
    topics whose ``category_id`` equals ``CATEGORY_ID`` are kept.

    Returns:
        ``{"topic_list": {"topics": [...]}}`` holding the kept topics from
        every page, using the same nesting that is read from each response.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        requests.Timeout: If a page does not answer within the timeout.
    """
    url = f"{DISCOURSE_URL}/c/{CATEGORY_ID}.json"
    print(f"Fetching data from {url}")
    # The headers do not change between pages, so build them once.
    headers = self.get_headers()
    all_topics = []
    page = 0
    while True:
        params = {"per_page": 100, "page": page}
        # Without a timeout a stalled connection would block forever.
        res = requests.get(url, headers=headers, params=params, timeout=30)
        res.raise_for_status()
        topics = res.json()["topic_list"]["topics"]
        if not topics:
            # An empty page marks the end of the listing.
            break
        all_topics.extend(
            topic for topic in topics if topic["category_id"] == CATEGORY_ID
        )
        print(
            f"Fetched page {page}: {len(topics)} topics, "
            f"{len(all_topics)} total in category"
        )
        page += 1
    return {"topic_list": {"topics": all_topics}}
def get_topic_content(self, topic_id: str):
    """Get a single topic's content.

    Sends an authenticated GET to ``{DISCOURSE_URL}/posts/{topic_id}.json``
    and returns the decoded JSON body.

    NOTE(review): the path is ``/posts/<id>.json`` although the argument is
    named ``topic_id`` - confirm this is the intended endpoint and ID kind.

    Args:
        topic_id: Identifier interpolated into the request URL.

    Returns:
        The parsed JSON response body.

    Raises:
        requests.HTTPError: If the response status is an error.
        requests.Timeout: If the server does not answer within the timeout.
    """
    get_url = f"{DISCOURSE_URL}/posts/{topic_id}.json"
    # Without a timeout a stalled connection would block forever.
    res = requests.get(
        get_url,
        headers=self.get_headers(auth=True),
        timeout=30,
    )
    res.raise_for_status()
    # Single return path; res.json() matches how get_category_data decodes.
    return res.json()
def category_topics_csv(self, category_data) -> None:
"""Save category topics to a csv file"""

View file

@@ -1,3 +1,4 @@
requests==2.32.5
Jinja2==3.1.6
pandas==2.3.3
pandas==2.3.3
python-dotenv==1.2.1