From c023427bf6410f2228f4a3aa07c11bf94ce727c7 Mon Sep 17 00:00:00 2001
From: Marta Sienkiewicz
Date: Sun, 18 Jan 2026 13:20:57 +0100
Subject: [PATCH] Add pagination and auth headers

---
 discourse.py     | 82 +++++++++++++++++++++++++++++++++++++++++++++++-------
 requirements.txt |  3 ++-
 2 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/discourse.py b/discourse.py
index f72f12c..5760646 100644
--- a/discourse.py
+++ b/discourse.py
@@ -12,19 +12,87 @@ CATEGORY_ID = 9 # Database category ID
 class DiscourseDatabase():
+    # Seconds to wait for a Discourse response; requests applies no timeout
+    # by default, so a stalled connection would otherwise block forever.
+    REQUEST_TIMEOUT = 30
+
     def __init__(self):
+        # Load .env before any request is made so the credentials are in the
+        # environment for every method. Imported locally so the name is in
+        # scope without relying on the module's import block.
+        from dotenv import load_dotenv
+
+        load_dotenv()
         data = self.get_category_data()
-        self.category_topics_csv(data)
-
-    def get_category_data(self) -> requests.Response:
-        """Get data from a Discourse category"""
+        self.category_topics_csv(data)
+
+    def get_headers(self, auth=False):
+        """Build request headers, optionally including API credentials.
+
+        With ``auth=True`` the ``Api-Key`` and ``Api-Username`` headers are
+        read from the ``DISCOURSE_PAT`` and ``DISCOURSE_USERNAME``
+        environment variables.
+        """
+        # Local import keeps this method self-contained.
+        import os
+
         headers = {
             "content-type": "application/json",
         }
+        if auth:
+            # NOTE(review): os.getenv returns None for an unset variable;
+            # confirm how a None header value should be handled here.
+            headers["Api-Key"] = os.getenv("DISCOURSE_PAT")
+            headers["Api-Username"] = os.getenv("DISCOURSE_USERNAME")
+        return headers
+
+    def get_category_data(self) -> dict:
+        """Fetch every topic of the category, following pagination.
+
+        Pages are requested until one returns no topics. Only topics whose
+        ``category_id`` equals ``CATEGORY_ID`` are kept.
+
+        Returns a dict shaped ``{"topic_list": {"topics": [...]}}``.
+        Raises ``requests.HTTPError`` when a page request fails.
+        """
         url = f"{DISCOURSE_URL}/c/{CATEGORY_ID}.json"
         print(f"Fetching data from {url}")
-        res = requests.get(url, headers)
+        headers = self.get_headers()  # loop-invariant, build once
+        all_topics = []
+        page = 0
+        while True:
+            params = {"per_page": 100, "page": page}
+            res = requests.get(
+                url,
+                headers=headers,
+                params=params,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+            res.raise_for_status()
+            topics = res.json()["topic_list"]["topics"]
+            if not topics:
+                break
+            all_topics.extend(
+                topic for topic in topics if topic["category_id"] == CATEGORY_ID
+            )
+            print(f"Fetched page {page}: {len(topics)} topics, {len(all_topics)} total in category")
+            page += 1
+        return {"topic_list": {"topics": all_topics}}
+
+    def get_topic_content(self, topic_id: str):
+        """Fetch the JSON for one ID with authenticated headers.
+
+        Raises ``requests.HTTPError`` when the request fails.
+        """
+        # NOTE(review): in the Discourse API /posts/{id}.json looks up a post
+        # id, while topics are served from /t/{id}.json -- confirm which id
+        # callers pass before relying on this for topic ids.
+        get_url = f"{DISCOURSE_URL}/posts/{topic_id}.json"
+        res = requests.get(
+            get_url,
+            headers=self.get_headers(auth=True),
+            timeout=self.REQUEST_TIMEOUT,
+        )
         res.raise_for_status()
-        res_json = json.loads(res.text)
-        return res_json
+        return res.json()
 
     def category_topics_csv(self, category_data) -> None:
         """Save category topics to a csv file"""
diff --git a/requirements.txt b/requirements.txt
index ac9ebf9..6c001de 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 requests==2.32.5
 Jinja2==3.1.6
-pandas==2.3.3
\ No newline at end of file
+pandas==2.3.3
+python-dotenv==1.2.1
\ No newline at end of file