Add pagination and auth headers
This commit is contained in:
parent
32516d4b74
commit
c023427bf6
2 changed files with 36 additions and 8 deletions
41
discourse.py
41
discourse.py
|
|
@@ -12,19 +12,46 @@ CATEGORY_ID = 9 # Database category ID
|
||||||
class DiscourseDatabase():
|
class DiscourseDatabase():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
data = self.get_category_data()
|
data = self.get_category_data()
|
||||||
self.category_topics_csv(data)
|
self.category_topics_csv(data)
|
||||||
|
load_dotenv()
|
||||||
def get_category_data(self) -> requests.Response:
|
|
||||||
"""Get data from a Discourse category"""
|
def get_headers(self, auth=False):
|
||||||
|
"""Get request headers, optionally with auth data."""
|
||||||
headers = {
|
headers = {
|
||||||
"content-type": "application/json",
|
"content-type": "application/json",
|
||||||
}
|
}
|
||||||
|
if auth:
|
||||||
|
headers["Api-Key"] = os.getenv("DISCOURSE_PAT")
|
||||||
|
headers["Api-Username"] = os.getenv("DISCOURSE_USERNAME")
|
||||||
|
return headers
|
||||||
|
|
||||||
|
def get_category_data(self) -> dict:
|
||||||
|
"""Get all topics from a Discourse category with pagination"""
|
||||||
url = f"{DISCOURSE_URL}/c/{CATEGORY_ID}.json"
|
url = f"{DISCOURSE_URL}/c/{CATEGORY_ID}.json"
|
||||||
print(f"Fetching data from {url}")
|
print(f"Fetching data from {url}")
|
||||||
res = requests.get(url, headers)
|
all_topics = []
|
||||||
|
page = 0
|
||||||
|
while True:
|
||||||
|
params = {"per_page": 100, "page": page}
|
||||||
|
res = requests.get(url, headers=self.get_headers(), params=params)
|
||||||
|
res.raise_for_status()
|
||||||
|
res_json = res.json()
|
||||||
|
topics = res_json["topic_list"]["topics"]
|
||||||
|
if not topics:
|
||||||
|
break
|
||||||
|
for topic in topics:
|
||||||
|
if topic["category_id"] == CATEGORY_ID:
|
||||||
|
all_topics.append(topic)
|
||||||
|
print(f"Fetched page {page}: {len(topics)} topics, {len(all_topics)} total in category")
|
||||||
|
page += 1
|
||||||
|
return {"topic_list": {"topics": all_topics}}
|
||||||
|
|
||||||
|
def get_topic_content(self, topic_id: str):
|
||||||
|
"""Get a single topic's content"""
|
||||||
|
get_url = f"{DISCOURSE_URL}/posts/{topic_id}.json"
|
||||||
|
res = requests.get(get_url, headers=self.get_headers(auth=True))
|
||||||
res.raise_for_status()
|
res.raise_for_status()
|
||||||
res_json = json.loads(res.text)
|
return res.json()
|
||||||
return res_json
|
|
||||||
|
|
||||||
def category_topics_csv(self, category_data) -> None:
|
def category_topics_csv(self, category_data) -> None:
|
||||||
"""Save category topics to a csv file"""
|
"""Save category topics to a csv file"""
|
||||||
|
|
|
||||||
|
|
@@ -1,3 +1,4 @@
|
||||||
requests==2.32.5
|
requests==2.32.5
|
||||||
Jinja2==3.1.6
|
Jinja2==3.1.6
|
||||||
pandas==2.3.3
|
pandas==2.3.3
|
||||||
|
python-dotenv==1.2.1
|
||||||
Loading…
Add table
Add a link
Reference in a new issue