'''
Generate a CSV file of topics in a Discourse category, fetched via the
Discourse REST API.
'''
|
2026-01-18 13:21:27 +01:00
|
|
|
import os
|
2026-01-11 17:40:57 +01:00
|
|
|
import csv
|
2026-01-18 13:21:27 +01:00
|
|
|
import json
|
|
|
|
|
import requests
|
|
|
|
|
from dotenv import load_dotenv
|
2026-01-11 15:56:30 +01:00
|
|
|
|
2026-01-11 19:25:04 +01:00
|
|
|
# Base URL of the Discourse instance that hosts the database.
DISCOURSE_URL = "https://kb.hs3.pl"  # Database is hosted here

# Numeric ID of the Discourse category holding the database topics.
CATEGORY_ID = 9  # Database category ID

# Tags that mark where a resource is physically located.
PLACES = [
    "cow-work",
    "garage",
    "lab",
    "audiolab",
    "server-room",
]
|
2026-03-24 15:39:18 +01:00
|
|
|
|
2026-01-11 19:25:04 +01:00
|
|
|
class DiscourseDatabase():
    """Fetch topics from a Discourse category and export them to a CSV file.

    On construction, loads credentials from .env, downloads every topic in
    the configured category and writes them to ``zasoby.csv``.
    """

    def __init__(self):
        # Load .env first so credential-reading helpers (get_headers with
        # auth=True) see DISCOURSE_PAT / DISCOURSE_USERNAME before any
        # authenticated request is made.
        load_dotenv()
        data = self.get_category_data()
        self.category_topics_csv(data)

    def get_headers(self, auth=False) -> dict:
        """Get request headers, optionally with auth data.

        Args:
            auth: when True, add the API key and username read from the
                DISCOURSE_PAT / DISCOURSE_USERNAME environment variables.

        Returns:
            A dict of HTTP headers for the Discourse REST API.
        """
        headers = {
            "content-type": "application/json",
        }
        if auth:
            headers["Api-Key"] = os.getenv("DISCOURSE_PAT")
            headers["Api-Username"] = os.getenv("DISCOURSE_USERNAME")
        return headers

    def get_category_data(self) -> dict:
        """Get all topics from a Discourse category with pagination.

        Returns:
            A dict shaped like the Discourse category JSON,
            ``{"topic_list": {"topics": [...]}}``, containing the topics
            from every page that belong to CATEGORY_ID.

        Raises:
            requests.HTTPError: if any page request fails.
        """
        url = f"{DISCOURSE_URL}/c/{CATEGORY_ID}.json"
        print(f"Fetching data from {url}")
        all_topics = []
        page = 0
        while True:
            params = {"per_page": 100, "page": page}
            # timeout guards against a stalled server hanging the script.
            res = requests.get(url, headers=self.get_headers(), params=params,
                               timeout=30)
            res.raise_for_status()
            res_json = res.json()
            topics = res_json["topic_list"]["topics"]
            if not topics:
                break
            # The listing may include topics from subcategories; keep only
            # those that belong directly to the target category.
            for topic in topics:
                if topic["category_id"] == CATEGORY_ID:
                    all_topics.append(topic)
            print(f"Fetched page {page}: {len(topics)} topics, {len(all_topics)} total in category")
            page += 1
        return {"topic_list": {"topics": all_topics}}

    def get_topic_content(self, topic_id: str) -> dict:
        """Get a single topic's content.

        NOTE(review): despite the name, this hits the /posts/ endpoint, so
        ``topic_id`` is treated as a post ID by Discourse — confirm callers
        pass the intended identifier.

        Raises:
            requests.HTTPError: if the request fails.
        """
        get_url = f"{DISCOURSE_URL}/posts/{topic_id}.json"
        res = requests.get(get_url, headers=self.get_headers(auth=True),
                           timeout=30)
        res.raise_for_status()
        return res.json()

    def category_topics_csv(self, category_data) -> None:
        """Save category topics to a csv file (zasoby.csv).

        Args:
            category_data: dict shaped like the Discourse category JSON,
                as returned by :meth:`get_category_data`.
        """
        columns = ["id", "title", "place", "tags"]
        records = category_data["topic_list"]["topics"]
        # newline='' is required by the csv module to avoid blank
        # interleaved rows on platforms that translate line endings.
        with open('zasoby.csv', 'w', encoding='UTF8', newline='') as f:
            write = csv.writer(f)
            write.writerow(columns)
            for topic in records:
                # Title is stored as an HTML link back to the topic page.
                html_url = f'<a href="{DISCOURSE_URL}/t/{topic["id"]}">{topic["title"]}</a>'
                place = self.get_place(topic)
                write.writerow([topic["id"], html_url, place, topic["tags"]])
        print(f"New zasoby.csv generated with {len(records)} records")

    def get_place(self, topic) -> str:
        """Get place of a topic.

        Returns an HTML link to the first matching place tag, or the
        literal string "unknown" when no place tag is present.
        """
        for place in PLACES:
            if place in topic["tags"]:
                # Use DISCOURSE_URL instead of a hard-coded host so the
                # link stays consistent with the rest of the module.
                return f'<a href="{DISCOURSE_URL}/tag/{place}">{place}</a>'
        return "unknown"

    def replace_string_in_post(self, topic_id: str, old_string: str, new_string: str) -> dict:
        """Replace a selected string within a topic's first post using Discourse REST API.

        Args:
            topic_id: ID of the topic whose first post is edited.
            old_string: exact substring to replace in the post's raw body.
            new_string: replacement text.

        Returns:
            The JSON response of the update request.

        Raises:
            requests.HTTPError: if any of the three requests fails.
        """
        # Fetch the topic to get the first post ID
        topic_url = f"{DISCOURSE_URL}/t/{topic_id}.json"
        topic_res = requests.get(topic_url, headers=self.get_headers(auth=True),
                                 timeout=30)
        topic_res.raise_for_status()
        topic_data = topic_res.json()

        # Get the first post ID from the topic
        first_post_id = topic_data["post_stream"]["posts"][0]["id"]

        # Fetch the post content
        post_url = f"{DISCOURSE_URL}/posts/{first_post_id}.json"
        post_res = requests.get(post_url, headers=self.get_headers(auth=True),
                                timeout=30)
        post_res.raise_for_status()
        post_data = post_res.json()

        # Replace the string (no-op if old_string is absent)
        updated_raw = post_data["raw"].replace(old_string, new_string)

        # Update the post
        payload = {"post": {"raw": updated_raw}}
        res = requests.put(post_url, json=payload,
                           headers=self.get_headers(auth=True), timeout=30)
        res.raise_for_status()
        return res.json()
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Constructing the database also downloads the category and writes the CSV.
    disc = DiscourseDatabase()
    # NOTE(review): this fetches the category a second time — the constructor
    # already fetched it but does not expose the result; confirm intent.
    topics = disc.get_category_data()["topic_list"]["topics"]
    # Rewrite the Workshop link to the Lab link in every lab-tagged topic.
    for entry in topics:
        if "lab" in entry["tags"]:
            disc.replace_string_in_post(
                entry["id"],
                "[Workshop](https://kb.s.hs3.pl/tag/workshop)",
                "[Lab](https://kb.s.hs3.pl/tag/lab)",
            )
|
|
|
|
|
|