reddit/src/reddit.py

"""
title: Reddit
author: @nathanwindisch
author_url: https://git.wnd.sh/owui-tools/reddit
funding_url: https://patreon.com/NathanWindisch
version: 0.0.4
changelog:
- 0.0.1 - Initial upload to openwebui community.
- 0.0.2 - Renamed from "Reddit Feeds" to just "Reddit".
- 0.0.3 - Updated author_url in docstring to point to
          git repo.
- 0.0.4 - Updated to use a class instead of a series of
					functions (RedditPage), and merged the user
					and subreddit functions into a single method
					(RedditPage#get_page).
"""


import json
import requests
from enum import Enum
from typing import Awaitable, Callable
from pydantic import BaseModel, Field

class RedditPageType(Enum):
	SUBREDDIT = "r"
	USER = "u"
	SUBREDDIT_COMMENTS = "r"
	USER_COMMENTS = "u"

class RedditPageData:
	def __init__(self, uri: str = "", posts: list = [], comments: list = [], after: str = ""):
		self.uri = uri
		self.posts = posts
		self.comments = comments
		self.after = after
	def __str__(self): return json.dumps({ "uri": self.uri, "posts": self.posts, "comments": self.comments, "after": self.after })


class RedditPage:
	def __init__(self, id: str, page_type: RedditPageType, after: str | None = None):
		self.base_uri = "https://old.reddit.com"
		self.id = id
		self.page_type = page_type
		self.after = ""
		self.children = []
		self.posts = []
		self.comments = []
		self.after = after

	def __str__(self): return json.dumps(RedditPageData(uri=self.get_uri(), posts=self.posts, comments=self.comments, after=self.after))
	def get_uri(self):
		uri = f"{self.base_uri}/{self.page_type.value}/{self.id}.json"
		if self.after: uri += f"?after={self.after}"
		return uri
	def get_data(self): return { "posts": self.posts, "comments": self.comments, "after": self.after }
	def get_page(self):
		response = requests.get(self.get_uri())
		if not response.ok: return RedditPageData({ "posts": [], "comments": [], "after": "" })
		raw_data = json.loads(response.content)
		is_comments = self.page_type.name.endswith("_COMMENTS")

		if is_comments:
			for i in range(0, 1): self.extract_children(raw_data[i])
			self.after = None
		else:
			self.extract_children(raw_data)
			try: self.after = raw_data["data"]["after"]
			except: None

		self.parse_posts()
		self.parse_comments()
		return RedditPageData(posts=self.posts, comments=self.comments, after=self.after)


	def extract_children(self, data):
		if "data" in data and "children" in data["data"]:
			for item in data["data"]["children"]: self.children.append(item)

	def parse_posts(self):
		for item in self.children:
			if item["kind"] != "t3": continue # skip non-post items
			item = item["data"]
			self.posts.append({
				# General information
				"id": item["name"],
				"title": item["title"],
				"description": item["selftext"],
				"link": item["url"],

				# Author & subreddit information
				"author_username": item["author"],
				"author_id": item["author_fullname"],
				"subreddit_name": item["subreddit"],
				"subreddit_id": item["subreddit_id"],
				"subreddit_subscribers": item["subreddit_subscribers"],

				# Post information
				"score": item["score"],
				"upvotes": item["ups"],
				"downvotes": item["downs"],
				"upvote_ratio": item["upvote_ratio"],
				"total_comments": item["num_comments"],
				"total_crossposts": item["num_crossposts"],
				"total_awards": item["total_awards_received"],
				"domain": item["domain"],
				"flair_text": item["link_flair_text"],
				"media_embed": item["media_embed"],

				# Post flags
				"is_pinned": item["pinned"],
				"is_self": item["is_self"],
				"is_video": item["is_video"],
				"is_media_only": item["media_only"],
				"is_over_18": item["over_18"],
				"is_edited": item["edited"],
				"is_hidden": item["hidden"],
				"is_archived": item["archived"],
				"is_locked": item["locked"],
				"is_quarantined": item["quarantine"],
				"is_spoiler": item["spoiler"],
				"is_stickied": item["stickied"],
				"is_send_replies": item["send_replies"],

				"created_at": item["created_utc"],
			})

	def parse_comments(self):
		for item in self.children:
			if item["kind"] != "t1": continue
			item = item["data"]
			self.comments.append({
				# General information
				"id": item["name"],
				"body": item["body"],
				"link": item["permalink"],
				"post_id": item["link_id"],
				"post_title": item["link_title"],
				"post_link": item["link_permalink"],

				# Author & subreddit information
				"author_username": item["author"],
				"author_id": item["author_fullname"],
				"subreddit_name": item["subreddit"],
				"subreddit_id": item["subreddit_id"],

				# Comment information
				"score": item["score"],
				"upvotes": item["ups"],
				"downvotes": item["downs"],
				"total_comments": item["num_comments"],
				"total_awards": item["total_awards_received"],

				# Comment flags
				"is_edited": item["edited"],
				"is_archived": item["archived"],
				"is_locked": item["locked"],
				"is_quarantined": item["quarantine"],
				"is_stickied": item["stickied"],
				"is_send_replies": item["send_replies"],

				# Comment date
				"published_at": item["created_utc"],
			})


class Tools:
	def __init__(self): pass

	class UserValves(BaseModel):
		USER_AGENT: str = Field(
			default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
			description="The user agent to use when making requests to Reddit."
		)

	async def get_reddit_feed(
		self,
		id: str,
		page_type: RedditPageType,
		__event_emitter__: Callable[[dict], Awaitable[None]],
		__user__: dict = {},
	) -> str:
		"""
		Retrieves the popular posts from a specific feed, either a /u/username or /r/subreddit feed, or the comments on either.
		:param id: The ID of the feed to retrieve, such as a username or a subreddit name. Additionally, a post ID can be appended to either to retrieve comments from a specific post.
		:param page_type: The type of page to retrieve, must be 'USER', 'SUBREDDIT', 'USER_COMMENTS' or 'SUBREDDIT_COMMENTS'.
		:return: An object containing a list of posts, comments, and the next ID to use under the 'after' key, or an error message.
		Note: The 'USER' page_type will retrieve both posts and comments, while the 'SUBREDDIT' page_type will only retrieve posts (unless a post id is provided as well, and the page_type is 'SUBREDDIT_COMMENTS').
		"""
		id = id.replace("/r/", "").replace("/u/", "").replace("u/", "").replace("r/", "") # Strip any /r/ or /u/ from the ID

		# This accounts for the type being dropped by OpenWebUI
		if not isinstance(page_type, RedditPageType):
			try:
				page_type = RedditPageType[page_type]
			except ValueError:
				await __event_emitter__({ "data": { "description": f"Error: Invalid page type '{page_type}', try 'USER', 'SUBREDDIT', 'USER_COMMENTS' or 'SUBREDDIT_COMMENTS'.", "status": "complete", "done": True }, "type": "status" })
				return f"Error: Invalid page type '{page_type}', try either 'USER', 'SUBREDDIT', 'USER_COMMENTS' or 'SUBREDDIT_COMMENTS'."

		await __event_emitter__({ "data": { "description": f"Starting retrieval for {page_type.value}/{id}...", "status": "in_progress", "done": False }, "type": "status" })
		page = RedditPage(id, page_type).get_page()
		await __event_emitter__({ "data": { "description": f"Retrieved {len(page.posts)} posts and {len(page.comments)} comments from {page_type.value}/{id}.", "status": "complete", "done": True }, "type": "status" })
		return str(page)