204 lines
7.3 KiB
Python
204 lines
7.3 KiB
Python
"""
|
|
title: Reddit
|
|
author: @nathanwindisch
|
|
author_url: https://git.wnd.sh/owui-tools/reddit
|
|
funding_url: https://patreon.com/NathanWindisch
|
|
version: 0.0.4
|
|
changelog:
|
|
- 0.0.1 - Initial upload to openwebui community.
|
|
- 0.0.2 - Renamed from "Reddit Feeds" to just "Reddit".
|
|
- 0.0.3 - Updated author_url in docstring to point to
|
|
git repo.
|
|
- 0.0.4 - Updated to use a class instead of a series of
|
|
functions (RedditPage), and merged the user
|
|
and subreddit functions into a single method
|
|
(RedditPage#get_page).
|
|
"""
|
|
|
|
|
|
import json
|
|
import requests
|
|
from enum import Enum
|
|
from typing import Awaitable, Callable
|
|
from pydantic import BaseModel, Field
|
|
|
|
class RedditPageType(Enum):
    """Kind of Reddit page to fetch; the value is the URL path prefix.

    NOTE: the ``*_COMMENTS`` members reuse the values of ``SUBREDDIT`` and
    ``USER``. Python's ``Enum`` treats a repeated value as an *alias* of the
    first member defined with it, so ``RedditPageType.SUBREDDIT_COMMENTS`` is
    the very same object as ``RedditPageType.SUBREDDIT`` (its ``.name`` is
    ``"SUBREDDIT"``), and likewise for ``USER_COMMENTS`` / ``USER``. Code
    cannot tell a comments page type apart from its base type by inspecting
    the member.
    """

    # Canonical members: "/r/<name>" and "/u/<name>" feeds.
    SUBREDDIT = "r"
    USER = "u"

    # Aliases of the members above (same value, see class docstring).
    SUBREDDIT_COMMENTS = "r"
    USER_COMMENTS = "u"
|
|
|
|
class RedditPageData:
|
|
def __init__(self, uri: str = "", posts: list = [], comments: list = [], after: str = ""):
|
|
self.uri = uri
|
|
self.posts = posts
|
|
self.comments = comments
|
|
self.after = after
|
|
def __str__(self): return json.dumps({ "uri": self.uri, "posts": self.posts, "comments": self.comments, "after": self.after })
|
|
|
|
|
|
class RedditPage:
|
|
def __init__(self, id: str, page_type: RedditPageType, after: str | None = None):
|
|
self.base_uri = "https://old.reddit.com"
|
|
self.id = id
|
|
self.page_type = page_type
|
|
self.after = ""
|
|
self.children = []
|
|
self.posts = []
|
|
self.comments = []
|
|
self.after = after
|
|
|
|
def __str__(self): return json.dumps(RedditPageData(uri=self.get_uri(), posts=self.posts, comments=self.comments, after=self.after))
|
|
def get_uri(self):
|
|
uri = f"{self.base_uri}/{self.page_type.value}/{self.id}.json"
|
|
if self.after: uri += f"?after={self.after}"
|
|
return uri
|
|
def get_data(self): return { "posts": self.posts, "comments": self.comments, "after": self.after }
|
|
def get_page(self):
|
|
response = requests.get(self.get_uri())
|
|
if not response.ok: return RedditPageData({ "posts": [], "comments": [], "after": "" })
|
|
raw_data = json.loads(response.content)
|
|
is_comments = self.page_type.name.endswith("_COMMENTS")
|
|
|
|
if is_comments:
|
|
for i in range(0, 1): self.extract_children(raw_data[i])
|
|
self.after = None
|
|
else:
|
|
self.extract_children(raw_data)
|
|
try: self.after = raw_data["data"]["after"]
|
|
except: None
|
|
|
|
self.parse_posts()
|
|
self.parse_comments()
|
|
return RedditPageData(posts=self.posts, comments=self.comments, after=self.after)
|
|
|
|
|
|
def extract_children(self, data):
|
|
if "data" in data and "children" in data["data"]:
|
|
for item in data["data"]["children"]: self.children.append(item)
|
|
|
|
def parse_posts(self):
|
|
for item in self.children:
|
|
if item["kind"] != "t3": continue # skip non-post items
|
|
item = item["data"]
|
|
self.posts.append({
|
|
# General information
|
|
"id": item["name"],
|
|
"title": item["title"],
|
|
"description": item["selftext"],
|
|
"link": item["url"],
|
|
|
|
# Author & subreddit information
|
|
"author_username": item["author"],
|
|
"author_id": item["author_fullname"],
|
|
"subreddit_name": item["subreddit"],
|
|
"subreddit_id": item["subreddit_id"],
|
|
"subreddit_subscribers": item["subreddit_subscribers"],
|
|
|
|
# Post information
|
|
"score": item["score"],
|
|
"upvotes": item["ups"],
|
|
"downvotes": item["downs"],
|
|
"upvote_ratio": item["upvote_ratio"],
|
|
"total_comments": item["num_comments"],
|
|
"total_crossposts": item["num_crossposts"],
|
|
"total_awards": item["total_awards_received"],
|
|
"domain": item["domain"],
|
|
"flair_text": item["link_flair_text"],
|
|
"media_embed": item["media_embed"],
|
|
|
|
# Post flags
|
|
"is_pinned": item["pinned"],
|
|
"is_self": item["is_self"],
|
|
"is_video": item["is_video"],
|
|
"is_media_only": item["media_only"],
|
|
"is_over_18": item["over_18"],
|
|
"is_edited": item["edited"],
|
|
"is_hidden": item["hidden"],
|
|
"is_archived": item["archived"],
|
|
"is_locked": item["locked"],
|
|
"is_quarantined": item["quarantine"],
|
|
"is_spoiler": item["spoiler"],
|
|
"is_stickied": item["stickied"],
|
|
"is_send_replies": item["send_replies"],
|
|
|
|
"created_at": item["created_utc"],
|
|
})
|
|
|
|
def parse_comments(self):
|
|
for item in self.children:
|
|
if item["kind"] != "t1": continue
|
|
item = item["data"]
|
|
self.comments.append({
|
|
# General information
|
|
"id": item["name"],
|
|
"body": item["body"],
|
|
"link": item["permalink"],
|
|
"post_id": item["link_id"],
|
|
"post_title": item["link_title"],
|
|
"post_link": item["link_permalink"],
|
|
|
|
# Author & subreddit information
|
|
"author_username": item["author"],
|
|
"author_id": item["author_fullname"],
|
|
"subreddit_name": item["subreddit"],
|
|
"subreddit_id": item["subreddit_id"],
|
|
|
|
# Comment information
|
|
"score": item["score"],
|
|
"upvotes": item["ups"],
|
|
"downvotes": item["downs"],
|
|
"total_comments": item["num_comments"],
|
|
"total_awards": item["total_awards_received"],
|
|
|
|
# Comment flags
|
|
"is_edited": item["edited"],
|
|
"is_archived": item["archived"],
|
|
"is_locked": item["locked"],
|
|
"is_quarantined": item["quarantine"],
|
|
"is_stickied": item["stickied"],
|
|
"is_send_replies": item["send_replies"],
|
|
|
|
# Comment date
|
|
"published_at": item["created_utc"],
|
|
})
|
|
|
|
|
|
|
|
class Tools:
    # Tool container exposing the Reddit feed retrieval function.

    def __init__(self): pass

    class UserValves(BaseModel):
        # Per-user settings for this tool.
        USER_AGENT: str = Field(
            default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
            description="The user agent to use when making requests to Reddit."
        )

    async def get_reddit_feed(
        self,
        id: str,
        page_type: RedditPageType,
        __event_emitter__: Callable[[dict], Awaitable[None]],
        __user__: dict = {},
    ) -> str:
        """
        Retrieves the popular posts from a specific feed, either a /u/username or /r/subreddit feed, or the comments on either.
        :param id: The ID of the feed to retrieve, such as a username or a subreddit name. Additionally, a post ID can be appended to either to retrieve comments from a specific post.
        :param page_type: The type of page to retrieve, must be 'USER', 'SUBREDDIT', 'USER_COMMENTS' or 'SUBREDDIT_COMMENTS'.
        :return: An object containing a list of posts, comments, and the next ID to use under the 'after' key, or an error message.
        Note: The 'USER' page_type will retrieve both posts and comments, while the 'SUBREDDIT' page_type will only retrieve posts (unless a post id is provided as well, and the page_type is 'SUBREDDIT_COMMENTS').
        """
        # Strip one leading "/r/", "/u/", "r/" or "u/" from the ID. Only the
        # start is touched: removing "r/" or "u/" anywhere in the string would
        # corrupt IDs such as "soccer/comments/abc" (-> "soccecomments/abc").
        id = id.strip().lstrip("/")
        if id.startswith(("r/", "u/")):
            id = id[2:]

        # This accounts for the type being dropped by OpenWebUI
        if not isinstance(page_type, RedditPageType):
            requested = str(page_type).strip()
            try:
                # Lookup by member name; Enum name lookup raises KeyError
                # (not ValueError) for an unknown name.
                page_type = RedditPageType[requested.upper()]
            except KeyError:
                try:
                    # Fall back to lookup by value ("r" / "u").
                    page_type = RedditPageType(requested.lower())
                except ValueError:
                    await __event_emitter__({ "data": { "description": f"Error: Invalid page type '{page_type}', try 'USER', 'SUBREDDIT', 'USER_COMMENTS' or 'SUBREDDIT_COMMENTS'.", "status": "complete", "done": True }, "type": "status" })
                    return f"Error: Invalid page type '{page_type}', try either 'USER', 'SUBREDDIT', 'USER_COMMENTS' or 'SUBREDDIT_COMMENTS'."

        await __event_emitter__({ "data": { "description": f"Starting retrieval for {page_type.value}/{id}...", "status": "in_progress", "done": False }, "type": "status" })
        page = RedditPage(id, page_type).get_page()
        await __event_emitter__({ "data": { "description": f"Retrieved {len(page.posts)} posts and {len(page.comments)} comments from {page_type.value}/{id}.", "status": "complete", "done": True }, "type": "status" })
        return str(page)