Initial commit.
This commit is contained in:
commit
1198424f4f
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
__pycache__/
|
3
README.md
Normal file
3
README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Open WebUI tool: BBC News feeds
|
||||
|
||||
A tool to interact with bbc.co.uk/news feeds, including top stories, world news, UK/regional news, and more.
|
148
src/bbc_news.py
Normal file
148
src/bbc_news.py
Normal file
@ -0,0 +1,148 @@
|
||||
"""
|
||||
title: BBC News Feeds
|
||||
author: @nathanwindisch
|
||||
author_url: https://github.com/nathanwindisch
|
||||
funding_url: https://www.patreon.com/NathanWindisch
|
||||
version: 0.0.9
|
||||
changelog:
|
||||
- 0.0.1 - Initial upload to openwebui community.
|
||||
- 0.0.2 - Modified formatting slightly.
|
||||
- 0.0.3 - Added tool docstring, and this changelog.
|
||||
- 0.0.4 - Added funding_url to docstring.
|
||||
- 0.0.5 - Updated get_bbc_news_feed function to use a default for
|
||||
the ArticleType, and updated its docstring to include
|
||||
a list of the possible types, to assist the LLM's query.
|
||||
- 0.0.6 - Added event emitter to the get_bbc_news_feed function,
|
||||
to provide status updates to the user as the function
|
||||
executes. Also wrapped the function in a try/catch, to
|
||||
handle any exceptions that may occur during execution.
|
||||
- 0.0.7 - Fixed a major bug where the type was not being casted
|
||||
to the ArticleType enum, causing the get_uri function
|
||||
to not be called correctly.
|
||||
- 0.0.8 - Updated the ArticleType parameter docstring to make it
|
||||
mandatory, and to contain the full names of the
|
||||
'world/' types rather than the abbreviations.
|
||||
- 0.0.9 - Created a new function, get_bbc_news_content, which
|
||||
retrieves the article text content of a BBC News link,
|
||||
given its URI.
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import requests
|
||||
import xml.etree.ElementTree as ElementTree
|
||||
from typing import Awaitable, Callable
|
||||
from pydantic import BaseModel
|
||||
from enum import Enum
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class ArticleType(Enum):
    """BBC News RSS feed categories.

    The enum value is the path segment used in the feed URL; the special
    ``top_stories`` member maps to the root feed (empty path segment).
    """

    top_stories = ""
    world = "world"
    uk = "uk"
    business = "business"
    politics = "politics"
    health = "health"
    education = "education"
    science_and_environment = "science_and_environment"
    technology = "technology"
    entertainment_and_arts = "entertainment_and_arts"
    england = "england"
    northern_ireland = "northern_ireland"
    scotland = "scotland"
    wales = "wales"
    africa = "world/africa"
    asia = "world/asia"
    australia = "world/australia"
    europe = "world/europe"
    latin_america = "world/latin_america"
    middle_east = "world/middle_east"
    us_and_canada = "world/us_and_canada"

    def get_name(self) -> str:
        """Return a human-readable, title-cased category name (e.g. 'Us And Canada')."""
        return self.name.replace("_", " ").title()

    def get_uri(self) -> str:
        """Return the RSS feed URL for this category."""
        # The top-stories feed lives at the root, without a category segment.
        if self.name == "top_stories":
            return "https://feeds.bbci.co.uk/news/rss.xml"
        return f"https://feeds.bbci.co.uk/news/{self.value}/rss.xml"
|
||||
|
||||
# Regex to match a BBC News article URI.
# Details:
# - Must use http or https.
# - Must be a bbc.com or bbc.co.uk domain (optionally with "www.").
# - Must be a news article or video.
# - Must have a valid ID (alphanumeric characters / underscores).
# BUGFIX: the original used a non-raw string with "\/", "\.", "\w" — invalid
# string escape sequences that trigger SyntaxWarning/DeprecationWarning on
# modern Python. A raw string keeps the pattern identical; "/" needs no escape.
URI_REGEX = re.compile(r"^(https?://)(www\.)?bbc\.(com|co\.uk)/news/(articles|videos)/\w+$")
|
||||
|
||||
class Tools:
    """Open WebUI tool: fetch BBC News RSS feeds and extract article text."""

    def __init__(self): pass

    class UserValves(BaseModel):
        # No user-configurable settings for this tool.
        pass

    async def get_bbc_news_feed(
        self,
        type: ArticleType,
        __event_emitter__: Callable[[dict], Awaitable[None]],
        __user__: dict = {},  # supplied by Open WebUI; never mutated, so the shared default is safe
    ) -> str:
        """
        Get the latest news from the BBC, as an array of JSON objects with a title, description, link, and published date.
        :param type: The type of news to get. It can be any of the ArticleType enum values (world, uk, business, politics, health, education, science_and_environment, technology, entertainment_and_arts, england, northern_ireland, scotland, wales, world/africa, world/asia, world/australia, world/europe, world/latin_america, world/middle_east, world/us_and_canada).
        :return: A JSON string of news items, or an error message starting with "Error:".
        """
        # BUGFIX: cast to ArticleType BEFORE the first use. Open WebUI may pass
        # the raw string value (the reason the cast exists at all); the original
        # called type.get_name() in the status emit *before* casting, which
        # raised AttributeError for string input.
        type = ArticleType(type)  # Enforce the type (it seems to get dropped by openwebui...)
        await __event_emitter__({ "data": { "description": f"Starting BBC News Feed retrieval for articles in the '{type.get_name()}' category...", "status": "in_progress", "done": False }, "type": "status" })
        output = []
        try:
            # Explicit timeout so a stalled feed server cannot hang the tool forever.
            response = requests.get(type.get_uri(), timeout=30)
            if not response.ok: return f"Error: '{type}' ({type.get_uri()}) not found ({response.status_code})"
            root = ElementTree.fromstring(response.content)
            for item in root.iter("item"): output.append({
                "title": item.find("title").text,
                "description": item.find("description").text,
                "link": item.find("link").text,
                "published": item.find("pubDate").text,
            })
            await __event_emitter__({ "data": { "description": f"Retrieved {len(output)} news items from BBC News Feed for articles in the '{type.get_name()}' category.", "status": "complete", "done": True }, "type": "status" })
        except Exception as e:
            # BUGFIX: message previously read "Failed to retrieved"; verb corrected.
            await __event_emitter__({ "data": { "description": f"Failed to retrieve any news items from BBC News Feed for articles in the '{type.get_name()}' ({type.get_uri()}) category: {e}.", "status": "complete", "done": True }, "type": "status" })
            return f"Error: {e}"

        return json.dumps(output)


    async def get_bbc_news_content(
        self,
        uri: str,
        __event_emitter__: Callable[[dict], Awaitable[None]],
        __user__: dict = {},  # supplied by Open WebUI; never mutated, so the shared default is safe
    ) -> str:
        """
        Get the content of a news article from the BBC.
        :param uri: The URI of the article to get the content of, which should start with https://bbc.com/news or https://bbc.co.uk/news.
        :return: The plain-text content of the article, or an error message starting with "Error:".
        """
        await __event_emitter__({ "data": { "description": f"Starting BBC News Article retrieval from '{uri}'...", "status": "in_progress", "done": False }, "type": "status" })

        # Guard clauses: reject empty and non-BBC-article URIs before any network call.
        if uri == "":
            await __event_emitter__({ "data": { "description": "Error: No URI provided.", "status": "complete", "done": True }, "type": "status" })
            return "Error: No URI provided"

        if not re.match(URI_REGEX, uri):
            await __event_emitter__({ "data": { "description": "Error: URI must be a BBC News article.", "status": "complete", "done": True }, "type": "status" })
            return "Error: URI must be a BBC News article."

        content = ""
        try:
            # Explicit timeout so a stalled page cannot hang the tool forever.
            response = requests.get(uri, timeout=30)
            if not response.ok: return f"Error: '{uri}' not found ({response.status_code})"
            article = BeautifulSoup(response.content, "html.parser").find("article")
            if article is None:
                await __event_emitter__({ "data": { "description": f"Failed to retrieve BBC News Article content from '{uri}': Article content not found.", "status": "complete", "done": True }, "type": "status" })
                return f"Error: Article content for {uri} not found."

            # Concatenate every <p> inside the <article>, one paragraph per line.
            paragraphs = article.find_all("p")
            for paragraph in paragraphs: content += f"{paragraph.text}\n"
            await __event_emitter__({ "data": { "description": f"Retrieved BBC News Article content from '{uri}' ({len(content)} characters).", "status": "complete", "done": True }, "type": "status" })
        except Exception as e:
            await __event_emitter__({ "data": { "description": f"Failed to retrieve BBC News Article content from '{uri}': {e}.", "status": "complete", "done": True }, "type": "status" })
            return f"Error: {e}"

        return content
|
||||
|
12
test/bbc_news.test.py
Normal file
12
test/bbc_news.test.py
Normal file
@ -0,0 +1,12 @@
|
||||
# Make the src directory importable before pulling in the tool module.
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))
# No type hinting this way, but the dynamic import works fine.
from bbc_news import Tools, ArticleType

import asyncio


async def main():
    # Stand-in for Open WebUI's event emitter: just print each status payload.
    async def mock_event_emitter(event: dict):
        print("Event Emitted:", event["data"])

    await Tools().get_bbc_news_feed(ArticleType.top_stories, mock_event_emitter)


if __name__ == "__main__":
    asyncio.run(main())
|
Loading…
Reference in New Issue
Block a user