Initial commit.
This commit is contained in:
commit
1198424f4f
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
__pycache__/
|
3
README.md
Normal file
3
README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# open-webui tool: bbc news feeds
|
||||||
|
|
||||||
|
A tool to interact with bbc.co.uk/news feeds, including top stories, world news, UK/regional news, and more.
|
148
src/bbc_news.py
Normal file
148
src/bbc_news.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
"""
|
||||||
|
title: BBC News Feeds
|
||||||
|
author: @nathanwindisch
|
||||||
|
author_url: https://github.com/nathanwindisch
|
||||||
|
funding_url: https://www.patreon.com/NathanWindisch
|
||||||
|
version: 0.0.9
|
||||||
|
changelog:
|
||||||
|
- 0.0.1 - Initial upload to openwebui community.
|
||||||
|
- 0.0.2 - Modified formatting slightly.
|
||||||
|
- 0.0.3 - Added tool docstring, and this changelog.
|
||||||
|
- 0.0.4 - Added funding_url to docstring.
|
||||||
|
- 0.0.5 - Updated get_bbc_news_feed function to use a default for
|
||||||
|
the ArticleType, and updated its docstring to include
|
||||||
|
a list of the possible types, to assist the LLM's query.
|
||||||
|
- 0.0.6 - Added event emitter to the get_bbc_news_feed function,
|
||||||
|
to provide status updates to the user as the function
|
||||||
|
executes. Also wrapped the function in a try/catch, to
|
||||||
|
handle any exceptions that may occur during execution.
|
||||||
|
- 0.0.7 - Fixed a major bug where the type was not being cast
|
||||||
|
to the ArticleType enum, causing the get_uri function
|
||||||
|
to not be called correctly.
|
||||||
|
- 0.0.8 - Updated the ArticleType parameter docstring to make it
|
||||||
|
mandatory, and to contain the full names of the
|
||||||
|
'world/' types rather than the abbreviations.
|
||||||
|
- 0.0.9 - Created a new function, get_bbc_news_content, which
|
||||||
|
retrieves the article text content of a BBC News link,
|
||||||
|
given its URI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import xml.etree.ElementTree as ElementTree
|
||||||
|
from typing import Awaitable, Callable
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from enum import Enum
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
class ArticleType(Enum):
    """BBC News feed categories.

    Each member's value is the path segment that selects its RSS feed
    under ``https://feeds.bbci.co.uk/news/``. ``top_stories`` has an empty
    value because its feed sits directly under ``/news/``.
    """

    top_stories = ""
    world = "world"
    uk = "uk"
    business = "business"
    politics = "politics"
    health = "health"
    education = "education"
    science_and_environment = "science_and_environment"
    technology = "technology"
    entertainment_and_arts = "entertainment_and_arts"
    england = "england"
    northern_ireland = "northern_ireland"
    scotland = "scotland"
    wales = "wales"
    africa = "world/africa"
    asia = "world/asia"
    australia = "world/australia"
    europe = "world/europe"
    latin_america = "world/latin_america"
    middle_east = "world/middle_east"
    us_and_canada = "world/us_and_canada"

    def get_name(self) -> str:
        """Return the member name as a title-cased label, e.g. ``"Middle East"``."""
        words = self.name.split("_")
        return " ".join(words).title()

    def get_uri(self) -> str:
        """Return the RSS feed URL for this category."""
        # The top stories feed has no category segment in its path.
        if self is ArticleType.top_stories:
            return "https://feeds.bbci.co.uk/news/rss.xml"
        return f"https://feeds.bbci.co.uk/news/{self.value}/rss.xml"
|
||||||
|
|
||||||
|
# Regex to match a BBC News article URI.
# Details:
# - Must use http or https.
# - Must be a bbc.com or bbc.co.uk domain (with or without "www.").
# - Must be a news article or video (/news/articles/ or /news/videos/).
# - Must end with a valid ID made of word characters (letters, digits,
#   underscore); the anchors mean no query string, fragment or trailing
#   slash is accepted.
# Written as a raw string so the backslashes reach the regex engine as-is;
# in a normal string "\/", "\." and "\w" are invalid escape sequences.
URI_REGEX = re.compile(r"^(https?:\/\/)(www\.)?bbc\.(com|co\.uk)\/news\/(articles|videos)\/\w+$")
|
||||||
|
|
||||||
|
# Seconds to wait on the BBC servers before a request is abandoned; without a
# timeout a stalled connection would block the tool call indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10

# Maps each key of a returned news item to the RSS <item> child it is read from.
_FEED_FIELDS = (
    ("title", "title"),
    ("description", "description"),
    ("link", "link"),
    ("published", "pubDate"),
)


# NOTE: the helpers below are deliberately module-level functions rather than
# methods, so the Tools class keeps exactly its two public tool methods.
def _status_event(description: str, done: bool) -> dict:
    """Build a status event payload for the event emitter.

    :param description: The human-readable progress message.
    :param done: False for an in-progress update, True for a final one.
    :return: The event dictionary to pass to the event emitter.
    """
    return {
        "data": {
            "description": description,
            "status": "complete" if done else "in_progress",
            "done": done,
        },
        "type": "status",
    }


def _to_article_type(value) -> ArticleType:
    """Coerce a raw tool argument into an ArticleType member.

    Accepts an ArticleType member, an enum value (e.g. "world/africa") or a
    member name (e.g. "africa" or "top_stories").

    :param value: The argument received by the tool function.
    :return: The matching ArticleType member.
    :raises ValueError: If the argument matches no member value or name.
    """
    if isinstance(value, ArticleType):
        return value
    try:
        return ArticleType(value)
    except ValueError:
        pass
    try:
        return ArticleType[str(value)]
    except KeyError:
        raise ValueError(f"'{value}' is not a valid ArticleType") from None


class Tools:
    # Implementation notes are kept in comments rather than in the method
    # docstrings, which are left as short descriptions of the tool contract.
    def __init__(self):
        pass

    class UserValves(BaseModel):
        pass

    async def get_bbc_news_feed(
        self,
        type: ArticleType,
        __event_emitter__: Callable[[dict], Awaitable[None]],
        __user__: dict = {},
    ) -> str:
        """
        Get the latest news from the BBC, as an array of JSON objects with a title, description, link, and published date.
        :param type: The type of news to get. It can be any of the ArticleType enum values (world, uk, business, politics, health, education, science_and_environment, technology, entertainment_and_arts, england, northern_ireland, scotland, wales, world/africa, world/asia, world/australia, world/europe, world/latin_america, world/middle_east, world/us_and_canada). Use top_stories (or an empty string) for the main top stories feed.
        :return: A list of news items or an error message.
        """
        # Enforce the type before anything touches it: the argument may arrive
        # as a plain string rather than an ArticleType member.
        try:
            article_type = _to_article_type(type)
        except ValueError as e:
            await __event_emitter__(_status_event(f"Error: {e}.", True))
            return f"Error: {e}"

        name = article_type.get_name()
        feed_uri = article_type.get_uri()
        await __event_emitter__(_status_event(f"Starting BBC News Feed retrieval for articles in the '{name}' category...", False))

        try:
            response = requests.get(feed_uri, timeout=_REQUEST_TIMEOUT_SECONDS)
            if not response.ok:
                # Close out the status so the UI is not left "in progress".
                await __event_emitter__(_status_event(f"Failed to retrieve any news items from BBC News Feed for articles in the '{name}' ({feed_uri}) category: HTTP {response.status_code}.", True))
                return f"Error: '{article_type}' ({feed_uri}) not found ({response.status_code})"

            root = ElementTree.fromstring(response.content)
            # findtext() tolerates a missing child element; "or None" keeps
            # empty elements serialised as null, as Element.text would be.
            output = [
                {key: item.findtext(tag) or None for key, tag in _FEED_FIELDS}
                for item in root.iter("item")
            ]
        except Exception as e:
            await __event_emitter__(_status_event(f"Failed to retrieved any news items from BBC News Feed for articles in the '{name}' ({feed_uri}) category: {e}.", True))
            return f"Error: {e}"

        await __event_emitter__(_status_event(f"Retrieved {len(output)} news items from BBC News Feed for articles in the '{name}' category.", True))
        return json.dumps(output)

    async def get_bbc_news_content(
        self,
        uri: str,
        __event_emitter__: Callable[[dict], Awaitable[None]],
        __user__: dict = {},
    ) -> str:
        """
        Get the content of a news article from the BBC.
        :param uri: The URI of the article to get the content of, which should start with https://bbc.com/news or https://bbc.co.uk/news.
        :return: The content of the article or an error message.
        """
        await __event_emitter__(_status_event(f"Starting BBC News Article retrieval from '{uri}'...", False))

        if not uri:
            await __event_emitter__(_status_event("Error: No URI provided.", True))
            return "Error: No URI provided"

        # Only fetch URIs that match the BBC News article/video pattern.
        if not isinstance(uri, str) or not URI_REGEX.match(uri):
            await __event_emitter__(_status_event("Error: URI must be a BBC News article.", True))
            return "Error: URI must be a BBC News article."

        try:
            response = requests.get(uri, timeout=_REQUEST_TIMEOUT_SECONDS)
            if not response.ok:
                # Close out the status so the UI is not left "in progress".
                await __event_emitter__(_status_event(f"Failed to retrieve BBC News Article content from '{uri}': HTTP {response.status_code}.", True))
                return f"Error: '{uri}' not found ({response.status_code})"

            article = BeautifulSoup(response.content, "html.parser").find("article")
            if article is None:
                await __event_emitter__(_status_event(f"Failed to retrieve BBC News Article content from '{uri}': Article content not found.", True))
                return f"Error: Article content for {uri} not found."

            # One line of output per <p> element inside the <article>.
            content = "".join(f"{paragraph.text}\n" for paragraph in article.find_all("p"))
        except Exception as e:
            await __event_emitter__(_status_event(f"Failed to retrieve BBC News Article content from '{uri}': {e}.", True))
            return f"Error: {e}"

        await __event_emitter__(_status_event(f"Retrieved BBC News Article content from '{uri}' ({len(content)} characters).", True))
        return content
|
||||||
|
|
12
test/bbc_news.test.py
Normal file
12
test/bbc_news.test.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
import asyncio
import os
import sys

# The tool module lives in ../src, which is not an installed package, so that
# directory has to be on the module search path before the import below.
_here = os.path.dirname(__file__)
sys.path.append(os.path.abspath(os.path.join(_here, "../src")))

# Importing this way gives no type hinting in editors, but it does work.
from bbc_news import Tools, ArticleType  # noqa: E402


async def main():
    """Request the top stories feed once, printing every status event."""

    async def print_event(event: dict):
        # Stand-in for the event emitter the tool functions expect.
        print("Event Emitted:", event["data"])

    tools = Tools()
    await tools.get_bbc_news_feed(ArticleType.top_stories, print_event)


if __name__ == "__main__":
    asyncio.run(main())
|
Loading…
Reference in New Issue
Block a user