Compare commits


2 Commits
v0.1.3 ... main

5 changed files with 726 additions and 0 deletions

config/feeds.json (new file)

@@ -0,0 +1,132 @@
{
"Feeds": {
"CBC": [
{
"Name": "CBC Top Stories",
"Url": "https://www.cbc.ca/cmlink/rss-topstories",
"Category": "National",
"Enabled": true
},
{
"Name": "CBC World",
"Url": "https://www.cbc.ca/cmlink/rss-world",
"Category": "International",
"Enabled": true
},
{
"Name": "CBC Politics",
"Url": "https://www.cbc.ca/cmlink/rss-politics",
"Category": "Politics",
"Enabled": true
},
{
"Name": "CBC Business",
"Url": "https://www.cbc.ca/cmlink/rss-business",
"Category": "Economy",
"Enabled": true
},
{
"Name": "CBC Health",
"Url": "https://www.cbc.ca/cmlink/rss-health",
"Category": "Health",
"Enabled": true
},
{
"Name": "CBC Technology & Science",
"Url": "https://www.cbc.ca/cmlink/rss-technology",
"Category": "Technology",
"Enabled": true
},
{
"Name": "CBC Arts & Entertainment",
"Url": "https://www.cbc.ca/cmlink/rss-arts",
"Category": "Arts",
"Enabled": true
},
{
"Name": "CBC Sports",
"Url": "https://www.cbc.ca/cmlink/rss-sports",
"Category": "Sports",
"Enabled": true
},
{
"Name": "CBC Local Ottawa",
"Url": "https://www.cbc.ca/cmlink/rss-canada-ottawa",
"Category": "Local",
"Region": "Ottawa",
"Enabled": true
},
{
"Name": "CBC Local Toronto",
"Url": "https://www.cbc.ca/cmlink/rss-canada-toronto",
"Category": "Local",
"Region": "Toronto",
"Enabled": true
},
{
"Name": "CBC Local Montreal",
"Url": "https://www.cbc.ca/cmlink/rss-canada-montreal",
"Category": "Local",
"Region": "Montreal",
"Enabled": true
},
{
"Name": "CBC Local Vancouver",
"Url": "https://www.cbc.ca/cmlink/rss-canada-britishcolumbia",
"Category": "Local",
"Region": "Vancouver",
"Enabled": true
},
{
"Name": "CBC Local Calgary",
"Url": "https://www.cbc.ca/cmlink/rss-canada-calgary",
"Category": "Local",
"Region": "Calgary",
"Enabled": true
},
{
"Name": "CBC Local Edmonton",
"Url": "https://www.cbc.ca/cmlink/rss-canada-edmonton",
"Category": "Local",
"Region": "Edmonton",
"Enabled": true
},
{
"Name": "CBC Local Winnipeg",
"Url": "https://www.cbc.ca/cmlink/rss-canada-manitoba",
"Category": "Local",
"Region": "Winnipeg",
"Enabled": true
},
{
"Name": "CBC Local Halifax",
"Url": "https://www.cbc.ca/cmlink/rss-canada-novascotia",
"Category": "Local",
"Region": "Halifax",
"Enabled": true
},
{
"Name": "CBC Local St. John's",
"Url": "https://www.cbc.ca/cmlink/rss-canada-newfoundland",
"Category": "Local",
"Region": "St. John's",
"Enabled": true
}
],
"WeatherNetwork": [
{
"Name": "WeatherNetwork National Weather",
"Url": "",
"Category": "Weather",
"Enabled": false
},
{
"Name": "WeatherNetwork Regional Feed",
"Url": "",
"Category": "Weather",
"Enabled": false
}
],
"Other": []
}
}
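
A quick way to sanity-check this config before wiring it up (a minimal sketch; it assumes the file sits at config/feeds.json, the same default path FeedLoader uses below):

import json

with open("config/feeds.json", encoding="utf-8") as fh:
    data = json.load(fh)
enabled = [f["Name"] for feeds in data["Feeds"].values() for f in feeds if f.get("Enabled")]
print(f"{len(enabled)} feeds enabled")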

src/config/feed_loader.py (new file)

@@ -0,0 +1,135 @@
# ============================================================
# File: src/config/feed_loader.py
# Description:
# Loads and filters RSS feed definitions from /config/feeds.json.
# Currently defaults to the Ottawa region for stable offline
# development. Region aliases and geolocation support remain
# in place for future activation.
# ============================================================
import os
import json
from src.core.terminal_colors import TerminalColors as C
# ------------------------------------------------------------
# Region alias table for common Canadian cities/suburbs
# ------------------------------------------------------------
REGION_ALIASES = {
# Ontario
"cornwall": "Ottawa",
"south glengarry": "Ottawa",
"glengarry": "Ottawa",
"kingston": "Ottawa",
"belleville": "Ottawa",
# Québec (map suburbs to Montréal)
"montreal": "Montreal",
"laval": "Montreal",
"leval": "Montreal", # typo guard
"terrebonne": "Montreal",
"longueuil": "Montreal",
"brossard": "Montreal",
"repentigny": "Montreal",
"l'île-perrot": "Montreal",
"saint-lambert": "Montreal",
# British Columbia
"vancouver": "Vancouver",
"burnaby": "Vancouver",
"surrey": "Vancouver",
"richmond": "Vancouver",
# Alberta
"calgary": "Calgary",
"edmonton": "Edmonton",
# Manitoba
"winnipeg": "Winnipeg",
# Nova Scotia
"halifax": "Halifax",
# Newfoundland and Labrador
"st. john's": "St. John's",
"saint johns": "St. John's",
}
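# Note: REGION_ALIASES is defined but not yet consulted anywhere in this
# module; it is reserved for the future re-activation of IP-based detection
# described in the header. The eventual lookup would presumably be something
# like the following (hypothetical wiring, not part of the current code path):
#     region = REGION_ALIASES.get(detected_city.lower(), "Ottawa")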
class FeedLoader:
"""Handles loading and filtering RSS feed definitions."""
def __init__(self, config_dir: str = "config", region_override: str | None = None):
self.config_path = os.path.join(config_dir, "feeds.json")
# For now, IP-based detection is disabled.
# Use region_override if provided, otherwise default to Ottawa.
self.region = region_override or "Ottawa"
self.all_feeds = {}
self.active_feeds = []
self._load_json()
self._filter_active_feeds()
# ------------------------------------------------------------
def _load_json(self) -> None:
"""Loads feeds.json and parses all sections."""
if not os.path.exists(self.config_path):
raise FileNotFoundError(f"{C.ERROR}[error]{C.RESET} Missing feeds.json at {self.config_path}")
with open(self.config_path, "r", encoding="utf-8") as file:
try:
data = json.load(file)
            except json.JSONDecodeError as e:
                raise ValueError(f"{C.ERROR}[error]{C.RESET} Invalid JSON in feeds.json: {e}") from e
if "Feeds" not in data or not isinstance(data["Feeds"], dict):
raise ValueError(f"{C.ERROR}[error]{C.RESET} feeds.json must contain a 'Feeds' dictionary.")
self.all_feeds = data["Feeds"]
# ------------------------------------------------------------
def _filter_active_feeds(self) -> None:
"""Filters feeds for the detected region and enabled status."""
active = []
for provider, feed_list in self.all_feeds.items():
for feed in feed_list:
if not feed.get("Enabled", False):
continue
feed_region = feed.get("Region")
if feed_region and feed_region != self.region:
continue # Skip other CBC regions
active.append({
"Provider": provider,
"Name": feed.get("Name", "Unnamed Feed"),
"Url": feed.get("Url"),
"Category": feed.get("Category", "General"),
"Region": feed.get("Region", None)
})
self.active_feeds = active
print(f"{C.INFO}[info]{C.RESET} Loaded {len(self.active_feeds)} active feeds for region: {C.SUCCESS}{self.region}{C.RESET}")
# ------------------------------------------------------------
def get_active_feeds(self) -> list[dict]:
"""Returns the list of active region-appropriate feeds."""
return self.active_feeds
def get_region(self) -> str:
"""Returns the currently set region."""
return self.region
# ------------------------------------------------------------
# Example usage (manual testing)
# ------------------------------------------------------------
if __name__ == "__main__":
loader = FeedLoader()
feeds = loader.get_active_feeds()
print(f"{C.INFO}Detected Region:{C.RESET} {C.SUCCESS}{loader.get_region()}{C.RESET}")
for feed in feeds:
print(f"{C.DEBUG}- {feed['Name']} ({feed['Url']}) [{feed['Provider']}] {C.RESET}")
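
While detection is disabled, region_override is the supported way to target another city; a small usage sketch built only on the constructor shown above:

from src.config.feed_loader import FeedLoader

loader = FeedLoader(region_override="Toronto")
print(loader.get_region())  # "Toronto": keeps national feeds plus CBC Local Toronto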

src/core/terminal_colors.py (new file)

@@ -0,0 +1,31 @@
# ============================================================
# File: src/core/terminal_colors.py
# Description:
# Provides ANSI color codes for structured console logging.
# Used across all Telefact modules (FeedLoader, RSSHandler,
# Renderer, etc.) for consistent, readable terminal output.
# ============================================================
class TerminalColors:
"""ANSI terminal color codes for clean, readable logs."""
RESET = "\033[0m"
# Core log levels
INFO = "\033[96m" # Cyan
WARN = "\033[93m" # Yellow
ERROR = "\033[91m" # Red
SUCCESS = "\033[92m" # Green
DEBUG = "\033[90m" # Grey
# Optional stylistic colors for subsystem tags
HEADER = "\033[95m" # Magenta
TITLE = "\033[94m" # Blue
DATE = "\033[32m" # Green (for timestamps)
SECTION = "\033[36m" # Cyan for dividers
@staticmethod
def format(text: str, color: str) -> str:
"""Quick helper for inline colored text."""
return f"{color}{text}{TerminalColors.RESET}"
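
The format() helper above keeps one-off call sites terse; the f-string form below is the pattern the other modules in this diff actually use:

from src.core.terminal_colors import TerminalColors as C

print(C.format("[info] cache refreshed", C.INFO))
print(f"{C.WARN}[warn]{C.RESET} falling back to cached copy")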

src/rss/rss_feedHandler.py (new file)

@@ -0,0 +1,262 @@
# ============================================================
# File: src/rss/rss_feedHandler.py
# Description:
# Handles RSS feed retrieval, caching, and validation for
# the Telefact broadcaster. Uses FeedLoader for region-aware
# feed selection and stores cached data under Cache/Feeds/<Region>.
# Automatically refreshes feeds only if the cache is older than
# the configured update interval (default: 10 minutes).
# Optimized for CBC RSS: ignores image-only items and non-story types.
# ============================================================
import os
import re
import json
import time
import requests
import xml.etree.ElementTree as ET
from html import unescape
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from src.config.feed_loader import FeedLoader
from src.core.terminal_colors import TerminalColors as C
class RSSFeedHandler:
"""Handles downloading, caching, and parsing of RSS feeds."""
def __init__(
self,
cache_dir: str = "Cache/Feeds",
config_dir: str = "config",
cache_duration_minutes: int = 10,
story_limit: int = 6,
):
self.cache_dir = cache_dir
self.cache_duration = cache_duration_minutes * 60 # seconds
self.feed_loader = FeedLoader(config_dir=config_dir)
self.region = self.feed_loader.get_region()
self.story_limit = story_limit
self._ensure_cache_dirs()
# ------------------------------------------------------------
def _ensure_cache_dirs(self) -> None:
"""Ensures regional cache directories exist."""
region_path = os.path.join(self.cache_dir, self.region)
os.makedirs(region_path, exist_ok=True)
# ------------------------------------------------------------
def _get_cache_path(self, feed_name: str) -> str:
"""Generates path to feed cache file."""
safe_name = feed_name.replace(" ", "_").replace("/", "_")
return os.path.join(self.cache_dir, self.region, f"{safe_name}.json")
# ------------------------------------------------------------
def _is_cache_valid(self, path: str) -> bool:
"""Returns True if cache exists and is within valid time."""
if not os.path.exists(path):
return False
age = time.time() - os.path.getmtime(path)
return age < self.cache_duration
# ------------------------------------------------------------
def _fetch_rss(self, url: str) -> str | None:
"""Fetches raw RSS XML from a given URL with retry & spoofed UA."""
headers = {
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/121.0 Safari/537.36"
)
}
for attempt in range(3):
try:
response = requests.get(url, headers=headers, timeout=20)
if response.status_code == 200:
return response.text
print(f"{C.WARN}[warn]{C.RESET} RSS fetch failed ({response.status_code}) for {url}")
                break  # treat any non-200 response as permanent for this refresh cycle
except requests.Timeout:
print(f"{C.WARN}[warn]{C.RESET} Timeout {attempt + 1}/3 for {url}")
except requests.RequestException as e:
print(f"{C.WARN}[warn]{C.RESET} RSS fetch error ({attempt + 1}/3): {e}")
            if attempt < 2:  # no sleep after the final attempt
                time.sleep(2 ** attempt)  # backoff: 1s, then 2s
return None
# ------------------------------------------------------------
def _strip_html(self, text: str) -> str:
"""Removes HTML tags and decodes entities."""
clean = re.sub(r"<img[^>]*>", "", text)
clean = re.sub(r"<[^>]+>", "", clean)
clean = unescape(clean)
return clean.strip()
# ------------------------------------------------------------
def _parse_rss(self, xml_data: str) -> list[dict]:
"""Parses RSS XML into a list of story dictionaries, CBC-optimized."""
try:
root = ET.fromstring(xml_data)
except ET.ParseError as e:
print(f"{C.WARN}[warn]{C.RESET} XML parse error: {e}")
return []
channel = root.find("channel")
if channel is None:
return []
stories = []
for item in channel.findall("item"):
cbc_type = ""
deptid = ""
for child in item:
tag = child.tag.lower()
if tag.endswith("type"):
cbc_type = (child.text or "").strip()
elif tag.endswith("deptid"):
deptid = (child.text or "").strip()
# Skip if not a story
if cbc_type.lower() not in ("story", ""):
continue
title = item.findtext("title", "").strip()
link = item.findtext("link", "").strip()
description = item.findtext("description", "").strip()
pub_date = item.findtext("pubDate", "").strip()
# Remove CDATA wrappers
title = title.replace("<![CDATA[", "").replace("]]>", "").strip()
description = description.replace("<![CDATA[", "").replace("]]>", "").strip()
clean_text = self._strip_html(description)
if not clean_text or len(clean_text) < 20:
continue
summary = clean_text[:500].rstrip()
stories.append({
"Title": title,
"Summary": summary,
"Link": link,
"PubDate": pub_date,
"DeptID": deptid,
"Provider": "CBC",
})
if not stories:
print(f"{C.DEBUG}[debug]{C.RESET} No valid <cbc:type>story</cbc:type> entries found — possible namespace issue.")
else:
print(f"{C.DEBUG}[debug]{C.RESET} Parsed {len(stories)} stories successfully.")
stories = self._sort_stories(stories)
if self.story_limit > 0:
stories = stories[:self.story_limit]
return stories
# ------------------------------------------------------------
def _sort_stories(self, stories: list[dict]) -> list[dict]:
"""Sorts stories by publication date, newest first."""
        def parse_date(pubdate: str):
            # strptime's "%Z" cannot parse zone names such as "EST", so RFC 822
            # pubDates (e.g. "Mon, 06 Jan 2025 12:00:00 -0500") are parsed with
            # email.utils and normalized to timezone-aware UTC for safe sorting.
            try:
                parsed = parsedate_to_datetime(pubdate)
                if parsed.tzinfo is None:
                    parsed = parsed.replace(tzinfo=timezone.utc)
                return parsed
            except (TypeError, ValueError):
                return datetime.min.replace(tzinfo=timezone.utc)
return sorted(stories, key=lambda s: parse_date(s.get("PubDate", "")), reverse=True)
# ------------------------------------------------------------
def _load_cached_feed(self, path: str) -> list[dict]:
"""Loads cached JSON feed data if available."""
try:
with open(path, "r", encoding="utf-8") as file:
return json.load(file)
except Exception as e:
print(f"{C.WARN}[warn]{C.RESET} Could not load cache: {e}")
return []
# ------------------------------------------------------------
def _save_cache(self, path: str, stories: list[dict]) -> None:
"""Saves parsed stories to local cache."""
try:
with open(path, "w", encoding="utf-8") as file:
                json.dump(
                    {"LastUpdated": datetime.now(timezone.utc).isoformat(), "Stories": stories},
                    file,
                    ensure_ascii=False,
                    indent=2,
                )
except Exception as e:
print(f"{C.WARN}[warn]{C.RESET} Failed to save cache for {path}: {e}")
# ------------------------------------------------------------
def update_feeds(self, force: bool = False) -> None:
"""Fetches and caches all active feeds."""
active_feeds = self.feed_loader.get_active_feeds()
if not active_feeds:
print(f"{C.WARN}[warn]{C.RESET} No active feeds to update.")
return
print(f"{C.INFO}[info]{C.RESET} Updating {len(active_feeds)} feeds for region: {C.SUCCESS}{self.region}{C.RESET}")
for feed in active_feeds:
feed_name = feed["Name"]
feed_url = feed["Url"]
cache_path = self._get_cache_path(feed_name)
if not force and self._is_cache_valid(cache_path):
print(f"{C.DEBUG}[debug]{C.RESET} Cache valid for: {feed_name}")
continue
print(f"{C.INFO}[info]{C.RESET} Fetching: {C.TITLE}{feed_name}{C.RESET}")
xml_data = self._fetch_rss(feed_url)
if not xml_data:
continue
stories = self._parse_rss(xml_data)
if not stories:
print(f"{C.WARN}[warn]{C.RESET} No valid stories found in {feed_name}")
continue
self._save_cache(cache_path, stories)
print(f"{C.SUCCESS}[success]{C.RESET} Cached {len(stories)} stories from {feed_name}")
# ------------------------------------------------------------
def load_cached_feeds(self) -> dict[str, list[dict]]:
"""Loads all cached feeds for the current region."""
region_path = os.path.join(self.cache_dir, self.region)
if not os.path.exists(region_path):
return {}
cached_data = {}
for filename in os.listdir(region_path):
if not filename.endswith(".json"):
continue
path = os.path.join(region_path, filename)
feed_name = filename.replace(".json", "").replace("_", " ")
data = self._load_cached_feed(path)
cached_data[feed_name] = data
return cached_data
# ------------------------------------------------------------
# Example usage (manual test)
# ------------------------------------------------------------
if __name__ == "__main__":
handler = RSSFeedHandler(cache_duration_minutes=10, story_limit=6)
handler.update_feeds()
cached = handler.load_cached_feeds()
for feed, data in cached.items():
print(f"\n{C.SECTION}=== {feed} ==={C.RESET}")
if isinstance(data, dict):
stories = data.get("Stories", [])
else:
stories = data
for story in stories:
print(f"- {story['Title']}")
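
Each cache file under Cache/Feeds/<Region>/ has the shape written by _save_cache, so other modules can read it directly; for example (the filename follows _get_cache_path's space-to-underscore rule):

import json

with open("Cache/Feeds/Ottawa/CBC_Top_Stories.json", encoding="utf-8") as fh:
    payload = json.load(fh)
print(payload["LastUpdated"], "-", len(payload["Stories"]), "stories")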

src/rss/rss_feedIndex.py (new file)

@@ -0,0 +1,166 @@
# ============================================================
# File: src/rss/rss_feedIndex.py
# Description:
# Converts cached RSS feed data into Telefact pages.
# Builds index pages (headlines) and paginated subpages
# (story summaries) using the TelefactFrame grid (40×24).
# Designed for use in broadcaster rotation.
# ============================================================
import os
import textwrap
from datetime import datetime, timezone
from src.core.telefact_frame import TelefactFrame
from src.rss.rss_feedHandler import RSSFeedHandler
from src.core.terminal_colors import TerminalColors as C
class RSSFeedIndex:
"""
Generates TelefactFrame pages and subpages from cached RSS feeds.
Produces:
- Index page (list of headlines)
- One or more subpages per story (paginated if long)
"""
def __init__(self, cache_dir: str = "Cache/Feeds", region: str | None = None):
self.handler = RSSFeedHandler(cache_dir=cache_dir)
self.region = region or self.handler.region
self.frames = {} # { page_number: [TelefactFrame, TelefactFrame, ...] }
# layout constants
self.max_width = 40
self.header_rows = 1
self.footer_rows = 1
self.body_start = self.header_rows
self.body_end = 23 # up to line 23 (0-indexed)
self.body_lines = self.body_end - self.body_start
# ------------------------------------------------------------
def _wrap_text(self, text: str) -> list[str]:
"""Breaks plain text into 40-character lines."""
wrapped = textwrap.wrap(text, width=self.max_width)
return wrapped or ["(no content)"]
# ------------------------------------------------------------
def _new_frame(self) -> TelefactFrame:
"""Create a new blank Telefact frame."""
return TelefactFrame()
# ------------------------------------------------------------
def _draw_centered(self, frame: TelefactFrame, row: int, text: str, color: str = "white"):
"""Center text horizontally within the 40-column grid."""
text = text.strip()
if len(text) > self.max_width:
text = text[:self.max_width]
start_col = max(0, (self.max_width - len(text)) // 2)
for i, ch in enumerate(text):
frame.set_cell(start_col + i, row, ch, color)
# ------------------------------------------------------------
def _build_index_page(self, feed_name: str, stories: list[dict]) -> TelefactFrame:
"""Builds a single index page listing headlines."""
frame = self._new_frame()
self._draw_centered(frame, 1, f"{feed_name.upper()} INDEX", "yellow")
# timestamp (UTC)
timestamp = datetime.now(timezone.utc).strftime("%b %d %H:%M UTC")
self._draw_centered(frame, 2, f"Updated {timestamp}", "green")
row = 4
for i, story in enumerate(stories[: self.body_lines - 4]):
title = story.get("Title", "").strip()
if not title:
continue
display = f"{i + 1:02d}. {title[: self.max_width - 5]}"
for j, ch in enumerate(display):
if j < self.max_width:
frame.set_cell(j, row, ch, "white")
row += 1
self._draw_centered(frame, 23, "Telefact: The world at your fingertips", "cyan")
return frame
# ------------------------------------------------------------
def _build_story_subpages(self, story: dict) -> list[TelefactFrame]:
"""Builds one or more subpages for a story (paginated)."""
title = story.get("Title", "").strip()
body = story.get("Summary", "").strip()
pubdate = story.get("PubDate", "").strip()
wrapped_body = self._wrap_text(body)
lines_per_page = self.body_lines - 4 # reserve rows for title/date/footer
chunks = [
wrapped_body[i : i + lines_per_page]
for i in range(0, len(wrapped_body), lines_per_page)
]
subpages = []
total_pages = len(chunks)
for page_num, chunk in enumerate(chunks, start=1):
frame = self._new_frame()
self._draw_centered(frame, 1, title, "yellow")
if pubdate:
self._draw_centered(frame, 2, pubdate, "green")
row = 4
for line in chunk:
for j, ch in enumerate(line[: self.max_width]):
frame.set_cell(j, row, ch, "white")
row += 1
footer_text = f"Page {page_num}/{total_pages}"
self._draw_centered(frame, 23, footer_text, "cyan")
subpages.append(frame)
return subpages
# ------------------------------------------------------------
def build_all_pages(self):
"""
Loads cached RSS feeds and builds Teletext-ready pages.
Returns a dict mapping feed names to their page structures:
{ "Top Stories": {"index": TelefactFrame, "subpages": [frames...]}, ... }
"""
cached = self.handler.load_cached_feeds()
if not cached:
print(f"{C.WARN}[warn]{C.RESET} No cached feeds found.")
return {}
page_map = {}
for feed_name, data in cached.items():
stories = []
if isinstance(data, dict):
stories = data.get("Stories", [])
elif isinstance(data, list):
stories = data
else:
continue
if not stories:
continue
index_frame = self._build_index_page(feed_name, stories)
subpages = []
for story in stories:
subpages.extend(self._build_story_subpages(story))
page_map[feed_name] = {"index": index_frame, "subpages": subpages}
print(f"{C.INFO}[info]{C.RESET} Built {len(page_map)} indexed feeds into TelefactFrames.")
return page_map
# ------------------------------------------------------------
# Manual test harness
# ------------------------------------------------------------
if __name__ == "__main__":
indexer = RSSFeedIndex()
pages = indexer.build_all_pages()
for feed_name, page_data in pages.items():
print(f"\n{C.SUCCESS}Feed:{C.RESET} {feed_name}")
subpage_count = len(page_data['subpages'])
print(f" {C.DEBUG}- Index ready, {subpage_count} story subpages.{C.RESET}")
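
Because build_all_pages() returns {"index": frame, "subpages": [frames...]} per feed, a broadcast rotation can be flattened from the result directly; a sketch (actual on-air scheduling belongs to the broadcaster module, which is outside this diff):

rotation = []
for page_data in pages.values():
    rotation.append(page_data["index"])
    rotation.extend(page_data["subpages"])
print(f"{len(rotation)} frames queued for rotation")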