vikramvasudevan's picture
Upload folder using huggingface_hub
4aebf77 verified
# modules/retriever.py
from typing import List, Dict
from openai import OpenAI
from modules.youtube_metadata.embeddings import get_embedding
def retrieve_videos(
    query: str, collection, top_k: int = 3, channel_id: str = None
) -> List[Dict]:
    """Return the nearest matches for ``query`` as a list of plain dicts.

    The query text is embedded with ``get_embedding`` and the resulting vector
    is passed to ``collection.query`` (a Chroma collection, per the module's
    usage; only its ``query`` method is relied on here).

    Args:
        query: Free-text search string to embed.
        collection: Object exposing ``query(query_embeddings=..., n_results=...,
            include=..., where=...)`` and returning a mapping with
            ``"metadatas"``, ``"documents"`` and ``"distances"`` entries.
        top_k: Maximum number of results requested from the collection.
        channel_id: When truthy, restricts the search to entries whose
            ``channel_id`` metadata equals this value; otherwise no filter
            is applied.

    Returns:
        One dict per returned metadata entry with the keys ``video_id``,
        ``video_title``, ``channel``, ``description`` and ``score``.
        ``description`` is the stored document text (``""`` if missing) and
        ``score`` is the distance reported by the collection (``None`` if
        missing).
    """
    embedding = get_embedding(query)

    # Build the arguments once; the channel filter is the only optional part.
    query_kwargs = {
        "query_embeddings": [embedding],
        "n_results": top_k,
        "include": ["metadatas", "documents", "distances"],
    }
    if channel_id:
        query_kwargs["where"] = {"channel_id": channel_id}
    results = collection.query(**query_kwargs)

    metadatas = _first_query_row(results, "metadatas")
    documents = _first_query_row(results, "documents")
    distances = _first_query_row(results, "distances")

    videos = []
    for idx, meta in enumerate(metadatas):
        # An entry stored without metadata may come back as None.
        meta = meta or {}
        # Bounds-check before using the document as a title fallback so a
        # shorter documents list cannot raise IndexError.
        document = documents[idx] if idx < len(documents) else ""
        videos.append(
            {
                "video_id": meta.get("video_id", ""),
                "video_title": meta.get(
                    "video_title", meta.get("title", document)
                ),
                "channel": meta.get("channel", meta.get("channel_title", "")),
                "description": document,
                "score": distances[idx] if idx < len(distances) else None,
            }
        )
    return videos


def _first_query_row(results, key: str) -> list:
    """Return the result list for the first (only) query, or ``[]`` if absent.

    Query results are nested one list per query embedding. The field may be
    missing, ``None``, or empty, and all of those collapse to ``[]``.
    """
    rows = results.get(key) or [[]]
    return rows[0] or []