Ajayan's picture
Update space
40784d8
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
from src.utils import load_from_pickle, validate_input
VECTOR_PATH = "model/tfidf_vectorizer.pkl"
MATRIX_PATH = "model/tfidf_matrix.pkl"
DATA_PATH = "data/books_summary.csv"
# 1. Load the pre-trained models and data
print("Loading models and data...")
tfidf_vectorizer = load_from_pickle(VECTOR_PATH)
tfidf_matrix = load_from_pickle(MATRIX_PATH)
books_df = pd.read_csv(DATA_PATH)
print(f"Original dataset shape: {books_df.shape}")
# Group by 'book_name' and 'summaries', and aggregate 'categories' into a single cell
books_df = books_df.groupby(["book_name", "summaries"], as_index=False).agg(
{"categories": lambda tags: ", ".join(set(tags.dropna()))}
) # Remove duplicates within tags
print(f"After aggregating categories: {books_df.shape}")
# Drop duplicates (just to be extra cautious)
books_df = books_df.drop_duplicates(subset=["book_name", "summaries"], keep="first")
book_titles = books_df["book_name"].tolist()
print("Models and data loaded successfully!")
# 2. Recommendation Function
def recommend_books(input_book_title):
"""
Recommends top 5 similar books based on the input book title.
Args:
input_book_title (str): The title of the book input by the user.
Returns:
List of recommended books with their summaries and tags.
"""
# Validate input
if not validate_input(input_book_title, book_titles):
return "Book title not found in the dataset. Please try another title."
# Find index of the input book
book_index = books_df[books_df["book_name"] == input_book_title].index[0]
# Compute cosine similarity
cosine_similarities = cosine_similarity(
tfidf_matrix[book_index], tfidf_matrix
).flatten()
# Sort and get top 5 similar books (excluding the input book itself)
similar_indices = cosine_similarities.argsort()[-6:-1][::-1]
recommendations = books_df.iloc[similar_indices]
"""# Format the output
output = []
for _, row in recommendations.iterrows():
output.append(f"**Title:** {row['book_name']}\n**Summary:** {row['summaries']}\n**Tags:** {row['categories']}\n")
return "\n\n".join(output)"""
# Format the recommendations for the UI
formatted_books = []
for _, row in recommendations.iterrows():
formatted_books.append(
{
"title": row["book_name"],
"description": row["summaries"],
"categories": row["categories"].split(", "),
}
)
return formatted_books
def display_recommendations(book_title):
"""
Wrapper function to display recommendations.
"""
result = recommend_books(book_title)
if isinstance(result, str): # If it's an error message
return result
# Construct formatted HTML response for book recommendations
response = ""
for book in result:
response += f"""
<div style='border:1px solid #ddd; border-radius:10px; padding:10px; margin:10px; box-shadow:2px 2px 8px #ccc;'>
<h2 style='color:#333;'>{book['title']}</h2>
<p style='color:#555;'>{book['description']}</p>
<div>
{" ".join([f"<button style='background-color:#007BFF; color:white; border:none; padding:5px 10px; margin:2px; border-radius:5px;'>{tag}</button>" for tag in book['categories']])}
</div>
</div>
"""
return response
# 3. Gradio Interface
# Gradio UI definition
interface = gr.Interface(
fn=display_recommendations,
inputs=gr.Textbox(label="Enter Book Title", placeholder="e.g., The Great Gatsby"),
outputs=gr.HTML(label="Top 5 Recommendations"),
title="πŸ“š Book Recommendation System",
description="Enter the title of a book, and we'll recommend 5 similar books.",
theme="compact",
)
if __name__ == "__main__":
# Run the Gradio interface when app.py is executed
interface.launch()