# Teagasc-HR-Bot / app.py
import os
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
import chainlit as cl
from dotenv import load_dotenv
from openai import AsyncOpenAI
import socketio
from fastapi import FastAPI
# Load environment variables
load_dotenv()
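# Assumed .env keys (based on the lookups below): OPENAI_API_KEY, PINECONE_API_KEY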
# Initialize Pinecone (serverless indexes no longer use the old "environment" setting)
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "tg-handbook-index"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )
index = pc.Index(index_name)
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
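# Note: OpenAIEmbeddings defaults to text-embedding-ada-002, whose
# 1536-dimensional vectors match the index dimension above.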
# Instrument the OpenAI client
cl.instrument_openai()
client = AsyncOpenAI()
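# AsyncOpenAI() reads OPENAI_API_KEY from the environment loaded above.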
settings = {
    "model": "gpt-4o",
    "temperature": 0,
}
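# Temperature 0 keeps responses as deterministic as possible, which suits
# policy lookups where the model should stick closely to the retrieved text.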
# Initialize Socket.IO server with custom ping timeout and interval
sio = socketio.AsyncServer(
    async_mode='asgi',
    ping_timeout=5,   # Time in seconds before a ping is considered timed out
    ping_interval=25  # Time in seconds between pings
)
# Initialize FastAPI app and mount the Socket.IO app
app = FastAPI()
app.mount('/', socketio.ASGIApp(sio))
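# Deployment note (assumption): on Hugging Face Spaces this is typically served
# with something like `uvicorn app:app --host 0.0.0.0 --port 7860`, or via
# `chainlit run app.py` when Chainlit manages the server itself.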
# Welcome message
welcome_message = """
Hello! I am the Teagasc Staff Handbook helper, a large language model trained to assist you with information from the Teagasc Staff Handbook.
You can ask me about policies, procedures, or any other information contained in the handbook.
"""
@cl.on_chat_start
async def main():
    msg = cl.Message(content=welcome_message)
    await msg.send()
# Prompt template for the chatbot
prompt_template = """
Your name is Teagasc Staff Handbook helper, a large language model trained by Teagasc based on the GPT-4o architecture.
Your role is to respond to the user's queries from the staff handbook.
Use the following documents to answer the user's query. Always include the sources of the documents in your response.
If the user query is not about the staff handbook, refuse to answer. The answers should be concise and accurate.
If the answer is not found in the context below, refuse to answer.
Current date: 2024-05-30.
Context: {context}
User Query: {query}
Sources: {sources}
"""
@cl.on_message
async def on_message(message: cl.Message):
    # Set up the Pinecone vector store with the existing index and embeddings
    docsearch = PineconeVectorStore(
        index=index,
        embedding=embeddings,
        namespace=None
    )
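    # namespace=None searches the index's default namespace.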
    try:
        # Perform similarity search with the query (LangChain defaults to the top k=4 matches)
        docs = docsearch.similarity_search(message.content)
        print(f"Found {len(docs)} documents.")
        for doc in docs:
            print(f"Document metadata: {doc.metadata}")
    except KeyError as ke:
        await cl.Message(content=f"Session error: {str(ke)}").send()
        print(f"Session error: {str(ke)}")
        return
    except Exception as e:
        await cl.Message(content=f"Error during similarity search: {str(e)}").send()
        print(f"Error during similarity search: {str(e)}")
        return
    if not docs:
        await cl.Message(content="No relevant documents found.").send()
        return
    # Extract the content and source URL from the matching documents.
    # Prefer the "page_content" metadata key if present, falling back to the
    # Document's own page_content attribute (where LangChain normally puts the text).
    context = "\n\n".join([doc.metadata.get("page_content") or doc.page_content or "No content" for doc in docs])
    sources = "\n".join([doc.metadata.get("source_url", "Unknown source") for doc in docs])
    if not context.strip():
        await cl.Message(content="Found documents but they contain no content.").send()
        return
    prompt = prompt_template.format(query=message.content, context=context, sources=sources)
    try:
        response = await client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": prompt
                },
                {
                    "role": "user",
                    "content": message.content
                }
            ],
            **settings
        )
        await cl.Message(content=response.choices[0].message.content).send()
    except Exception as e:
        await cl.Message(content=f"Error generating response: {str(e)}").send()
        print(f"Error generating response: {str(e)}")