import os

import chainlit as cl
import socketio
from dotenv import load_dotenv
from fastapi import FastAPI
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from openai import AsyncOpenAI
from pinecone import Pinecone, ServerlessSpec

# Load environment variables
load_dotenv()

# Initialize Pinecone. The serverless client only needs the API key;
# the cloud and region are set via ServerlessSpec below.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

index_name = "tg-handbook-index"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # dimensionality of OpenAI's default embedding model
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
index = pc.Index(index_name)

embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Instrument the OpenAI client so Chainlit traces each completion call
cl.instrument_openai()
client = AsyncOpenAI()

settings = {
    "model": "gpt-4o",
    "temperature": 0,
}

# Initialize the Socket.IO server with a custom ping timeout and interval
sio = socketio.AsyncServer(
    async_mode="asgi",
    ping_timeout=5,    # seconds before a ping is considered timed out
    ping_interval=25,  # seconds between pings
)

# Initialize the FastAPI app and mount the Socket.IO app
app = FastAPI()
app.mount("/", socketio.ASGIApp(sio))

# Welcome message
welcome_message = """
Hello! I am the Teagasc Staff Handbook helper, a large language model trained
to assist you with information from the Teagasc Staff Handbook. You can ask me
about policies, procedures, or any other information contained in the handbook.
"""


@cl.on_chat_start
async def main():
    await cl.Message(content=welcome_message).send()


# Prompt template for the chatbot
prompt_template = """
Your name is Teagasc Staff Handbook helper, a large language model trained by
Teagasc based on the GPT-4o architecture. Your role is to answer the user's
queries from the staff handbook. Use the following documents to answer the
user's query, and always include the sources of the documents in your response.
If the query is not about the staff handbook, or the answer is not found in the
context below, refuse to answer. Keep answers concise and accurate.
Current date: 2024-05-30.

Context: {context}

User Query: {query}

Sources: {sources}
"""


@cl.on_message
async def on_message(message: cl.Message):
    # Set up the Pinecone vector store with the existing index and embeddings
    docsearch = PineconeVectorStore(index=index, embedding=embeddings, namespace=None)

    try:
        # Perform a similarity search with the user's query
        docs = docsearch.similarity_search(message.content)
        print(f"Found {len(docs)} documents.")
        for doc in docs:
            print(f"Document metadata: {doc.metadata}")
    except KeyError as ke:
        await cl.Message(content=f"Session error: {str(ke)}").send()
        print(f"Session error: {str(ke)}")
        return
    except Exception as e:
        await cl.Message(content=f"Error during similarity search: {str(e)}").send()
        print(f"Error during similarity search: {str(e)}")
        return

    if not docs:
        await cl.Message(content="No relevant documents found.").send()
        return

    # Extract the content and source URL from the matching documents.
    # This assumes the ingestion pipeline stored "page_content" and
    # "source_url" keys in each vector's metadata.
    context = "\n\n".join(doc.metadata.get("page_content", "No content") for doc in docs)
    sources = "\n".join(doc.metadata.get("source_url", "Unknown source") for doc in docs)

    if not context.strip():
        await cl.Message(content="Found documents but they contain no content.").send()
        return

    prompt = prompt_template.format(query=message.content, context=context, sources=sources)

    try:
        response = await client.chat.completions.create(
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": message.content},
            ],
            **settings,
        )
        await cl.Message(content=response.choices[0].message.content).send()
    except Exception as e:
        await cl.Message(content=f"Error generating response: {str(e)}").send()
        print(f"Error generating response: {str(e)}")
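

# ---------------------------------------------------------------------------
# Ingestion sketch (an assumption, not part of the handler above): the
# retrieval code expects each vector's metadata to carry "page_content" and
# "source_url" keys. A helper along these lines could populate the index in
# that shape; the function name, sample text, and URL are placeholders.
def ingest_documents(texts_with_sources):
    """Embed (text, source_url) pairs and upsert them into the index."""
    from langchain_core.documents import Document

    docs = [
        Document(
            page_content=text,
            # Duplicate the text into metadata so on_message() can read it
            # back via doc.metadata["page_content"].
            metadata={"page_content": text, "source_url": url},
        )
        for text, url in texts_with_sources
    ]
    PineconeVectorStore.from_documents(docs, embedding=embeddings, index_name=index_name)


if __name__ == "__main__":
    # Example usage with placeholder data:
    ingest_documents(
        [("Annual leave entitlements are set out in the handbook.",
          "https://example.org/handbook")]
    )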