Spaces:

hamxaameer
/

OutfitOrbit-Chatbot-Assistant

Sleeping

App Files Files Community

hamxaameer committed 26 days ago

Commit

eceb5f0

verified ·

1 Parent(s): 9ae102e

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -53

app.py CHANGED Viewed

@@ -62,7 +62,6 @@ def initialize_llm():
                 device=device,
                 max_length=300,
                 truncation=True,
-                model_kwargs={"low_cpu_mem_usage": True, "use_cache": True}  # Optimize for speed
             )
             CONFIG["llm_model"] = model_name
@@ -377,12 +376,10 @@ Answer the question using the knowledge above. Be specific and helpful (100-250
             # T5 uses max_length not max_new_tokens
             output = llm_client(
                 user_prompt,
-                max_length=150,  # Even shorter for faster response
-                temperature=0.7,  # Lower temp for consistency
-                top_p=0.9,
                 do_sample=True,
-                num_beams=1,  # Disable beam search for speed
-                early_stopping=True
             )
         else:
             # Other models use max_new_tokens
@@ -491,60 +488,26 @@ def generate_answer_langchain(
 # GRADIO INTERFACE
 # ============================================================================
-def fashion_chatbot(message: str, history: List[List[str]]):
     """
-    Chatbot function for Gradio interface with streaming
     """
     try:
         if not message or not message.strip():
-            yield "Please ask a fashion-related question!"
-            return
-        # Show typing indicator
-        yield "🔍 Searching fashion knowledge base..."
-        # Retrieve documents
-        retrieved_docs, confidence = retrieve_knowledge_langchain(
             message.strip(),
             vectorstore,
-            top_k=CONFIG["top_k"]
         )
-        if not retrieved_docs:
-            yield "I couldn't find relevant information to answer your question."
-            return
-        # Update status
-        yield f"💭 Generating answer (found {len(retrieved_docs)} relevant sources)..."
-        # Generate answer with multiple attempts
-        llm_answer = None
-        for attempt in range(1, 5):
-            logger.info(f"\n  🤖 LLM Generation Attempt {attempt}/4")
-            llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
-            if llm_answer:
-                break
-        # Fallback if needed
-        if not llm_answer:
-            logger.error(f"  ✗ All LLM attempts failed - using fallback")
-            llm_answer = synthesize_direct_answer(message.strip(), retrieved_docs)
-        # Stream the answer word by word for natural flow
-        words = llm_answer.split()
-        displayed_text = ""
-        for i, word in enumerate(words):
-            displayed_text += word + " "
-            # Yield every 2-3 words for smooth streaming
-            if i % 2 == 0 or i == len(words) - 1:
-                yield displayed_text.strip()
     except Exception as e:
         logger.error(f"Error in chatbot: {e}")
-        yield f"Sorry, I encountered an error: {str(e)}"
 # ============================================================================
 # INITIALIZE AND LAUNCH
@@ -575,7 +538,7 @@ def startup():
 # Initialize on startup
 startup()
-# Create Gradio interface with streaming enabled
 demo = gr.ChatInterface(
     fn=fashion_chatbot,
     title="👗 Fashion Advisor - RAG System",
@@ -598,10 +561,6 @@ I can help with:
         "How to dress for a summer wedding?",
         "What's the best outfit for a university presentation?",
     ],
-    cache_examples=False,  # Don't cache for fresh responses
-    retry_btn="🔄 Retry",
-    undo_btn="↩️ Undo",
-    clear_btn="🗑️ Clear",
 )
 # Launch

                 device=device,
                 max_length=300,
                 truncation=True,
             )
             CONFIG["llm_model"] = model_name
             # T5 uses max_length not max_new_tokens
             output = llm_client(
                 user_prompt,
+                max_length=200,  # Shorter for speed
+                temperature=temperature,
+                top_p=top_p,
                 do_sample=True,
             )
         else:
             # Other models use max_new_tokens
 # GRADIO INTERFACE
 # ============================================================================
+def fashion_chatbot(message: str, history: List[List[str]]) -> str:
     """
+    Chatbot function for Gradio interface
     """
     try:
         if not message or not message.strip():
+            return "Please ask a fashion-related question!"
+        # Generate answer using RAG pipeline
+        answer = generate_answer_langchain(
             message.strip(),
             vectorstore,
+            llm_client
         )
+        return answer
     except Exception as e:
         logger.error(f"Error in chatbot: {e}")
+        return f"Sorry, I encountered an error: {str(e)}"
 # ============================================================================
 # INITIALIZE AND LAUNCH
 # Initialize on startup
 startup()
+# Create Gradio interface - simple version compatible with all Gradio versions
 demo = gr.ChatInterface(
     fn=fashion_chatbot,
     title="👗 Fashion Advisor - RAG System",
         "How to dress for a summer wedding?",
         "What's the best outfit for a university presentation?",
     ],
 )
 # Launch