hamxaameer committed on
Commit
eceb5f0
·
verified ·
1 Parent(s): 9ae102e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -53
app.py CHANGED
@@ -62,7 +62,6 @@ def initialize_llm():
62
  device=device,
63
  max_length=300,
64
  truncation=True,
65
- model_kwargs={"low_cpu_mem_usage": True, "use_cache": True} # Optimize for speed
66
  )
67
 
68
  CONFIG["llm_model"] = model_name
@@ -377,12 +376,10 @@ Answer the question using the knowledge above. Be specific and helpful (100-250
377
  # T5 uses max_length not max_new_tokens
378
  output = llm_client(
379
  user_prompt,
380
- max_length=150, # Even shorter for faster response
381
- temperature=0.7, # Lower temp for consistency
382
- top_p=0.9,
383
  do_sample=True,
384
- num_beams=1, # Disable beam search for speed
385
- early_stopping=True
386
  )
387
  else:
388
  # Other models use max_new_tokens
@@ -491,60 +488,26 @@ def generate_answer_langchain(
491
  # GRADIO INTERFACE
492
  # ============================================================================
493
 
494
- def fashion_chatbot(message: str, history: List[List[str]]):
495
  """
496
- Chatbot function for Gradio interface with streaming
497
  """
498
  try:
499
  if not message or not message.strip():
500
- yield "Please ask a fashion-related question!"
501
- return
502
-
503
- # Show typing indicator
504
- yield "🔍 Searching fashion knowledge base..."
505
 
506
- # Retrieve documents
507
- retrieved_docs, confidence = retrieve_knowledge_langchain(
508
  message.strip(),
509
  vectorstore,
510
- top_k=CONFIG["top_k"]
511
  )
512
 
513
- if not retrieved_docs:
514
- yield "I couldn't find relevant information to answer your question."
515
- return
516
-
517
- # Update status
518
- yield f"💭 Generating answer (found {len(retrieved_docs)} relevant sources)..."
519
-
520
- # Generate answer with multiple attempts
521
- llm_answer = None
522
- for attempt in range(1, 5):
523
- logger.info(f"\n 🤖 LLM Generation Attempt {attempt}/4")
524
- llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
525
-
526
- if llm_answer:
527
- break
528
-
529
- # Fallback if needed
530
- if not llm_answer:
531
- logger.error(f" ✗ All LLM attempts failed - using fallback")
532
- llm_answer = synthesize_direct_answer(message.strip(), retrieved_docs)
533
-
534
- # Stream the answer word by word for natural flow
535
- words = llm_answer.split()
536
- displayed_text = ""
537
-
538
- for i, word in enumerate(words):
539
- displayed_text += word + " "
540
-
541
- # Yield every 2-3 words for smooth streaming
542
- if i % 2 == 0 or i == len(words) - 1:
543
- yield displayed_text.strip()
544
 
545
  except Exception as e:
546
  logger.error(f"Error in chatbot: {e}")
547
- yield f"Sorry, I encountered an error: {str(e)}"
548
 
549
  # ============================================================================
550
  # INITIALIZE AND LAUNCH
@@ -575,7 +538,7 @@ def startup():
575
  # Initialize on startup
576
  startup()
577
 
578
- # Create Gradio interface with streaming enabled
579
  demo = gr.ChatInterface(
580
  fn=fashion_chatbot,
581
  title="👗 Fashion Advisor - RAG System",
@@ -598,10 +561,6 @@ I can help with:
598
  "How to dress for a summer wedding?",
599
  "What's the best outfit for a university presentation?",
600
  ],
601
- cache_examples=False, # Don't cache for fresh responses
602
- retry_btn="🔄 Retry",
603
- undo_btn="↩️ Undo",
604
- clear_btn="🗑️ Clear",
605
  )
606
 
607
  # Launch
 
62
  device=device,
63
  max_length=300,
64
  truncation=True,
 
65
  )
66
 
67
  CONFIG["llm_model"] = model_name
 
376
  # T5 uses max_length not max_new_tokens
377
  output = llm_client(
378
  user_prompt,
379
+ max_length=200, # Shorter for speed
380
+ temperature=temperature,
381
+ top_p=top_p,
382
  do_sample=True,
 
 
383
  )
384
  else:
385
  # Other models use max_new_tokens
 
488
  # GRADIO INTERFACE
489
  # ============================================================================
490
 
491
+ def fashion_chatbot(message: str, history: List[List[str]]) -> str:
492
  """
493
+ Chatbot function for Gradio interface
494
  """
495
  try:
496
  if not message or not message.strip():
497
+ return "Please ask a fashion-related question!"
 
 
 
 
498
 
499
+ # Generate answer using RAG pipeline
500
+ answer = generate_answer_langchain(
501
  message.strip(),
502
  vectorstore,
503
+ llm_client
504
  )
505
 
506
+ return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
 
508
  except Exception as e:
509
  logger.error(f"Error in chatbot: {e}")
510
+ return f"Sorry, I encountered an error: {str(e)}"
511
 
512
  # ============================================================================
513
  # INITIALIZE AND LAUNCH
 
538
  # Initialize on startup
539
  startup()
540
 
541
+ # Create Gradio interface - simple version compatible with all Gradio versions
542
  demo = gr.ChatInterface(
543
  fn=fashion_chatbot,
544
  title="👗 Fashion Advisor - RAG System",
 
561
  "How to dress for a summer wedding?",
562
  "What's the best outfit for a university presentation?",
563
  ],
 
 
 
 
564
  )
565
 
566
  # Launch