import os

import gradio as gr
import torch
import spaces

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

import PyPDF2
from docx import Document
|
|
|
|
class ResumeRAG:
    """Answer questions about one uploaded resume using retrieval plus an LLM.

    A resume (PDF or DOCX) is converted to text, split into overlapping
    chunks, and indexed in an in-memory FAISS store. For each question the
    three most similar chunks are retrieved and handed to a 4-bit quantized
    Mistral-7B-Instruct model as context.

    NOTE(review): the index is stored on the instance, so every caller that
    shares one ``ResumeRAG`` object also shares the most recently processed
    resume. Confirm that this is acceptable for multi-user deployments.
    """

    def __init__(self):
        """Load the embedding model, text splitter, tokenizer and quantized LLM.

        Raises:
            RuntimeError: If PyTorch reports that no CUDA device is available.
        """
        self.has_cuda = torch.cuda.is_available()
        self.device = "cuda" if self.has_cuda else "cpu"
        print(f"Using device: {self.device}")

        # Fail fast: this class refuses to run without CUDA, so check before
        # spending time loading the embedding model.
        if not self.has_cuda:
            raise RuntimeError(
                "No CUDA GPU detected. Use a GPU Space/ZeroGPU, or switch to a smaller CPU model."
            )

        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": self.device},
        )

        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
        )

        # Populated by process_resume(); None means "no resume indexed yet".
        self.vector_store = None

        model_name = "mistralai/Mistral-7B-Instruct-v0.2"

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )

        print("Loading tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        print("Loading model...")
        # NOTE(review): trust_remote_code=True permits running code shipped in
        # the model repository; confirm it is actually required for this model.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
        )

        # Fall back to EOS when the tokenizer defines no padding token.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def _read_pdf(self, file_path: str) -> str:
        """Return the text of every page of a PDF; propagate reader errors."""
        with open(file_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Join pages with a newline so the last word of one page is not
            # fused with the first word of the next.
            return "\n".join((page.extract_text() or "") for page in reader.pages)

    def _read_docx(self, file_path: str) -> str:
        """Return the paragraph text of a DOCX file; propagate reader errors."""
        doc = Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs)

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Extract text from a PDF.

        Returns:
            The extracted text, or a message starting with
            ``"Error reading PDF: "`` if the file could not be read.
        """
        try:
            return self._read_pdf(file_path)
        except Exception as e:  # boundary: report any reader failure as text
            return f"Error reading PDF: {e}"

    def extract_text_from_docx(self, file_path: str) -> str:
        """Extract text from a DOCX file.

        Returns:
            The extracted text, or a message starting with
            ``"Error reading DOCX: "`` if the file could not be read.
        """
        try:
            return self._read_docx(file_path)
        except Exception as e:  # boundary: report any reader failure as text
            return f"Error reading DOCX: {e}"

    def process_resume(self, file) -> str:
        """Index an uploaded resume and return a human-readable status line.

        Args:
            file: A path (``str`` or ``os.PathLike``) or an object exposing
                the path as ``.name``; ``None`` means nothing was uploaded.

        Returns:
            A status message describing success or the reason for failure.
            On success ``self.vector_store`` is replaced by a new FAISS index.
        """
        if file is None:
            return "Please upload a resume file."

        # Accept plain paths as well as upload objects carrying ``.name``.
        # (pathlib paths also have ``.name``, but that is only the basename,
        # so path-like objects must be resolved with os.fspath instead.)
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            kind, reader = "PDF", self._read_pdf
        elif lowered.endswith(".docx"):
            kind, reader = "DOCX", self._read_docx
        else:
            return "Unsupported file format. Please upload PDF or DOCX."

        # Detect failures via exceptions rather than by inspecting the text,
        # so a resume that merely begins with "Error" is still accepted.
        try:
            text = reader(file_path)
        except Exception as e:
            return f"Error reading {kind}: {e}"

        if not text.strip():
            return "No text could be extracted from the resume."

        chunks = self.text_splitter.split_text(text)
        if not chunks:
            return "No text chunks could be created from the resume."

        self.vector_store = FAISS.from_texts(chunks, self.embeddings)
        return f"✅ Resume processed successfully! Extracted {len(chunks)} text chunks."

    def generate_answer(self, question: str, context: str) -> str:
        """Generate an answer to ``question`` grounded in ``context``.

        The prompt uses the ``[INST] ... [/INST]`` instruction wrapper and is
        completed with sampling (temperature 0.7, top-p 0.9) for at most 1024
        new tokens.

        Returns:
            The generated completion with surrounding whitespace stripped.
        """
        prompt = (
            "[INST] You are a helpful assistant analyzing a resume.\n"
            "\n"
            "Context:\n"
            f"{context}\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            "Answer only from the context. If the answer is not in the context, "
            "say it is not in the resume. [/INST]"
        )

        inputs = self.tokenizer(prompt, return_tensors="pt")

        # With device_map="auto" the model may be spread over devices; the
        # inputs must live where the input embedding weights are.
        target_device = self.model.get_input_embeddings().weight.device
        inputs = {k: v.to(target_device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # The returned sequence starts with the prompt tokens. Decode only
        # what was generated after them instead of splitting the text on
        # "[/INST]", which misfires when the resume or the answer contains
        # that marker.
        completion_ids = outputs[0][prompt_length:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

    def query(self, question: str):
        """Answer ``question`` from the currently indexed resume.

        Returns:
            A ``(answer, context)`` tuple of strings. When no resume has been
            indexed or the question is empty, ``answer`` holds a prompt for
            the user and ``context`` is ``""``.
        """
        if self.vector_store is None:
            return "Please upload a resume first.", ""

        if not question or not question.strip():
            return "Please enter a question.", ""

        docs = self.vector_store.similarity_search(question, k=3)
        context = "\n\n".join(d.page_content for d in docs)

        try:
            answer = self.generate_answer(question, context)
        finally:
            # Release cached GPU memory even when generation fails.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        return answer, context
|
|
|
|
print("Initializing Resume RAG System...")
# Built once at import time, so the models are loaded before the UI is defined.
rag_system = ResumeRAG()


# Two-column layout: upload and status on the left, question/answer on the right.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(
        """
        # 📄 Resume RAG Q&A System
        Powered by Mistral-7B + FAISS vector search

        Upload your resume and ask questions about experience, skills, education, and more.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Resume")
            file_input = gr.File(
                label="Upload PDF or DOCX",
                file_types=[".pdf", ".docx"]
            )
            upload_btn = gr.Button("Process Resume", variant="primary", size="lg")
            upload_status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown(
                """
                ---
                **Example Questions:**
                - What programming languages does the candidate know?
                - Summarize the work experience
                - What is the education background?
                - List all technical skills
                """
            )

        with gr.Column(scale=2):
            gr.Markdown("### 💬 Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What are the candidate's key skills?",
                lines=2
            )
            submit_btn = gr.Button("Get Answer", variant="primary", size="lg")

            answer_output = gr.Textbox(
                label="Answer",
                lines=8,
                interactive=False
            )

            with gr.Accordion("🔍 Retrieved Context", open=False):
                context_output = gr.Textbox(
                    label="Relevant Resume Sections",
                    lines=6,
                    interactive=False
                )

    # Only the question path is wrapped with the `spaces.GPU` decorator.
    # NOTE(review): presumably this requests a GPU for the duration of each
    # call on ZeroGPU hardware -- confirm against the `spaces` documentation.
    @spaces.GPU
    def query_gpu(q):
        """Forward the question to the shared RAG system; returns (answer, context)."""
        return rag_system.query(q)

    upload_btn.click(
        fn=rag_system.process_resume,
        inputs=[file_input],
        outputs=[upload_status]
    )

    # The button click and pressing Enter in the question box trigger the same handler.
    submit_btn.click(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output]
    )

    question_input.submit(
        fn=query_gpu,
        inputs=[question_input],
        outputs=[answer_output, context_output]
    )


if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link; presumably
    # unnecessary when the app is already hosted -- confirm for the target deployment.
    demo.launch(share=True)
|
|