import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "Ronaldodev/llama"

# Load the model and tokenizer ONCE at startup, not on every request
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

def chat(prompt):
    # Tokenize the prompt and generate a completion with light sampling
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
    )
    # Note: this decodes the full sequence, so the reply echoes the prompt
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

iface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=5, placeholder="Write your message here..."),
    outputs=gr.Textbox(),
    title="AI Assistant (Llama 3.2 1B CPU)",
)

if __name__ == "__main__":
    iface.launch()
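
# --- Optional variant (a sketch, not part of the original script) ---
# generate() returns the prompt tokens followed by the newly generated
# ones, so the chat() above includes the user's prompt in its reply.
# Assuming the same tokenizer/model objects as above, slicing the output
# at the prompt length keeps only the model's answer:
def chat_reply_only(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
    )
    # Drop the first input_ids.shape[1] tokens (the echoed prompt)
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)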