Hugging Face Inference API
Recommended Models
- mistralai/Mistral-7B-Instruct-v0.1 - Best quality
- google/flan-t5-xxl - Good instruction following
- microsoft/DialoGPT-large - Conversational AI
Setup Steps
1. Get an HF API token from your Hugging Face account settings (a setup sketch follows this list)
2. Install the requests library
3. Implement the API calls
4. Handle the responses
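A minimal setup sketch for steps 1-2, assuming the token is stored in an environment variable named HF_API_TOKEN (the variable name is just an example, not part of the original steps):

# Install the dependency first (from a shell): pip install requests
import os

# Assumes the token was exported as HF_API_TOKEN; adjust to however you store secrets.
hf_token = os.environ.get("HF_API_TOKEN")
if not hf_token:
    raise RuntimeError("Set HF_API_TOKEN with your Hugging Face API token")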
Python Implementation
import requests

class HuggingFaceAPI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://api-inference.huggingface.co/models"

    def query_model(self, model_name, prompt, max_length=500):
        headers = {"Authorization": f"Bearer {self.api_key}"}
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": max_length,
                "temperature": 0.7,
                "do_sample": True
            }
        }
        try:
            response = requests.post(
                f"{self.base_url}/{model_name}",
                headers=headers,
                json=payload,
                timeout=60
            )
            response.raise_for_status()  # surface HTTP errors instead of parsing an error body
            result = response.json()
            return result[0]["generated_text"]
        except Exception as e:
            return f"Error: {str(e)}"

# Usage
hf_api = HuggingFaceAPI("your_hf_api_key_here")
response = hf_api.query_model(
    "mistralai/Mistral-7B-Instruct-v0.1",
    "Generate a Python function to calculate factorial"
)
print(response)
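The hosted Inference API typically answers with a 503 while a model is still loading, so a first request can fail. A hedged retry sketch on top of the class above (query_with_retry, the retry count, and the wait time are illustrative choices, not part of the original code):

import time

def query_with_retry(api, model_name, prompt, retries=3, wait_seconds=20):
    """Retry while the hosted model is still warming up."""
    for _ in range(retries):
        result = api.query_model(model_name, prompt)
        # query_model returns an "Error: ..." string on failure, so retry only in that case
        if not result.startswith("Error:"):
            return result
        time.sleep(wait_seconds)  # rough wait for the model to spin up; tune as needed
    return result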
Gradio Integration Example
import gradio as gr

def chat_interface(message, history):
    hf_api = HuggingFaceAPI("your_hf_api_key")
    response = hf_api.query_model("mistralai/Mistral-7B-Instruct-v0.1", message)
    return response

iface = gr.ChatInterface(
    chat_interface,
    title="PromptCraft AI Assistant",
    description="Powered by Hugging Face models"
)
iface.launch()
OpenAI API
Available Models
- gpt-4 - Most capable
- gpt-3.5-turbo - Fast & cost-effective
- gpt-4-turbo - Latest features
Pricing (approximate; see the quick estimate below)
- GPT-4: $0.03 per 1K input tokens
- GPT-3.5: $0.0015 per 1K input tokens
- Free tier: $18 credit for 3 months
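A quick back-of-the-envelope check using the input rates listed above (output-token pricing is ignored here for simplicity, so real bills will be higher; the helper name is illustrative):

def estimate_input_cost(prompt_tokens, model="gpt-3.5-turbo"):
    """Rough input-only cost estimate using the per-1K-token rates listed above."""
    rates_per_1k = {"gpt-4": 0.03, "gpt-3.5-turbo": 0.0015}  # USD per 1K input tokens
    return prompt_tokens / 1000 * rates_per_1k[model]

# Example: a 2,000-token prompt to GPT-4 is roughly $0.06 of input tokens
print(estimate_input_cost(2000, "gpt-4"))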
OpenAI Implementation
import openai
from typing import List, Dict, Optional

class OpenAIClient:
    def __init__(self, api_key: str, model: str = "gpt-3.5-turbo"):
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def chat_completion(self, messages: List[Dict], temperature: float = 0.7) -> str:
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=temperature,
                max_tokens=1000
            )
            return response.choices[0].message.content
        except Exception as e:
            return f"Error: {str(e)}"

    def single_message(self, prompt: str, system_message: Optional[str] = None) -> str:
        messages = []
        if system_message:
            messages.append({"role": "system", "content": system_message})
        messages.append({"role": "user", "content": prompt})
        return self.chat_completion(messages)

# Usage
openai_client = OpenAIClient("your_openai_api_key")
system_prompt = "You are a helpful AI assistant that provides direct answers."
response = openai_client.single_message(
    "Generate a Python function to calculate factorial",
    system_prompt
)
print(response)
Advanced Features
# Streaming responses (reuses the underlying client from the OpenAIClient instance above)
def stream_chat(messages):
    stream = openai_client.client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        stream=True
    )
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            yield chunk.choices[0].delta.content

# Function calling: describe a callable so the model can request it with structured arguments
functions = [{
    "name": "calculate_factorial",
    "description": "Calculate factorial of a number",
    "parameters": {
        "type": "object",
        "properties": {
            "number": {"type": "integer", "description": "The number to calculate factorial for"}
        },
        "required": ["number"]
    }
}]
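To show how the schema above is used, a minimal round-trip sketch with OpenAI's legacy functions/function_call interface (newer SDK versions also offer a tools= variant; the prompt and model choice here are illustrative):

import json
import math

response = openai_client.client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the factorial of 6?"}],
    functions=functions,
    function_call="auto"  # let the model decide whether to call the function
)

message = response.choices[0].message
if message.function_call is not None:
    # The model returns the arguments as a JSON string
    args = json.loads(message.function_call.arguments)
    print(message.function_call.name, "->", math.factorial(args["number"]))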
Lightweight Models
- microsoft/DialoGPT-small - 334MB
- distilgpt2 - 534MB
- facebook/blenderbot-400M-distill - 1.5GB
Requirements
- Minimum 2GB RAM for small models
- Python 3.8+ with the transformers library
- Internet access for the initial model download
- GPU optional but recommended (a quick environment check follows this list)
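A short check of the requirements above, assuming torch and transformers are already installed:

import sys
import torch
import transformers

print("Python:", sys.version.split()[0])                  # needs 3.8+
print("transformers:", transformers.__version__)
print("CUDA GPU available:", torch.cuda.is_available())   # optional, but speeds up generation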
Local Model Implementation
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

class LocalAIModel:
    def __init__(self, model_name: str = "microsoft/DialoGPT-small"):
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.chat_pipeline = None

    def load_model(self):
        """Load the model and tokenizer"""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
            # Create chat pipeline
            self.chat_pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                device=0 if torch.cuda.is_available() else -1  # use the GPU when one is present
            )
            return True
        except Exception as e:
            print(f"Error loading model: {e}")
            return False
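A short usage sketch under the assumption that load_model completes as above and returns True/False (the prompt and generation arguments are illustrative):

# Usage
local_model = LocalAIModel("microsoft/DialoGPT-small")
if local_model.load_model():
    output = local_model.chat_pipeline(
        "Hello, how are you?",
        max_new_tokens=50,
        pad_token_id=local_model.tokenizer.eos_token_id  # avoids a padding warning with DialoGPT
    )
    print(output[0]["generated_text"])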