gauravsahu1990 committed
Commit 9bee602 · verified · 1 Parent(s): 0a94afe

Upload folder using huggingface_hub

Files changed (4)
  1. Dockerfile +16 -0
  2. app.py +170 -0
  3. model_loader.py +27 -0
  4. requirements.txt +20 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ FROM python:3.9-slim
+
+ # Set the working directory inside the container
+ WORKDIR /app
+
+ # Copy all files from the current directory to the container's working directory
+ COPY . .
+
+ # Install dependencies from the requirements file without using cache to reduce image size
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Define the command to start the application using Gunicorn with a single worker process
+ # - `-w 1`: uses 1 worker process for handling requests
+ # - `-b 0.0.0.0:7860`: binds the server to port 7860 on all network interfaces
+ # - `app:app`: runs the Flask app (`app.py` contains the Flask instance named `app`)
+ CMD ["gunicorn", "-w", "1", "-b", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,170 @@
+ import os, io, base64, torch, logging
+ import pandas as pd
+ import matplotlib
+ matplotlib.use("Agg")  # headless backend; the container has no display
+ import matplotlib.pyplot as plt
+ from flask import Flask, request, jsonify
+ from sqlalchemy import create_engine, inspect
+ from model_loader import load_model
+
+ # -------------------------------------------------------
+ # 🧠 Flask App Setup
+ # -------------------------------------------------------
+ app = Flask("ChatBot-Backend")
+
+ # -------------------------------------------------------
+ # 🧾 Logging Configuration
+ # -------------------------------------------------------
+ LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
+ logging.basicConfig(
+     level=LOG_LEVEL,
+     format="[%(asctime)s] [%(levelname)s] %(message)s",
+     datefmt="%Y-%m-%d %H:%M:%S",
+ )
+ logger = logging.getLogger("ChatBot")
+
+ logger.info("🚀 Starting ChatBot backend service...")
+
+ # -------------------------------------------------------
+ # ⚙️ Database Configuration
+ # -------------------------------------------------------
+ DB_USER = os.getenv("DB_USER", "root")
+ DB_PASSWORD = os.getenv("DB_PASSWORD", "root1234")
+ DB_HOST = os.getenv("DB_HOST", "database-1.chks4awear3o.eu-north-1.rds.amazonaws.com")
+ DB_PORT = os.getenv("DB_PORT", "3306")
+ DB_NAME = os.getenv("DB_NAME", "chatbot_db")
+
+ # -------------------------------------------------------
+ # 🧩 Database Engine Setup
+ # -------------------------------------------------------
+ try:
+     engine = create_engine(f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}")
+     insp = inspect(engine)
+     logger.info("✅ Connected to MySQL successfully.")
+ except Exception as e:
+     logger.error(f"❌ Database connection failed: {e}")
+     engine = None
+
+ # -------------------------------------------------------
+ # 🧠 Model and Schema
+ # -------------------------------------------------------
+ tokenizer, model = None, None
+ schema_description = ""
+
+ def build_schema_description():
+     """Builds schema text dynamically from MySQL tables."""
+     global schema_description
+     if not engine:
+         schema_description = "⚠️ Database connection unavailable."
+         return
+     try:
+         schema_description = ""
+         for table in insp.get_table_names():
+             schema_description += f"Table: {table}\n"
+             for col in insp.get_columns(table):
+                 schema_description += f"  - {col['name']} ({col['type']})\n"
+             schema_description += "\n"
+         logger.info("📘 Schema description built successfully.")
+     except Exception as e:
+         logger.error(f"⚠️ Error while building schema: {e}")
+         schema_description = f"⚠️ Schema fetch error: {e}"
+
+ def generate_sql(question: str) -> str:
+     """Generates a SQL query from the user question using the model."""
+     if tokenizer is None or model is None:
+         raise RuntimeError("Model not loaded yet.")
+     logger.info(f"🧩 Generating SQL for: {question}")
+
+     prompt = (
+         "You are a professional SQL generator.\n"
+         "Convert the following question into a valid SQL query based on this schema:\n\n"
+         f"{schema_description}\n"
+         f"Question: {question}\n\nSQL:"
+     )
+
+     inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
+     with torch.no_grad():
+         # Greedy decoding; temperature is ignored when do_sample=False, so it is omitted
+         outputs = model.generate(**inputs, max_new_tokens=200, do_sample=False)
+
+     # The decoded text includes the prompt, so keep only the part from the first SELECT onward
+     sql = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     if "SELECT" in sql.upper():
+         sql = sql[sql.upper().find("SELECT"):]
+     sql = sql.strip()
+     logger.info(f"🧠 Generated SQL: {sql}")
+     return sql
+
+
+ # Note: before_first_request was removed in Flask 2.3; requirements.txt pins
+ # flask==2.2.2, where it still works (with a deprecation warning).
+ @app.before_first_request
+ def init_model():
+     """Loads the model and builds the schema once before the first API call."""
+     global tokenizer, model
+     logger.info("🪄 Initializing model on first request...")
+     tokenizer, model = load_model()
+     model.eval()
+     build_schema_description()
+     logger.info("✅ Model loaded and schema ready.")
+
+
+ # -------------------------------------------------------
+ # 🌐 Routes
+ # -------------------------------------------------------
+ @app.route("/")
+ def home():
+     return jsonify({"message": "Chatbot backend is running!"})
+
+
+ @app.route("/api/ask", methods=["POST"])
+ def ask():
+     """Main API endpoint for answering user queries."""
+     try:
+         data = request.get_json(force=True)
+     except Exception as e:
+         logger.error(f"❌ Invalid JSON received: {e}")
+         return jsonify({"error": "Invalid JSON payload"}), 400
+
+     question = data.get("question", "").strip()
+     if not question:
+         return jsonify({"error": "Empty question"}), 400
+
+     logger.info(f"🗨️ Received question: {question}")
+
+     try:
+         sql = generate_sql(question)
+         df = pd.read_sql(sql, engine)
+         logger.info(f"✅ SQL executed successfully, {len(df)} rows fetched.")
+
+         if df.empty:
+             return jsonify({"answer": "No relevant data found in the database."})
+
+         html_table = df.to_html(index=False, classes="table table-striped")
+
+         # Plot a bar chart of the first two columns, if possible
+         chart_base64 = None
+         try:
+             if len(df.columns) >= 2:
+                 # Let pandas create the figure; passing figsize here avoids
+                 # leaving an extra empty figure behind
+                 ax = df.plot(x=df.columns[0], y=df.columns[1], kind="bar", figsize=(6, 4))
+                 ax.set_title(question)
+                 plt.tight_layout()
+
+                 buf = io.BytesIO()
+                 plt.savefig(buf, format="png")
+                 buf.seek(0)
+                 chart_base64 = base64.b64encode(buf.read()).decode("utf-8")
+                 plt.close()
+                 logger.info("📈 Chart generated successfully.")
+         except Exception as plot_err:
+             logger.warning(f"⚠️ Chart generation failed: {plot_err}")
+
+         return jsonify({
+             "answer": f"Here’s what I found:<br>{html_table}",
+             "chart": chart_base64
+         })
+
+     except Exception as e:
+         logger.exception(f"❌ Error processing request: {e}")
+         return jsonify({"answer": f"⚠️ Error: {str(e)}"})
+
+ # -----------------------
+ # Run Flask app
+ # -----------------------
+ if __name__ == '__main__':
+     app.run(debug=True)
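For reference, a minimal sketch of calling the /api/ask endpoint from Python, assuming the server is reachable on localhost:7860 (the port Gunicorn binds in the Dockerfile); the question text is made up for illustration:

import base64, requests

resp = requests.post(
    "http://localhost:7860/api/ask",
    json={"question": "How many rows are in each table?"},  # hypothetical question
)
payload = resp.json()
print(payload["answer"])      # HTML table, or an error message on failure
if payload.get("chart"):
    # The chart comes back base64-encoded; write it out as a PNG
    with open("chart.png", "wb") as f:
        f.write(base64.b64decode(payload["chart"]))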
model_loader.py ADDED
@@ -0,0 +1,27 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import os
+
+ MODEL_NAME = "Yuk050/gemma-3-1b-text-to-sql-model"
+ LOCAL_DIR = "./model_cache"
+
+ _tokenizer = None
+ _model = None
+
+ def load_model():
+     """Loads the tokenizer and model once, caching them in memory and on disk."""
+     global _tokenizer, _model
+     if _tokenizer is not None and _model is not None:
+         return _tokenizer, _model
+
+     print("🔄 Loading model...")
+     if os.path.exists(LOCAL_DIR):
+         # Reuse the on-disk copy saved by a previous run
+         _tokenizer = AutoTokenizer.from_pretrained(LOCAL_DIR)
+         _model = AutoModelForCausalLM.from_pretrained(LOCAL_DIR, trust_remote_code=True)
+     else:
+         # First run: download from the Hub, then cache locally for later runs
+         _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+         _model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
+         os.makedirs(LOCAL_DIR, exist_ok=True)
+         _tokenizer.save_pretrained(LOCAL_DIR)
+         _model.save_pretrained(LOCAL_DIR)
+
+     print("✅ Model loaded successfully!")
+     return _tokenizer, _model
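A quick sketch of using the loader outside Flask, e.g. to smoke-test the checkpoint before deploying; the prompt string here is an assumption, not taken from the app:

import torch
from model_loader import load_model

tokenizer, model = load_model()   # a second call returns the cached pair
model.eval()

inputs = tokenizer("SQL:", return_tensors="pt").to(model.device)  # hypothetical prompt
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=20, do_sample=False)
print(tokenizer.decode(out[0], skip_special_tokens=True))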
requirements.txt ADDED
@@ -0,0 +1,20 @@
+ # Core AI libraries
+ transformers>=4.47.0
+ accelerate>=1.0.0
+ safetensors>=0.4.5
+ torch==2.4.1
+ torchvision==0.19.1
+ torchaudio==2.4.1
+
+ # Data + DB
+ sqlalchemy==2.0.36
+ pymysql==1.1.1
+ pandas==2.2.3
+ requests==2.32.3
+ matplotlib==3.9.2
+
+ # Web server
+ flask==2.2.2
+ werkzeug==2.2.3
+ gunicorn
+ uvicorn[standard]