Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import pandas as pd | |
| import skops.io as sio | |
| from io import BytesIO | |
class StockPredictor:
    """
    Load per-ticker stock prediction models, prepare historical price data,
    and forecast future closing prices.

    Attributes
    ----------
    models : dict
        Mapping of ticker symbol to the model loaded for it.
    data_dir : str
        Directory containing the historical stock data CSV files
        (one ``<ticker>.csv`` file per ticker).

    Methods
    -------
    load_models(model_dir):
        Loads every ``.skops`` model found in the specified directory.
    load_stock_data(ticker):
        Loads and processes historical stock data from a CSV file.
    forecast(ticker, days):
        Forecasts stock prices for the specified ticker and number of days.
    """

    # Model input columns, in the order they are handed to ``model.predict``.
    FEATURES = ["year", "month", "day", "ma_5", "ma_10", "lag_5", "lag_10"]

    # Number of trailing historical rows shown next to the forecast.
    HISTORY_DAYS = 30

    def __init__(self, model_dir="model/SKLearn_Models", data_dir="data"):
        """
        Load all models from ``model_dir`` and remember the data directory.

        Parameters
        ----------
        model_dir : str
            Directory containing the trained ``.skops`` models.
        data_dir : str
            Directory containing the historical stock data CSV files.
        """
        self.models = self.load_models(model_dir)
        self.data_dir = data_dir

    def load_models(self, model_dir):
        """
        Load every ``.skops`` file in ``model_dir``.

        The ticker is taken from the part of the file name before the first
        underscore, so ``AAPL_model.skops`` is stored under ``"AAPL"``.

        Parameters
        ----------
        model_dir : str
            Directory containing the trained models.

        Returns
        -------
        dict
            Mapping of ticker symbol to loaded model.
        """
        models = {}
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # ticker order stable between runs, and makes it deterministic which
        # file wins when two files share a ticker prefix.
        for file_name in sorted(os.listdir(model_dir)):
            if file_name.endswith(".skops"):
                ticker = file_name.split("_")[0]
                models[ticker] = sio.load(os.path.join(model_dir, file_name))
        return models

    def load_stock_data(self, ticker):
        """
        Load ``<data_dir>/<ticker>.csv`` and derive the model features.

        The CSV must provide ``date`` and ``close`` columns. Rows before
        2000-01-01 are discarded, the rest is sorted by date, and calendar,
        moving-average and lag features are added.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.

        Returns
        -------
        pandas.DataFrame
            Processed historical stock data without any NaN rows.
        """
        csv_path = os.path.join(self.data_dir, f"{ticker}.csv")
        data = pd.read_csv(csv_path)
        data["date"] = pd.to_datetime(data["date"])

        # Copy the filtered rows so that the column assignments below act on
        # an independent frame instead of a slice of the raw data (this is
        # what triggers pandas' chained-assignment warnings).
        data = data[data["date"] >= "2000-01-01"].copy()
        data = data.sort_values("date")

        # Calendar features.
        data["year"] = data["date"].dt.year
        data["month"] = data["date"].dt.month
        data["day"] = data["date"].dt.day

        # Moving averages of the close price (the window includes the row itself).
        data["ma_5"] = data["close"].rolling(window=5).mean()
        data["ma_10"] = data["close"].rolling(window=10).mean()

        # Close price 5 and 10 rows earlier.
        data["lag_5"] = data["close"].shift(5)
        data["lag_10"] = data["close"].shift(10)

        # The rolling/shift features are NaN for the first rows; note that
        # dropna also removes rows with a NaN in any other column.
        return data.dropna()

    def forecast(self, ticker, days):
        """
        Forecast closing prices for ``ticker`` over the next ``days`` days.

        Parameters
        ----------
        ticker : str
            Stock ticker symbol.
        days : int
            Number of days to forecast. Integral floats (as UI sliders may
            deliver) are accepted and converted to ``int``.

        Returns
        -------
        tuple
            ``(table, image)``. ``table`` is a DataFrame, indexed from 0, with
            ``date``, ``close`` and ``predicted_close`` columns holding the
            last ``HISTORY_DAYS`` historical rows followed by the forecast
            rows (whose ``close`` is NaN). ``image`` is the rendered plot as
            a numpy array. If no model is loaded for ``ticker`` or no usable
            data remains, ``table`` is a one-row DataFrame with an ``Error``
            column and ``image`` is ``None``.
        """
        model = self.models.get(ticker)
        # Compare against None explicitly: the truthiness of a model object
        # depends on whether its class defines __len__/__bool__.
        if model is None:
            return pd.DataFrame({"Error": ["Model not found"]}), None

        # pd.date_range needs an integer number of periods.
        days = int(days)

        data = self.load_stock_data(ticker)
        if data.empty:
            # Without at least one processed row there is nothing to seed the
            # forecast from (iloc[-1] would raise an opaque IndexError).
            return pd.DataFrame({"Error": ["No data available"]}), None

        features = list(self.FEATURES)

        # Only the trailing rows are shown, so the in-sample predictions are
        # computed for those rows only instead of the whole history.
        history = data[["date", "close"]].tail(self.HISTORY_DAYS).copy()
        history["predicted_close"] = model.predict(
            data[features].tail(self.HISTORY_DAYS)
        )

        # NOTE(review): these are calendar days, so weekends are included even
        # though the lag features step row by row - confirm this is intended.
        future_dates = pd.date_range(
            start=data["date"].max() + pd.Timedelta(days=1), periods=days
        )
        prediction_df = pd.DataFrame(
            {
                "date": future_dates,
                "predicted_close": self._predict_future(model, data, future_dates),
            }
        )

        # HISTORY_DAYS actual rows followed by the forecast, matching the
        # plot title for every value of ``days``.
        plot_data = pd.concat([history, prediction_df], ignore_index=True)
        return plot_data, self._render_plot(plot_data, ticker, days)

    def _predict_future(self, model, data, future_dates):
        """Roll the model forward one date at a time, feeding predictions back in."""
        features = list(self.FEATURES)
        # Start from the feature values of the most recent historical row.
        row = data[features].iloc[-1].copy()
        # Sliding window of the 10 most recent closes (actual, then predicted).
        closes = data["close"].iloc[-10:].tolist()

        predictions = []
        for date in future_dates:
            row["year"] = date.year
            row["month"] = date.month
            row["day"] = date.day
            # The lags are only refreshed once enough closes are available;
            # otherwise the values from the last historical row are kept.
            if len(closes) >= 5:
                row["lag_5"] = closes[-5]
            if len(closes) >= 10:
                row["lag_10"] = closes[-10]

            model_input = pd.DataFrame([row], columns=features)
            predicted = model.predict(model_input)[0]
            predictions.append(predicted)

            # Slide the window and refresh the moving averages for the next step.
            closes.append(predicted)
            closes = closes[-10:]
            row["ma_5"] = np.mean(closes[-5:])
            row["ma_10"] = np.mean(closes)
        return predictions

    def _render_plot(self, plot_data, ticker, days):
        """Draw actual vs. predicted closes and return the PNG as a numpy array."""
        fig, ax = plt.subplots(figsize=(14, 7))
        try:
            ax.plot(plot_data["date"], plot_data["close"], label="Actual")
            ax.plot(plot_data["date"], plot_data["predicted_close"], label="Predicted")
            ax.set_xlabel("Date")
            ax.set_ylabel("Stock Price")
            ax.set_title(
                f"Last {self.HISTORY_DAYS} Days Actual and Next {days} Days "
                f"Prediction for {ticker}"
            )
            ax.legend()
            ax.grid(True)
            ax.tick_params(axis="x", labelrotation=45)

            buf = BytesIO()
            fig.savefig(buf, format="png")
            buf.seek(0)
            return np.array(plt.imread(buf))
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)
def create_gradio_interface(stock_predictor):
    """
    Build the Gradio UI around ``stock_predictor.forecast``.

    Parameters
    ----------
    stock_predictor : StockPredictor
        Predictor whose loaded models supply the ticker choices and whose
        ``forecast`` method handles each request.

    Returns
    -------
    gradio.Interface
        Interface with a ticker dropdown and a day-count slider as inputs,
        and a table plus an image as outputs.
    """
    # Inputs: one dropdown entry per loaded model, and a 1-30 day horizon.
    ticker_input = gr.Dropdown(
        choices=list(stock_predictor.models),
        label="Select Ticker",
    )
    days_input = gr.Slider(
        minimum=1,
        maximum=30,
        step=1,
        label="Number of Days for Forecasting",
    )

    # Outputs mirror the (DataFrame, image) tuple returned by forecast().
    table_output = gr.DataFrame(headers=["date", "close", "predicted_close"])
    plot_output = gr.Image(type="numpy")

    return gr.Interface(
        fn=stock_predictor.forecast,
        inputs=[ticker_input, days_input],
        outputs=[table_output, plot_output],
        title="Stock Price Forecasting",
        description="Select a ticker and number of days to forecast stock prices.",
    )
if __name__ == "__main__":
    # Load the bundled models and the cleaned NASDAQ daily data, wrap the
    # predictor in its Gradio interface, and start serving the app.
    stock_predictor = StockPredictor(
        "model/SKLearn_Models",
        "data/Cleaned_Kaggle_NASDAQ_Daily_Data",
    )
    iface = create_gradio_interface(stock_predictor)
    iface.launch()