import streamlit as st
import pandas as pd
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
from test import predict_hoax, evaluate_model_performance
from load_model import load_model
from styles import COMMON_CSS
from google.cloud import storage
from io import StringIO
import os
from datetime import datetime
import pytz


def save_corrections_to_gcs(bucket_name, file_name, correction_data):
    """Append user-corrected detection rows to a CSV object in Google Cloud Storage.

    Downloads the existing CSV (if any), concatenates the new records, and
    re-uploads the whole file, so corrections accumulate across sessions.

    Parameters
    ----------
    bucket_name : str
        Name of the target GCS bucket.
    file_name : str
        Object name of the CSV file inside the bucket.
    correction_data : list[dict]
        Records with keys Timestamp, Title, Content, Prediction, Correction.
    """
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(file_name)

    # Load the existing CSV if present so new corrections are appended,
    # not overwritten.
    if blob.exists():
        existing_data = blob.download_as_string().decode('utf-8')
        existing_df = pd.read_csv(StringIO(existing_data))
    else:
        # First save: start from an empty frame with the expected schema.
        existing_df = pd.DataFrame(
            columns=['Timestamp', 'Title', 'Content', 'Prediction', 'Correction']
        )

    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)

    # Whole-file rewrite: GCS objects are immutable, so upload the merged CSV.
    blob.upload_from_string(updated_df.to_csv(index=False), content_type='text/csv')


def load_data(file):
    """Read an uploaded CSV file-like object into a DataFrame."""
    return pd.read_csv(file)


def _flip_if_corrected(row):
    """Return the corrected label for a row.

    If the user ticked the Correction checkbox, the detected label is flipped
    (HOAX <-> NON-HOAX); any other detection value, or an unticked row, passes
    through unchanged. Idempotent: re-applying it to an already-corrected
    (string-valued) Correction column yields the same result.
    """
    if row['Correction']:
        if row['Detection'] == 'NON-HOAX':
            return 'HOAX'
        if row['Detection'] == 'HOAX':
            return 'NON-HOAX'
    return row['Detection']


def show_deteksi_upload():
    """Render the batch hoax-detection page: upload a CSV, run detection,
    let the user tick corrections, and persist corrected rows to GCS."""
    st.markdown(COMMON_CSS, unsafe_allow_html=True)

    # NOTE(review): the original heading markup inside these st.markdown()
    # calls was lost during extraction; plain <h6> tags are reconstructed
    # here — confirm against the styled original.
    st.markdown("<h6>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification",
        ],
        key="model_selector_upload",
    )
    # Loaded here so predict_hoax uses the freshly selected model;
    # presumably predict_hoax reads them via module state — TODO confirm.
    tokenizer, model = load_model(selected_model)

    st.markdown(
        "<h6>Unggah File Disini (format: Title; Content)</h6>",
        unsafe_allow_html=True,
    )
    uploaded_file = st.file_uploader("", type="csv")

    if 'df' not in st.session_state:
        st.session_state.df = None

    if uploaded_file is not None:
        df = load_data(uploaded_file)
        df.index = df.index + 1  # 1-based index for display

        st.markdown("<h6>Data yang Diunggah</h6>", unsafe_allow_html=True)
        grid_options = GridOptionsBuilder.from_dataframe(df)
        grid_options.configure_pagination(
            paginationAutoPageSize=False, paginationPageSize=10
        )
        grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
        AgGrid(
            df,
            gridOptions=grid_options.build(),
            update_mode=GridUpdateMode.VALUE_CHANGED,
            use_container_width=True,
        )

        if st.button("Deteksi", key="detect_upload"):
            try:
                df['Detection'] = df.apply(
                    lambda row: predict_hoax(row['Title'], row['Content']), axis=1
                )
                df['Correction'] = False
                # Persist results so they survive Streamlit reruns.
                st.session_state.df = df.copy()
            except Exception as e:
                st.error(f"Terjadi kesalahan saat deteksi: {e}")

    if st.session_state.df is not None:
        st.markdown("<h6>Hasil Deteksi</h6>", unsafe_allow_html=True)

        # Put Correction/Detection first and hide Label_id from the grid.
        cols = ['Correction', 'Detection'] + [
            col for col in st.session_state.df.columns
            if col not in ['Correction', 'Detection', 'Label_id']
        ]
        st.session_state.df = st.session_state.df[cols]

        if 'checked_corrections' not in st.session_state:
            st.session_state.checked_corrections = []
        # Re-derive the checkbox column from the persisted tick state so
        # checks survive reruns.
        st.session_state.df['Correction'] = st.session_state.df.index.isin(
            st.session_state.checked_corrections
        )

        grid_options = GridOptionsBuilder.from_dataframe(st.session_state.df)
        grid_options.configure_pagination(
            paginationAutoPageSize=False, paginationPageSize=10
        )
        grid_options.configure_default_column(editable=True, groupable=True)
        grid_options.configure_column(
            'Correction',
            editable=True,
            type='boolean',
            cellRenderer='agCheckboxCellRenderer',
            cellEditor='agCheckboxCellEditor',
            singleClickEdit=True,
        )
        grid_response = AgGrid(
            st.session_state.df,
            gridOptions=grid_options.build(),
            update_mode=GridUpdateMode.VALUE_CHANGED,
            key="detection_grid",
        )

        if grid_response['data'] is not None:
            edited_df = pd.DataFrame(grid_response['data'])
            # AgGrid returns a fresh 0-based index; restore the original one.
            edited_df.index = st.session_state.df.index
            st.session_state.checked_corrections = (
                edited_df[edited_df['Correction']].index.tolist()
            )
            st.session_state.df = edited_df.copy()

            corrected_indices = edited_df[edited_df['Correction']].index
            # Replace boolean ticks with the corrected label strings.
            edited_df['Correction'] = edited_df.apply(_flip_if_corrected, axis=1)
            st.session_state.df = edited_df.copy()
            st.session_state.corrected_indices = corrected_indices

        # Guard: on the first rerun (before any grid edit) corrected_indices
        # may not exist yet; default to "nothing corrected".
        corrected_df = st.session_state.df.loc[
            st.session_state.get('corrected_indices', [])
        ].copy()

        if not corrected_df.empty:
            corrected_df['Correction'] = corrected_df.apply(
                _flip_if_corrected, axis=1
            )
            # Timestamp in WIB (Asia/Jakarta), added only for display/storage.
            wib = pytz.timezone('Asia/Jakarta')
            corrected_df['Timestamp'] = datetime.now(wib).strftime(
                '%Y-%m-%d %H:%M:%S'
            )

            cols = [
                'Timestamp', 'Correction', 'Detection', 'Label_id', 'Label',
                'Title', 'Content', 'Fact', 'References', 'Classification',
                'Datasource',
            ]
            cols = [col for col in cols if col in corrected_df.columns]
            corrected_df_to_display = corrected_df[cols]

            st.markdown("<h6>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
            st.dataframe(
                corrected_df_to_display, use_container_width=True, hide_index=True
            )
        else:
            st.write("Tidak ada data yang dikoreksi.")

        if st.button("Simpan", key="corrected_data"):
            if 'df' in st.session_state and 'corrected_indices' in st.session_state:
                corrected_df = st.session_state.df.loc[
                    st.session_state.corrected_indices
                ].copy()
                wib = pytz.timezone('Asia/Jakarta')
                corrected_df['Timestamp'] = datetime.now(wib).strftime(
                    '%Y-%m-%d %H:%M:%S'
                )
                # Stored schema uses "Prediction" for the model output.
                corrected_df = corrected_df.rename(columns={'Detection': 'Prediction'})
                corrected_df = corrected_df[
                    ['Timestamp', 'Title', 'Content', 'Prediction', 'Correction']
                ]

                if not corrected_df.empty:
                    # GCS destination for accumulated user corrections.
                    bucket_name = "dashboardindohoax-bucket"
                    file_name = "koreksi_pengguna.csv"
                    correction_data = corrected_df.to_dict(orient='records')
                    save_corrections_to_gcs(bucket_name, file_name, correction_data)
                    st.success("Data telah disimpan.")
                    st.session_state.corrected_df = corrected_df
                else:
                    st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
            else:
                st.warning("Data deteksi tidak ditemukan.")