Spaces:
Running
Running
| import streamlit as st | |
| import pandas as pd | |
| from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode | |
| from test import predict_hoax, evaluate_model_performance | |
| from load_model import load_model | |
| from styles import COMMON_CSS | |
| from google.cloud import storage | |
| from io import StringIO | |
| import os | |
| from datetime import datetime | |
| import pytz | |
def save_corrections_to_gcs(bucket_name: str, file_name: str, correction_data) -> None:
    """Append correction rows to a CSV object stored in Google Cloud Storage.

    If the object already exists it is downloaded and parsed, the new rows are
    appended, and the combined CSV is uploaded back under the same name. If
    it does not exist, a new CSV is created from an empty frame with the
    columns Timestamp, Title, Content, Prediction and Correction.

    Args:
        bucket_name: Name of the GCS bucket that holds the CSV object.
        file_name: Name of the CSV object inside the bucket.
        correction_data: Row data accepted by ``pd.DataFrame`` (the caller in
            this file passes a list of dicts, one per corrected row).

    Note:
        The exists/download/upload sequence uses no precondition, so two
        writers running at the same time can overwrite each other's rows.
    """
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(file_name)

    if blob.exists():
        # download_as_bytes() is the supported call; download_as_string() is
        # only kept by the library as a deprecated alias for it.
        existing_csv = blob.download_as_bytes().decode('utf-8')
        existing_df = pd.read_csv(StringIO(existing_csv))
    else:
        # First save: start from an empty frame with the expected header.
        existing_df = pd.DataFrame(columns=['Timestamp', 'Title', 'Content', 'Prediction', 'Correction'])

    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)

    # The whole object is rewritten; GCS objects cannot be appended in place.
    updated_csv_data = updated_df.to_csv(index=False)
    blob.upload_from_string(updated_csv_data, content_type='text/csv')
def load_data(file):
    """Parse *file* as CSV with pandas' default options and return the DataFrame.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object); the
            caller in this file passes the Streamlit uploaded-file object.
    """
    frame = pd.read_csv(file)
    return frame
def _corrected_label(detection, is_checked):
    """Return the label implied by a correction checkbox.

    A checked row flips 'HOAX' <-> 'NON-HOAX'. Unchecked rows, and rows whose
    detection is neither of those two values, keep the detection unchanged.
    """
    if is_checked:
        if detection == 'NON-HOAX':
            return 'HOAX'
        if detection == 'HOAX':
            return 'NON-HOAX'
    return detection


def _wib_timestamp():
    """Return the current Asia/Jakarta time as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.now(pytz.timezone('Asia/Jakarta')).strftime('%Y-%m-%d %H:%M:%S')


def _render_detection_grid():
    """Show the editable result grid and sync the user's checkboxes to session state.

    Reads ``st.session_state.df`` and writes ``df``, ``checked_corrections``
    and ``corrected_indices`` back to ``st.session_state``.
    """
    st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)

    # Put the two result columns first and drop 'Label_id' from the view.
    results = st.session_state.df
    cols = ['Correction', 'Detection'] + [col for col in results.columns if col not in ['Correction', 'Detection', 'Label_id']]
    # .copy() so the column assignment below writes to an independent frame.
    st.session_state.df = results[cols].copy()

    if 'checked_corrections' not in st.session_state:
        st.session_state.checked_corrections = []

    # The stored frame keeps 'Correction' as a label string between reruns;
    # rebuild the boolean checkbox column from the remembered row indices.
    st.session_state.df['Correction'] = st.session_state.df.index.isin(st.session_state.checked_corrections)

    grid_options = GridOptionsBuilder.from_dataframe(st.session_state.df)
    grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
    grid_options.configure_default_column(editable=True, groupable=True)
    grid_options.configure_column('Correction', editable=True, type='boolean', cellRenderer='agCheckboxCellRenderer', cellEditor='agCheckboxCellEditor', singleClickEdit=True)
    grid_response = AgGrid(
        st.session_state.df,
        gridOptions=grid_options.build(),
        update_mode=GridUpdateMode.VALUE_CHANGED,
        key="detection_grid"
    )

    if grid_response['data'] is None:
        return

    edited_df = pd.DataFrame(grid_response['data'])
    # Restore the original index labels on the frame returned by the grid.
    edited_df.index = st.session_state.df.index

    corrected_indices = edited_df[edited_df['Correction']].index
    st.session_state.checked_corrections = corrected_indices.tolist()

    # Replace the checkbox booleans with the label each row should carry.
    edited_df['Correction'] = [
        _corrected_label(detection, is_checked)
        for detection, is_checked in zip(edited_df['Detection'], edited_df['Correction'])
    ]
    st.session_state.df = edited_df.copy()
    st.session_state.corrected_indices = corrected_indices


def _render_corrected_rows():
    """Show the rows the user marked as corrected, or a note when there are none."""
    if 'corrected_indices' not in st.session_state:
        st.write("Tidak ada data yang dikoreksi.")
        return

    corrected_df = st.session_state.df.loc[st.session_state.corrected_indices].copy()
    if corrected_df.empty:
        st.write("Tidak ada data yang dikoreksi.")
        return

    corrected_df['Timestamp'] = _wib_timestamp()
    cols = ['Timestamp', 'Correction', 'Detection', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
    # Only show the preferred columns that the uploaded file actually has.
    cols = [col for col in cols if col in corrected_df.columns]

    st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
    st.dataframe(corrected_df[cols], use_container_width=True, hide_index=True)


def _save_corrections():
    """Upload the corrected rows to GCS and report the outcome in the UI."""
    # 'df' is always present in session state (initialised to None), so the
    # value itself has to be checked, not just the key.
    if st.session_state.get('df') is None or 'corrected_indices' not in st.session_state:
        st.warning("Data deteksi tidak ditemukan.")
        return

    corrected_df = st.session_state.df.loc[st.session_state.corrected_indices].copy()
    corrected_df['Timestamp'] = _wib_timestamp()
    corrected_df = corrected_df.rename(columns={'Detection': 'Prediction'})
    corrected_df = corrected_df[['Timestamp', 'Title', 'Content', 'Prediction', 'Correction']]

    if corrected_df.empty:
        st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
        return

    bucket_name = "dashboardindohoax-bucket"
    file_name = "koreksi_pengguna.csv"
    correction_data = corrected_df.to_dict(orient='records')

    # Report storage failures in the page instead of letting them crash it,
    # the same way detection errors are reported.
    try:
        save_corrections_to_gcs(bucket_name, file_name, correction_data)
    except Exception as e:
        st.error(f"Terjadi kesalahan saat menyimpan: {e}")
        return

    st.success("Data telah disimpan.")
    st.session_state.corrected_df = corrected_df


def show_deteksi_upload():
    """Render the CSV-upload hoax detection page.

    Flow: pick a model, upload a CSV, preview it, run ``predict_hoax`` on each
    row's 'Title' and 'Content' when "Deteksi" is pressed, let the user tick
    rows whose detection is wrong, and save the ticked rows to GCS when
    "Simpan" is pressed. Results are kept in ``st.session_state`` under the
    keys ``df``, ``checked_corrections``, ``corrected_indices`` and
    ``corrected_df``.
    """
    st.markdown(COMMON_CSS, unsafe_allow_html=True)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification"
        ],
        key="model_selector_upload"
    )
    # NOTE(review): tokenizer/model are not passed to predict_hoax below;
    # presumably load_model makes the selection available some other way -
    # confirm that the chosen model actually affects detection.
    tokenizer, model = load_model(selected_model)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini (format: Title; Content)</h6>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader("", type="csv")

    if 'df' not in st.session_state:
        st.session_state.df = None

    if uploaded_file is None:
        return

    df = load_data(uploaded_file)
    # Use 1-based row labels in the tables.
    df.index = df.index + 1

    st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)
    grid_options = GridOptionsBuilder.from_dataframe(df)
    grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
    grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
    AgGrid(
        df,
        gridOptions=grid_options.build(),
        update_mode=GridUpdateMode.VALUE_CHANGED,
        use_container_width=True
    )

    if st.button("Deteksi", key="detect_upload"):
        try:
            df['Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
            df['Correction'] = False
            st.session_state.df = df.copy()
        except Exception as e:
            # Page-level boundary: show the failure instead of crashing.
            st.error(f"Terjadi kesalahan saat deteksi: {e}")
        else:
            # A fresh detection must not inherit ticks from an earlier run;
            # otherwise old row indices would pre-check rows of the new data.
            st.session_state.checked_corrections = []

    if st.session_state.df is None:
        return

    _render_detection_grid()
    _render_corrected_rows()

    if st.button("Simpan", key="corrected_data"):
        _save_corrections()