Spaces:

nlp-brin-id
/

deteksihoax

Running

App Files Community

Nakhwa committed on Aug 26, 2024

Commit

fc8b513

verified ·

1 Parent(s): 02ccda8

Update deteksi_upload.py

Browse files

Files changed (1) hide show

deteksi_upload.py +18 -32

deteksi_upload.py CHANGED Viewed

@@ -10,7 +10,6 @@ import os
 from datetime import datetime
 import pytz
-# Set environment variable for Google Cloud credentials using secrets
 with open("credentials.json", "w") as f:
     f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
@@ -20,21 +19,16 @@ def save_corrections_to_gcs(bucket_name, file_name, correction_data):
     client = storage.Client()
     bucket = client.bucket("dashboardhoax-bucket")
     blob = bucket.blob("koreksi_pengguna_file.csv")
-    # Check if the blob (file) exists
     if blob.exists():
-        # Download existing CSV from GCS
         existing_data = blob.download_as_string().decode('utf-8')
         existing_df = pd.read_csv(StringIO(existing_data))
     else:
-        # Create a new DataFrame if the file does not exist
         existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
-    # Append the new data to the existing data
     new_data_df = pd.DataFrame(correction_data)
     updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
-    # Convert the DataFrame back to CSV and upload
     updated_csv_data = updated_df.to_csv(index=False)
     blob.upload_from_string(updated_csv_data, content_type='text/csv')
@@ -72,8 +66,8 @@ def show_deteksi_upload():
         grid_options = GridOptionsBuilder.from_dataframe(df)
         grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
         gridOptions = grid_options.build()
-        gridOptions['defaultColDef'] = {'fontSize': 10}
         AgGrid(
             df,
@@ -91,17 +85,16 @@ def show_deteksi_upload():
                 st.error(f"Terjadi kesalahan saat deteksi: {e}")
     if st.session_state.df is not None:
-        accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
-        performance_text = (
-            f"*Performansi Model*\n\n"
-            f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
-            f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
-            f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
-            f"*F1 Score:* {round(f1, 2)}"
-        )
-        st.success(performance_text)
         st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
@@ -111,6 +104,7 @@ def show_deteksi_upload():
         grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
         grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
         grid_options.configure_default_column(editable=True, groupable=True)
         gridOptions = grid_options.build()
         grid_response = AgGrid(
@@ -133,18 +127,14 @@ def show_deteksi_upload():
             st.session_state.df = edited_df.copy()
             if not corrected_df.empty:
-                corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
-                    'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
-                    ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
-                    axis=1
-                )
-                # Add Timestamp only for saving
                 wib = pytz.timezone('Asia/Jakarta')
                 corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
-                cols = ['Timestamp', 'Result_Correction', 'Result_Detection', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
-                corrected_df_to_display = corrected_df[cols]
                 st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
                 st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
@@ -154,20 +144,16 @@ def show_deteksi_upload():
         if st.button("Simpan", key="corrected_data"):
             if 'df' in st.session_state:
                 corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
                 wib = pytz.timezone('Asia/Jakarta')
                 corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
                 corrected_df = corrected_df.drop(columns=['Correction'])
                 if not corrected_df.empty:
-                    # Define GCS bucket and file name
                     bucket_name = "your-bucket-name"
                     file_name = "corrected_upload_data.csv"
-                    # Convert DataFrame to list of dicts for GCS
                     correction_data = corrected_df.to_dict(orient='records')
-                    # Save corrected data to GCS
                     save_corrections_to_gcs(bucket_name, file_name, correction_data)
                     st.success("Data telah disimpan.")
@@ -175,4 +161,4 @@ def show_deteksi_upload():
                 else:
                     st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
             else:
-                st.warning("Data deteksi tidak ditemukan.")

 from datetime import datetime
 import pytz
 with open("credentials.json", "w") as f:
     f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
     client = storage.Client()
     bucket = client.bucket("dashboardhoax-bucket")
     blob = bucket.blob("koreksi_pengguna_file.csv")
     if blob.exists():
         existing_data = blob.download_as_string().decode('utf-8')
         existing_df = pd.read_csv(StringIO(existing_data))
     else:
         existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
     new_data_df = pd.DataFrame(correction_data)
     updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
     updated_csv_data = updated_df.to_csv(index=False)
     blob.upload_from_string(updated_csv_data, content_type='text/csv')
         grid_options = GridOptionsBuilder.from_dataframe(df)
         grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
+        grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
         gridOptions = grid_options.build()
         AgGrid(
             df,
                 st.error(f"Terjadi kesalahan saat deteksi: {e}")
     if st.session_state.df is not None:
+        if 'Label' in st.session_state.df.columns:
+            accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
+            performance_text = (
+                f"*Performansi Model*\n\n"
+                f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
+                f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
+                f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
+                f"*F1 Score:* {round(f1, 2)}"
+            )
+            st.success(performance_text)
         st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
         grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
         grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
         grid_options.configure_default_column(editable=True, groupable=True)
+        grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
         gridOptions = grid_options.build()
         grid_response = AgGrid(
             st.session_state.df = edited_df.copy()
             if not corrected_df.empty:
+                expected_cols = ['Timestamp', 'Result_Detection', 'Result_Correction', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
+                existing_cols = [col for col in expected_cols if col in corrected_df.columns]
+                # Tambahkan Timestamp hanya untuk penyimpanan
                 wib = pytz.timezone('Asia/Jakarta')
                 corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
+                corrected_df_to_display = corrected_df[existing_cols]
                 st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
                 st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
         if st.button("Simpan", key="corrected_data"):
             if 'df' in st.session_state:
                 corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
                 wib = pytz.timezone('Asia/Jakarta')
                 corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
                 corrected_df = corrected_df.drop(columns=['Correction'])
                 if not corrected_df.empty:
                     bucket_name = "your-bucket-name"
                     file_name = "corrected_upload_data.csv"
                     correction_data = corrected_df.to_dict(orient='records')
                     save_corrections_to_gcs(bucket_name, file_name, correction_data)
                     st.success("Data telah disimpan.")
                 else:
                     st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
             else:
+                st.warning("Data deteksi tidak ditemukan.")