Spaces:
Running
Running
Update deteksi_upload.py
Browse files
- deteksi_upload.py +18 -32
deteksi_upload.py
CHANGED
|
@@ -10,7 +10,6 @@ import os
|
|
| 10 |
from datetime import datetime
|
| 11 |
import pytz
|
| 12 |
|
| 13 |
-
# Set environment variable for Google Cloud credentials using secrets
|
| 14 |
with open("credentials.json", "w") as f:
|
| 15 |
f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
|
| 16 |
|
|
@@ -20,21 +19,16 @@ def save_corrections_to_gcs(bucket_name, file_name, correction_data):
|
|
| 20 |
client = storage.Client()
|
| 21 |
bucket = client.bucket("dashboardhoax-bucket")
|
| 22 |
blob = bucket.blob("koreksi_pengguna_file.csv")
|
| 23 |
-
|
| 24 |
-
# Check if the blob (file) exists
|
| 25 |
if blob.exists():
|
| 26 |
-
# Download existing CSV from GCS
|
| 27 |
existing_data = blob.download_as_string().decode('utf-8')
|
| 28 |
existing_df = pd.read_csv(StringIO(existing_data))
|
| 29 |
else:
|
| 30 |
-
# Create a new DataFrame if the file does not exist
|
| 31 |
existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
|
| 32 |
|
| 33 |
-
# Append the new data to the existing data
|
| 34 |
new_data_df = pd.DataFrame(correction_data)
|
| 35 |
updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
|
| 36 |
|
| 37 |
-
# Convert the DataFrame back to CSV and upload
|
| 38 |
updated_csv_data = updated_df.to_csv(index=False)
|
| 39 |
blob.upload_from_string(updated_csv_data, content_type='text/csv')
|
| 40 |
|
|
@@ -72,8 +66,8 @@ def show_deteksi_upload():
|
|
| 72 |
|
| 73 |
grid_options = GridOptionsBuilder.from_dataframe(df)
|
| 74 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
|
|
|
| 75 |
gridOptions = grid_options.build()
|
| 76 |
-
gridOptions['defaultColDef'] = {'fontSize': 10}
|
| 77 |
|
| 78 |
AgGrid(
|
| 79 |
df,
|
|
@@ -91,17 +85,16 @@ def show_deteksi_upload():
|
|
| 91 |
st.error(f"Terjadi kesalahan saat deteksi: {e}")
|
| 92 |
|
| 93 |
if st.session_state.df is not None:
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
st.success(performance_text)
|
| 105 |
|
| 106 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
|
| 107 |
|
|
@@ -111,6 +104,7 @@ def show_deteksi_upload():
|
|
| 111 |
grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
|
| 112 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
| 113 |
grid_options.configure_default_column(editable=True, groupable=True)
|
|
|
|
| 114 |
gridOptions = grid_options.build()
|
| 115 |
|
| 116 |
grid_response = AgGrid(
|
|
@@ -133,18 +127,14 @@ def show_deteksi_upload():
|
|
| 133 |
st.session_state.df = edited_df.copy()
|
| 134 |
|
| 135 |
if not corrected_df.empty:
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
|
| 139 |
-
axis=1
|
| 140 |
-
)
|
| 141 |
|
| 142 |
-
#
|
| 143 |
wib = pytz.timezone('Asia/Jakarta')
|
| 144 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
| 145 |
|
| 146 |
-
|
| 147 |
-
corrected_df_to_display = corrected_df[cols]
|
| 148 |
|
| 149 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
|
| 150 |
st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
|
|
@@ -154,20 +144,16 @@ def show_deteksi_upload():
|
|
| 154 |
if st.button("Simpan", key="corrected_data"):
|
| 155 |
if 'df' in st.session_state:
|
| 156 |
corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
|
| 157 |
-
|
| 158 |
wib = pytz.timezone('Asia/Jakarta')
|
| 159 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
| 160 |
corrected_df = corrected_df.drop(columns=['Correction'])
|
| 161 |
|
| 162 |
if not corrected_df.empty:
|
| 163 |
-
# Define GCS bucket and file name
|
| 164 |
bucket_name = "your-bucket-name"
|
| 165 |
file_name = "corrected_upload_data.csv"
|
| 166 |
|
| 167 |
-
# Convert DataFrame to list of dicts for GCS
|
| 168 |
correction_data = corrected_df.to_dict(orient='records')
|
| 169 |
|
| 170 |
-
# Save corrected data to GCS
|
| 171 |
save_corrections_to_gcs(bucket_name, file_name, correction_data)
|
| 172 |
|
| 173 |
st.success("Data telah disimpan.")
|
|
@@ -175,4 +161,4 @@ def show_deteksi_upload():
|
|
| 175 |
else:
|
| 176 |
st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
|
| 177 |
else:
|
| 178 |
-
st.warning("Data deteksi tidak ditemukan.")
|
|
|
|
| 10 |
from datetime import datetime
|
| 11 |
import pytz
|
| 12 |
|
|
|
|
| 13 |
with open("credentials.json", "w") as f:
|
| 14 |
f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
|
| 15 |
|
|
|
|
| 19 |
client = storage.Client()
|
| 20 |
bucket = client.bucket("dashboardhoax-bucket")
|
| 21 |
blob = bucket.blob("koreksi_pengguna_file.csv")
|
| 22 |
+
|
|
|
|
| 23 |
if blob.exists():
|
|
|
|
| 24 |
existing_data = blob.download_as_string().decode('utf-8')
|
| 25 |
existing_df = pd.read_csv(StringIO(existing_data))
|
| 26 |
else:
|
|
|
|
| 27 |
existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
|
| 28 |
|
|
|
|
| 29 |
new_data_df = pd.DataFrame(correction_data)
|
| 30 |
updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
|
| 31 |
|
|
|
|
| 32 |
updated_csv_data = updated_df.to_csv(index=False)
|
| 33 |
blob.upload_from_string(updated_csv_data, content_type='text/csv')
|
| 34 |
|
|
|
|
| 66 |
|
| 67 |
grid_options = GridOptionsBuilder.from_dataframe(df)
|
| 68 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
| 69 |
+
grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
|
| 70 |
gridOptions = grid_options.build()
|
|
|
|
| 71 |
|
| 72 |
AgGrid(
|
| 73 |
df,
|
|
|
|
| 85 |
st.error(f"Terjadi kesalahan saat deteksi: {e}")
|
| 86 |
|
| 87 |
if st.session_state.df is not None:
|
| 88 |
+
if 'Label' in st.session_state.df.columns:
|
| 89 |
+
accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
|
| 90 |
+
performance_text = (
|
| 91 |
+
f"*Performansi Model*\n\n"
|
| 92 |
+
f"*Accuracy:* {round(accuracy, 2)} "
|
| 93 |
+
f"*Precision:* {round(precision, 2)} "
|
| 94 |
+
f"*Recall:* {round(recall, 2)} "
|
| 95 |
+
f"*F1 Score:* {round(f1, 2)}"
|
| 96 |
+
)
|
| 97 |
+
st.success(performance_text)
|
|
|
|
| 98 |
|
| 99 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
|
| 100 |
|
|
|
|
| 104 |
grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
|
| 105 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
| 106 |
grid_options.configure_default_column(editable=True, groupable=True)
|
| 107 |
+
grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
|
| 108 |
gridOptions = grid_options.build()
|
| 109 |
|
| 110 |
grid_response = AgGrid(
|
|
|
|
| 127 |
st.session_state.df = edited_df.copy()
|
| 128 |
|
| 129 |
if not corrected_df.empty:
|
| 130 |
+
expected_cols = ['Timestamp', 'Result_Detection', 'Result_Correction', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
|
| 131 |
+
existing_cols = [col for col in expected_cols if col in corrected_df.columns]
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
+
# Tambahkan Timestamp hanya untuk penyimpanan
|
| 134 |
wib = pytz.timezone('Asia/Jakarta')
|
| 135 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
| 136 |
|
| 137 |
+
corrected_df_to_display = corrected_df[existing_cols]
|
|
|
|
| 138 |
|
| 139 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
|
| 140 |
st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
|
|
|
|
| 144 |
if st.button("Simpan", key="corrected_data"):
|
| 145 |
if 'df' in st.session_state:
|
| 146 |
corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
|
|
|
|
| 147 |
wib = pytz.timezone('Asia/Jakarta')
|
| 148 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
| 149 |
corrected_df = corrected_df.drop(columns=['Correction'])
|
| 150 |
|
| 151 |
if not corrected_df.empty:
|
|
|
|
| 152 |
bucket_name = "your-bucket-name"
|
| 153 |
file_name = "corrected_upload_data.csv"
|
| 154 |
|
|
|
|
| 155 |
correction_data = corrected_df.to_dict(orient='records')
|
| 156 |
|
|
|
|
| 157 |
save_corrections_to_gcs(bucket_name, file_name, correction_data)
|
| 158 |
|
| 159 |
st.success("Data telah disimpan.")
|
|
|
|
| 161 |
else:
|
| 162 |
st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
|
| 163 |
else:
|
| 164 |
+
st.warning("Data deteksi tidak ditemukan.")
|