# NOTE(review): the lines "Spaces: Running / Running" were Hugging Face Space
# status residue from a page scrape, not program text.
import abc
import gradio as gr
import os
import pandas as pd
from gen_table import *
from meta_data import *

with gr.Blocks(title="Open Agent Leaderboard") as demo:
    # Load the overall math scores and derive the display metadata.
    struct = load_results(OVERALL_MATH_SCORE_FILE)
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    N_MODEL = len(results)
    N_DATA = len(results['IO'])
    DATASETS = list(results['IO'])
    # 'META' is bookkeeping, not a benchmark dataset; guard so a results file
    # without it does not raise ValueError.
    if 'META' in DATASETS:
        DATASETS.remove('META')
    print(DATASETS)
    # overall_table must exist before llm_options is derived from it below.
    check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
    overall_table = generate_table(results, DEFAULT_MATH_BENCH)
    # Persist the complete overall table for offline inspection.
    csv_path_overall = os.path.join(os.getcwd(), 'src/overall_results.csv')
    overall_table.to_csv(csv_path_overall, index=False)
    print(f"Overall results saved to {csv_path_overall}")
    # All distinct LLM names appearing in the overall table; sorted so the
    # checkbox ordering is stable across runs (set iteration order is arbitrary).
    llm_options = sorted(set(row.LLM for row in overall_table.itertuples() if hasattr(row, 'LLM')))
    gr.Markdown(LEADERBORAD_INTRODUCTION.format(EVAL_TIME))
| with gr.Tabs(elem_classes='tab-buttons') as tabs: | |
| with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'): | |
| gr.Markdown(LEADERBOARD_MD['MATH_MAIN']) | |
| # Move the definition of check_box and overall_table here | |
| # check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH) | |
| # overall_table = generate_table(results, DEFAULT_MATH_BENCH) | |
| type_map = check_box['type_map'] | |
| type_map['Rank'] = 'number' | |
| checkbox_group = gr.CheckboxGroup( | |
| choices=check_box['all'], | |
| value=check_box['required'], | |
| label='Evaluation Dimension', | |
| interactive=True, | |
| ) | |
| # New CheckboxGroup component for selecting Algorithm and LLM | |
| algo_name = gr.CheckboxGroup( | |
| choices=ALGORITHMS, | |
| value=ALGORITHMS, | |
| label='Algorithm', | |
| interactive=True | |
| ) | |
| llm_name = gr.CheckboxGroup( | |
| choices=llm_options, # Use the extracted llm_options | |
| value=llm_options, | |
| label='LLM', | |
| interactive=True | |
| ) | |
| initial_headers = ['Rank'] + check_box['essential'] + checkbox_group.value | |
| available_headers = [h for h in initial_headers if h in overall_table.columns] | |
| data_component = gr.components.DataFrame( | |
| value=overall_table[available_headers], | |
| type='pandas', | |
| datatype=[type_map[x] for x in available_headers], | |
| interactive=False, | |
| wrap=True, | |
| visible=True) | |
| def filter_df(fields, algos, llms, *args): | |
| headers = ['Rank'] + check_box['essential'] + fields | |
| df = overall_table.copy() | |
| # Add filtering logic | |
| df['flag'] = df.apply(lambda row: ( | |
| row['Algorithm'] in algos and | |
| row['LLM'] in llms | |
| ), axis=1) | |
| df = df[df['flag']].copy() | |
| df.pop('flag') | |
| # Ensure all requested columns exist | |
| available_headers = [h for h in headers if h in df.columns] | |
| original_columns = df.columns.tolist() | |
| available_headers = sorted(available_headers, key=lambda x: original_columns.index(x)) | |
| # If no columns are available, return an empty DataFrame with basic columns | |
| if not available_headers: | |
| available_headers = ['Rank'] + check_box['essential'] | |
| comp = gr.components.DataFrame( | |
| value=df[available_headers], | |
| type='pandas', | |
| datatype=[type_map[x] for x in available_headers], | |
| interactive=False, | |
| wrap=True, | |
| visible=True) | |
| return comp | |
| # Update change events to include new filtering conditions | |
| checkbox_group.change( | |
| fn=filter_df, | |
| inputs=[checkbox_group, algo_name, llm_name], | |
| outputs=data_component | |
| ) | |
| algo_name.change( | |
| fn=filter_df, | |
| inputs=[checkbox_group, algo_name, llm_name], | |
| outputs=data_component | |
| ) | |
| llm_name.change( | |
| fn=filter_df, | |
| inputs=[checkbox_group, algo_name, llm_name], | |
| outputs=data_component | |
| ) | |
| with gr.Tab(label='🏅 Open Agent Detail Math Leaderboard'): | |
| gr.Markdown(LEADERBOARD_MD['MATH_DETAIL']) | |
| struct_detail = load_results(DETAIL_MATH_SCORE_FILE) | |
| timestamp = struct_detail['time'] | |
| EVAL_TIME = format_timestamp(timestamp) | |
| results_detail = struct_detail['results'] | |
| table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH) | |
| # Save the complete table as a CSV file | |
| csv_path_detail = os.path.join(os.getcwd(), 'src/detail_results.csv') | |
| table.to_csv(csv_path_detail, index=False) | |
| print(f"Detail results saved to {csv_path_detail}") | |
| type_map = check_box['type_map'] | |
| type_map['Rank'] = 'number' | |
| checkbox_group = gr.CheckboxGroup( | |
| choices=check_box['all'], | |
| value=check_box['required'], | |
| label='Evaluation Dimension', | |
| interactive=True, | |
| ) | |
| headers = ['Rank'] + checkbox_group.value | |
| with gr.Row(): | |
| algo_name = gr.CheckboxGroup( | |
| choices=ALGORITHMS, | |
| value=ALGORITHMS, | |
| label='Algorithm', | |
| interactive=True | |
| ) | |
| dataset_name = gr.CheckboxGroup( | |
| choices=DATASETS, | |
| value=DATASETS, | |
| label='Datasets', | |
| interactive=True | |
| ) | |
| llm_name = gr.CheckboxGroup( | |
| choices=check_box['LLM_options'], | |
| value=check_box['LLM_options'], | |
| label='LLM', | |
| interactive=True | |
| ) | |
| data_component = gr.components.DataFrame( | |
| value=table[headers], | |
| type='pandas', | |
| datatype=[type_map[x] for x in headers], | |
| interactive=False, | |
| wrap=True, | |
| visible=True) | |
| def filter_df2(fields, algos, datasets, llms): | |
| headers = ['Rank'] + fields | |
| df = table.copy() | |
| # Filter data | |
| df['flag'] = df.apply(lambda row: ( | |
| row['Algorithm'] in algos and | |
| row['Dataset'] in datasets and | |
| row['LLM'] in llms | |
| ), axis=1) | |
| df = df[df['flag']].copy() | |
| df.pop('flag') | |
| # Group by dataset and calculate ranking within each group based on Score | |
| if 'Score' in df.columns: | |
| # Create a temporary ranking column | |
| df['Rank'] = df.groupby('Dataset')['Score'].rank(method='first', ascending=False) | |
| # Ensure ranking is integer | |
| df['Rank'] = df['Rank'].astype(int) | |
| original_columns = df.columns.tolist() | |
| headers = sorted(headers, key=lambda x: original_columns.index(x)) | |
| comp = gr.components.DataFrame( | |
| value=df[headers], | |
| type='pandas', | |
| datatype=[type_map[x] for x in headers], | |
| interactive=False, | |
| wrap=True, | |
| visible=True) | |
| return comp | |
| # Add change events for all checkbox groups | |
| checkbox_group.change( | |
| fn=filter_df2, | |
| inputs=[checkbox_group, algo_name, dataset_name, llm_name], | |
| outputs=data_component | |
| ) | |
| algo_name.change( | |
| fn=filter_df2, | |
| inputs=[checkbox_group, algo_name, dataset_name, llm_name], | |
| outputs=data_component | |
| ) | |
| dataset_name.change( | |
| fn=filter_df2, | |
| inputs=[checkbox_group, algo_name, dataset_name, llm_name], | |
| outputs=data_component | |
| ) | |
| llm_name.change( | |
| fn=filter_df2, | |
| inputs=[checkbox_group, algo_name, dataset_name, llm_name], | |
| outputs=data_component | |
| ) | |
| with gr.Tab(label='🏅 Open Agent Multi-Modal Leaderboard'): | |
| gr.Markdown(LEADERBOARD_MD['MULTI_MODAL_MAIN']) | |
| struct_multi_modal = load_results(MULTIMODAL_SCORE_FILE) | |
| timestamp = struct_multi_modal['time'] | |
| EVAL_TIME_MM = format_timestamp(timestamp) | |
| # Use BUILD_L3_DF to process multi-modal results (pass the list directly) | |
| table_mm, check_box_mm = BUILD_L3_DF( | |
| struct_multi_modal['multi_modal_results'], DEFAULT_MULTI_MODAL_BENCH | |
| ) | |
| # Save the complete table as a CSV file | |
| csv_path_multi_modal = os.path.join(os.getcwd(), 'src/multi_modal_results.csv') | |
| table_mm.to_csv(csv_path_multi_modal, index=False) | |
| print(f"Multi-modal results saved to {csv_path_multi_modal}") | |
| type_map_mm = check_box_mm['type_map'] | |
| checkbox_group_mm = gr.CheckboxGroup( | |
| choices=check_box_mm['all'], | |
| value=check_box_mm['required'], | |
| label='Evaluation Dimension', | |
| interactive=True, | |
| ) | |
| # Ensure unique values for Agent and VLMs | |
| unique_agents = sorted(table_mm['Agent'].drop_duplicates().str.strip().tolist()) | |
| unique_vlms = sorted(table_mm['VLMs'].drop_duplicates().str.strip().tolist()) | |
| agent_name_mm = gr.CheckboxGroup( | |
| choices=unique_agents, | |
| value=unique_agents, | |
| label='Agent', | |
| interactive=True | |
| ) | |
| vlm_name_mm = gr.CheckboxGroup( | |
| choices=unique_vlms, | |
| value=unique_vlms, | |
| label='VLMs', | |
| interactive=True | |
| ) | |
| initial_headers_mm = ['Rank'] + checkbox_group_mm.value | |
| print(initial_headers_mm, "111111111") | |
| available_headers_mm = [h for h in initial_headers_mm if h in table_mm.columns] | |
| data_component_mm = gr.components.DataFrame( | |
| value=table_mm[available_headers_mm], | |
| type='pandas', | |
| datatype=[type_map_mm[x] for x in available_headers_mm], | |
| interactive=False, | |
| wrap=True, | |
| visible=True | |
| ) | |
| def filter_df_mm(fields, agents, vlms, *args): | |
| headers = ['Rank'] + fields | |
| df = table_mm.copy() | |
| # Validate inputs to avoid errors | |
| if not agents: | |
| agents = df['Agent'].unique().tolist() | |
| if not vlms: | |
| vlms = df['VLMs'].unique().tolist() | |
| # Add filtering logic | |
| df['flag'] = df.apply(lambda row: ( | |
| row['Agent'] in agents and | |
| row['VLMs'] in vlms | |
| ), axis=1) | |
| df = df[df['flag']].copy() | |
| df.pop('flag') | |
| # Ensure all requested columns exist | |
| available_headers = [h for h in headers if h in df.columns] | |
| # If no columns are available, return an empty DataFrame with basic columns | |
| if not available_headers: | |
| available_headers = ['Rank'] + check_box_mm['essential'] | |
| comp = gr.components.DataFrame( | |
| value=df[available_headers], | |
| type='pandas', | |
| datatype=[type_map_mm.get(col, 'str') for col in available_headers], | |
| interactive=False, | |
| wrap=True, | |
| visible=True | |
| ) | |
| return comp | |
| # Add change events for multi-modal leaderboard | |
| checkbox_group_mm.change( | |
| fn=filter_df_mm, | |
| inputs=[checkbox_group_mm, agent_name_mm, vlm_name_mm], | |
| outputs=data_component_mm | |
| ) | |
| agent_name_mm.change( | |
| fn=filter_df_mm, | |
| inputs=[checkbox_group_mm, agent_name_mm, vlm_name_mm], | |
| outputs=data_component_mm | |
| ) | |
| vlm_name_mm.change( | |
| fn=filter_df_mm, | |
| inputs=[checkbox_group_mm, agent_name_mm, vlm_name_mm], | |
| outputs=data_component_mm | |
| ) | |
| with gr.Row(): | |
| with gr.Accordion("📙 Citation", open=False): | |
| gr.Textbox( | |
| value=CITATION_BUTTON_TEXT, lines=7, | |
| label="Copy the BibTeX snippet to cite this source", | |
| elem_id="citation-button", | |
| show_copy_button=True, | |
| ) | |
| if __name__ == '__main__': | |
| demo.launch(server_name='0.0.0.0') |