diff --git a/__pycache__/topic_extraction.cpython-310.pyc b/__pycache__/topic_extraction.cpython-310.pyc index 0b272dba195b763470ba6fa2236f86e85502950a..bd02850abd31f0919549f2d55c373e3c1c9c1deb 100644 Binary files a/__pycache__/topic_extraction.cpython-310.pyc and b/__pycache__/topic_extraction.cpython-310.pyc differ diff --git a/pearson_json/final_subtopics.json b/pearson_json/final_subtopics.json new file mode 100644 index 0000000000000000000000000000000000000000..b241dce389ee1ae16a32709aca4cb5c0f0e6b4b3 --- /dev/null +++ b/pearson_json/final_subtopics.json @@ -0,0 +1,142 @@ +[ + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + }, + { + "title": "", + "contents": [], + "children": [] + } +] \ No newline at end of file diff --git a/topic_extr.py b/topic_extr.py index 9d2c3ae45cdda31ccb3fd38139f93dd47b83eee2..9ccb0d655b4b3bba0cf6efc834069161f4545f3f 100644 --- a/topic_extr.py +++ b/topic_extr.py @@ -169,36 +169,149 @@ async def classify_image_async(image_data: bytes, api_key: str, max_retries: int return await loop.run_in_executor(None, call_gemini_for_table_classification, preprocessed, api_key, max_retries) -def call_gemini_for_subtopic_identification(text: str, api_key: str, max_retries: int = 1) -> dict: - """ - Sends the recognized text from a specification table to Gemini, - asking it to identify the main topic (like '2 Algebra and functions') - and subtopics (like '2.5', '3.4', etc.). - - Returns a dict of the form: - { - "title": "2 Algebra and functions", - "subtopics": ["2.5", "2.6", ...] - } - - If Gemini can't find anything, it might return empty strings or lists. - """ +# def call_gemini_for_subtopic_identification_image(image_data: bytes, api_key: str, max_retries: int = 1) -> dict: +# for attempt in range(max_retries + 1): +# try: +# prompt = """ +# You are given an image from an educational curriculum specification. The image may contain either: +# 1) A main topic heading in the format: " ", for example "2 Algebra and functions continued". +# 2) A subtopic heading in the format ".", for example "2.5", "2.6", or "3.4". +# 3) Possibly no relevant text at all. + +# Your task: +# 1. If the cell shows a main topic, extract the topic name (e.g. "2 Algebra and functions") and place it in the JSON key "title". +# 2. If the cell shows one or more subtopic numbers (e.g. "2.5", "2.6"), collect them in the JSON key "subtopics" as an array of strings. +# 3. If neither a main topic nor subtopic is detected, return empty values. + +# Output only valid JSON in this exact structure, with no extra text or explanation: + +# { +# "title": "...", +# "subtopics": [...] +# } + +# Where: +# - "title" is the recognized main topic (if any). Otherwise, an empty string. +# - "subtopics" is an array of recognized subtopic numbers (e.g. ["2.5", "2.6"]). Otherwise, an empty array. + +# Examples: +# 1. If the image text is "2 Algebra and functions continued", return: +# { +# "title": "2 Algebra and functions continued", +# "subtopics": [] +# } + +# 2. If the image text is "2.5 Solve linear and quadratic inequalities ...", return: +# { +# "title": "", +# "subtopics": ["2.5"] +# } + +# 3. If the image text is "2.6 Manipulate polynomials algebraically ...", return: +# { +# "title": "", +# "subtopics": ["2.6"] +# } + +# If you cannot recognize any text matching these patterns, or if nothing is found, return: +# { +# "title": "", +# "subtopics": [] +# } +# """ +# global _GEMINI_CLIENT +# if _GEMINI_CLIENT is None: +# _GEMINI_CLIENT = genai.Client(api_key=api_key) +# client = _GEMINI_CLIENT + +# resp = client.models.generate_content( +# model="gemini-2.0-flash", +# contents=[ +# { +# "parts": [ +# {"text": prompt}, +# { +# "inline_data": { +# "mime_type": "image/jpeg", +# "data": base64.b64encode(image_data).decode("utf-8") +# } +# } +# ] +# } +# ], +# config=types.GenerateContentConfig(temperature=0.0) +# ) +# if not resp or not resp.text: +# return {"title": "", "subtopics": []} + +# raw = resp.text.strip() + +# data = json.loads(raw) +# title = data.get("title", "") +# subtopics = data.get("subtopics", []) +# if not isinstance(subtopics, list): +# subtopics = [] +# return {"title": title, "subtopics": subtopics} + +# except Exception as e: +# if attempt < max_retries: +# time.sleep(0.5) +# else: +# return {"title": "", "subtopics": []} + + # return {"title": "", "subtopics": []} + +def call_gemini_for_subtopic_identification_image(image_data: bytes, api_key: str, max_retries: int = 1) -> dict: for attempt in range(max_retries + 1): try: - prompt = f""" -You are given text extracted from a table that represents topics and subtopics from an educational curriculum. -The text may include a main topic heading in the format: " ", for example, "2 Algebra and functions". -It may also include subtopics in the format of ".", such as "2.5", "3.4", etc. -Extract and output a valid JSON object with exactly two keys: - - "title": the main topic heading (if found). If not found, use an empty string. - - "subtopics": an array of strings representing each subtopic number extracted from the text. -Output exactly in this JSON format with no additional text. For example: + prompt = """ + You are given an image from an educational curriculum specification. The image may contain either: +1) A main topic heading in the format: " ", for example "2 Algebra and functions continued". +2) A subtopic heading in the format ".", for example "2.5", "2.6", or "3.4". +3) Possibly no relevant text at all. + +Your task: +1. If the cell shows a main topic, extract the topic name (e.g. "2 Algebra and functions") and place it in the JSON key "title". +2. If the cell shows one or more subtopic numbers (e.g. "2.5", "2.6"), collect them in the JSON key "subtopics" as an array of strings. +3. If neither a main topic nor subtopic is detected, return empty values. + +Output only valid JSON in this exact structure, with no extra text or explanation: + +Output only valid JSON in this exact structure, with no extra text or explanation: + +{ + "title": "...", + "subtopics": [...] +} + +Where: +- "title" is the recognized main topic (if any). Otherwise, an empty string. +- "subtopics" is an array of recognized subtopic numbers (e.g. ["2.5", "2.6"]). Otherwise, an empty array. + +Examples: +1. If the image text is "2 Algebra and functions continued", return: +{ + "title": "2 Algebra and functions continued", + "subtopics": [] +} + +2. If the image text is "2.5 Solve linear and quadratic inequalities ...", return: +{ + "title": "", + "subtopics": ["2.5"] +} + +3. If the image text is "2.6 Manipulate polynomials algebraically ...", return: +{ + "title": "", + "subtopics": ["2.6"] +} + +If you cannot recognize any text matching these patterns, or if nothing is found, return: { - "title": "2 Algebra and functions", - "subtopics": ["2.5", "2.6"] + "title": "", + "subtopics": [] } -Text: -{text} """ global _GEMINI_CLIENT if _GEMINI_CLIENT is None: @@ -207,36 +320,45 @@ Text: resp = client.models.generate_content( model="gemini-2.0-flash", - contents=[prompt], + contents=[ + { + "parts": [ + {"text": prompt}, + { + "inline_data": { + "mime_type": "image/jpeg", + "data": base64.b64encode(image_data).decode("utf-8") + } + } + ] + } + ], config=types.GenerateContentConfig(temperature=0.0) ) - + # Log the raw response + logger.info(f"Gemini subtopic extraction raw response: {resp.text if resp and resp.text else 'None'}") + if not resp or not resp.text: - # If Gemini gives no response, fallback to empty. + logger.warning("Gemini returned an empty response for subtopic extraction.") return {"title": "", "subtopics": []} raw = resp.text.strip() - - # Attempt to parse raw as JSON - try: - data = json.loads(raw) - # Guarantee the structure we want - title = data.get("title", "") - subs = data.get("subtopics", []) - if not isinstance(subs, list): - subs = [] - return {"title": title, "subtopics": subs} - except Exception: - # If JSON parse fails, return empty - return {"title": "", "subtopics": []} + data = json.loads(raw) + title = data.get("title", "") + subtopics = data.get("subtopics", []) + if not isinstance(subtopics, list): + subtopics = [] + return {"title": title, "subtopics": subtopics} except Exception as e: - # If there's an error or a 503, we can retry or bail out + logger.error(f"Gemini subtopic identification error on attempt {attempt}: {e}") if attempt < max_retries: time.sleep(0.5) else: return {"title": "", "subtopics": []} + return {"title": "", "subtopics": []} + class S3ImageWriter(DataWriter): @@ -314,11 +436,13 @@ class S3ImageWriter(DataWriter): logger.warning(f"No image data found for S3 key {s3_key}. Skipping.") continue + # Write temporary file for processing. with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: temp_file.write(img_data) temp_path = temp_file.name try: + # 1) Extract row bounding boxes. if col_type.lower() == 'two': extractor = TableExtractor( skip_header=True, @@ -334,42 +458,77 @@ class S3ImageWriter(DataWriter): subtopic_threshold=0.2 ) row_boxes = extractor.process_image(temp_path) - - snippet = ["**Extracted table cells:**"] - cell_texts = [] + logger.info(f"Extracted {len(row_boxes)} rows from {temp_path}") for i, row in enumerate(row_boxes): - for j, box in enumerate(row): - cell_key = f"{self.base_path}cells/table_s3_{os.path.basename(s3_key)}_r{i}_c{j}.jpg" - self.s3_writer.write(cell_key, img_data) # or cell_data if you truly cropped + logger.info(f"Row {i} has {len(row)} cells") - text = "..." # placeholder - cell_texts.append(text) - - snippet.append(f"![Row {i} Col {j}]({cell_key})") + # out_folder = temp_path + "_rows" + # os.makedirs(out_folder, exist_ok=True) + out_folder = os.path.join(os.path.dirname(temp_path), os.path.basename(temp_path) + "_rows") + os.makedirs(out_folder, exist_ok=True) - combined_text = "\n".join(cell_texts) + extractor.save_extracted_cells(temp_path, row_boxes, out_folder) + logger.info(f"Files in {out_folder}:") + for root, dirs, files in os.walk(out_folder): + logger.info(f"{root}: {files}") - subtopic_info = call_gemini_for_subtopic_identification(combined_text, self.gemini_api_key) + recognized_main_topic = "" + main_topic_image_key = None + recognized_subtopics = [] - # subtopic_info might be: {"title": "2 Algebra and functions", "subtopics": ["2.5"]} + # 2) Loop over each cell image. + for i, row in enumerate(row_boxes): + row_dir = os.path.join(out_folder, f"row_{i}") + for j, _ in enumerate(row): + cell_path = os.path.join(row_dir, f"col_{j}.png") + if not os.path.isfile(cell_path): + alternative_path = os.path.join(row_dir, f"col_{j}.jpg") + if os.path.isfile(alternative_path): + cell_path = alternative_path + else: + logger.warning(f"Cell image not found: {cell_path}") + continue + + with open(cell_path, "rb") as cf: + cell_image_data = cf.read() + + # Save cell image to S3. + cell_key = f"{self.base_path}cells/{os.path.basename(s3_key)}_r{i}_c{j}.png" + self.s3_writer.write(cell_key, cell_image_data) + + # Log before calling Gemini. + logger.debug(f"About to call Gemini for cell image: {cell_path}") + info = call_gemini_for_subtopic_identification_image(cell_image_data, self.gemini_api_key) + logger.info(f"Gemini subtopic extraction result for cell {cell_path}: {info}") + + if info["title"] and not recognized_main_topic: + recognized_main_topic = info["title"] + main_topic_image_key = cell_key + + for st in info["subtopics"]: + recognized_subtopics.append({ + "title": st, + "contents": [{"type": "image", "key": cell_key}], + "children": [] + }) + + # 3) Build final JSON for this table. final_json = { - "title": subtopic_info.get("title", ""), - "contents": [ - {"type": "image", "key": s3_key} - ], - "children": [] + "title": recognized_main_topic, + "contents": [], + "children": recognized_subtopics } - for st in subtopic_info.get("subtopics", []): - final_json["children"].append({ - "title": st, - "contents": [ - {"type": "image", "key": f"subtopic_{st}_example.jpg"} - ] - }) + if main_topic_image_key: + final_json["contents"].append({"type": "image", "key": main_topic_image_key}) + # Save the final JSON. self.extracted_subtopics[s3_key] = final_json - # Replace the original table image line in the markdown with the snippet + # Optionally, create a snippet to replace the markdown line. + snippet = ["**Extracted table cells:**"] + for i, row in enumerate(row_boxes): + for j, _ in enumerate(row): + snippet.append(f"![Row {i} Col {j}]({self.base_path}cells/{os.path.basename(s3_key)}_r{i}_c{j}.jpg)") new_snip = "\n".join(snippet) old_line = f"![HAS TO BE PROCESSED - {col_type} column table]({s3_key})" md_content = md_content.replace(old_line, new_snip) @@ -381,6 +540,8 @@ class S3ImageWriter(DataWriter): return md_content + + def post_process(self, key: str, md_content: str) -> str: return asyncio.run(self.post_process_async(key, md_content)) @@ -457,7 +618,7 @@ class LocalImageWriter(DataWriter): with open(temp_path, "wb") as f: f.write(desc_item["data"]) try: - if col_type.lower() == 'two': + if col_type.lower() == 'two': #check for table_row_extr script for more details extractor = TableExtractor( skip_header=True, merge_two_col_rows=True, @@ -822,7 +983,7 @@ class MineruNoTextProcessor: if __name__ == "__main__": input_pdf = "/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf" - output_dir = "/home/user/app/we" + output_dir = "/home/user/app/pearson_json" gemini_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU") try: processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=gemini_key) @@ -830,4 +991,4 @@ if __name__ == "__main__": logger.info("Processing completed successfully.") # The result includes final_markdown and subtopics_extracted except Exception as e: - logger.error(f"Processing failed: {e}") + logger.error(f"Processing failed: {e}") \ No newline at end of file diff --git a/topic_extract_arsenii.py b/topic_extract_arsenii.py new file mode 100644 index 0000000000000000000000000000000000000000..9e605192a48ae12bc216dbc7f90599de6a746186 --- /dev/null +++ b/topic_extract_arsenii.py @@ -0,0 +1,883 @@ +#!/usr/bin/env python3 +import os +import re +import gc +import json +import logging +import fitz +import boto3 +import base64 +import time +import asyncio +import tempfile +import requests +from io import BytesIO +from typing import List, Dict, Any + +import torch +import cv2 +import numpy as np + +from google import genai +from google.genai import types + +from magic_pdf.data.dataset import PymuDocDataset +from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze +from magic_pdf.data.data_reader_writer.base import DataWriter +from table_row_extraction import TableExtractor + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +file_handler = logging.FileHandler("topic_extraction_ars.log") +file_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s - %(message)s")) +logger.addHandler(file_handler) + +_GEMINI_CLIENT = None + +def unify_whitespace(text: str) -> str: + return re.sub(r"\s+", " ", text).strip() + +def find_all_occurrences(pdf_bytes: bytes, search_text: str) -> List[int]: + doc = fitz.open(stream=pdf_bytes, filetype="pdf") + st_norm = unify_whitespace(search_text) + found = [] + for i in range(doc.page_count): + raw = doc[i].get_text("raw") + norm = unify_whitespace(raw) + if st_norm in norm: + found.append(i) + doc.close() + return sorted(found) + +def create_subset_pdf(original_pdf_bytes: bytes, page_indices: List[int]) -> bytes: + if not page_indices: + raise ValueError("No page indices provided for subset creation.") + doc = fitz.open(stream=original_pdf_bytes, filetype="pdf") + new_doc = fitz.open() + for p in sorted(set(page_indices)): + if 0 <= p < doc.page_count: + new_doc.insert_pdf(doc, from_page=p, to_page=p) + else: + logger.error(f"Page index {p} out of range (0..{doc.page_count - 1}).") + raise ValueError(f"Page index {p} out of range.") + subset_bytes = new_doc.tobytes() + new_doc.close() + doc.close() + return subset_bytes + +class s3Writer: + def __init__(self, ak: str, sk: str, bucket: str, endpoint_url: str): + self.bucket = bucket + self.client = boto3.client( + 's3', + aws_access_key_id=ak, + aws_secret_access_key=sk, + endpoint_url=endpoint_url + ) + + def write(self, path: str, data: bytes) -> None: + try: + file_obj = BytesIO(data) + self.client.upload_fileobj( + file_obj, + self.bucket, + path + ) + logger.info(f"Uploaded to S3: {path}") + except Exception as e: + logger.error(f"Failed to upload to S3: {str(e)}") + raise + +def preprocess_image(image_data: bytes, max_dim: int = 600, quality: int = 60) -> bytes: + arr = np.frombuffer(image_data, np.uint8) + img = cv2.imdecode(arr, cv2.IMREAD_COLOR) + if img is not None: + h, w, _ = img.shape + if max(h, w) > max_dim: + scale = max_dim / float(max(h, w)) + new_w = int(w * scale) + new_h = int(h * scale) + img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA) + encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), quality] + success, enc = cv2.imencode(".jpg", img, encode_params) + if success: + return enc.tobytes() + return image_data + +def call_gemini_for_table_classification(image_data: bytes, api_key: str, max_retries: int = 1) -> str: + """ + Existing Gemini call to classify an image as TWO_COLUMN, THREE_COLUMN, or NO_TABLE. + """ + for attempt in range(max_retries + 1): + try: + prompt = """You are given an image. Determine if it shows a table that has exactly 2 or 3 columns. +The three-column 'table' image includes such key features: + - Three columns header + - Headers like 'Topics', 'Content', 'Guidelines' + - Possibly sections (e.g. 8.4, 9.1) +The two-column 'table' image includes such key features: + - Two columns + - Headers like 'Subject content' and 'Additional information' + - Possibly sections (e.g. 2.1, 3.4) +If the image is a relevant table with 2 columns, respond with 'TWO_COLUMN'. +If the image is a relevant table with 3 columns, respond with 'THREE_COLUMN'. +If the image does not show a table at all, respond with 'NO_TABLE'. +Return only one of these exact labels. +""" + global _GEMINI_CLIENT + if _GEMINI_CLIENT is None: + _GEMINI_CLIENT = genai.Client(api_key=api_key) + client = _GEMINI_CLIENT + + resp = client.models.generate_content( + model="gemini-2.0-flash", + contents=[ + { + "parts": [ + {"text": prompt}, + { + "inline_data": { + "mime_type": "image/jpeg", + "data": base64.b64encode(image_data).decode('utf-8') + } + } + ] + } + ], + config=types.GenerateContentConfig(temperature=0.0) + ) + if resp and resp.text: + classification = resp.text.strip().upper() + if "THREE" in classification: + return "THREE_COLUMN" + elif "TWO" in classification: + return "TWO_COLUMN" + return "NO_TABLE" + except Exception as e: + logger.error(f"Gemini table classification error: {e}") + if "503" in str(e): + return "NO_TABLE" + if attempt < max_retries: + time.sleep(0.5) + else: + return "NO_TABLE" + +async def classify_image_async(image_data: bytes, api_key: str, max_retries: int = 1) -> str: + loop = asyncio.get_event_loop() + preprocessed = preprocess_image(image_data) + return await loop.run_in_executor(None, call_gemini_for_table_classification, preprocessed, api_key, max_retries) + + +def call_gemini_for_subtopic_identification_image(image_data: bytes, api_key: str, max_retries: int = 1) -> dict: + """ + Sends the *image* (not text) of a table cell to Gemini to identify: + - A main topic heading in the format: " ", e.g. "2 Algebra and functions" + - A subtopic heading in the format: ".", e.g. "2.5", "3.4" + Returns a dict of the form: + { + "title": "", + "subtopics": ["2.5", "2.6", ...] + } + """ + for attempt in range(max_retries + 1): + try: + # Prompt specifically instructs Gemini to read the image’s text and extract + # either a main topic or subtopic heading if present: + prompt = """ + You are given an image of a table cell from an educational curriculum specification. + The text in this cell may contain: + 1) A main topic heading in the format " ", for example: "2 Algebra and functions" + 2) A subtopic heading in the format ".", for example: "2.5" or "3.4" + Identify if the cell contains exactly one main topic or subtopic. + Return a valid JSON object with the keys "title" and "subtopics" of the form: + {{ + "title": "2 Algebra and functions", + "subtopics": ["2.5", "2.6"] + }} + If you find a main topic (like '2 Algebra and functions'), put it in "title". + If you find subtopic numbers (like '2.5', '3.4'), put them in the "subtopics" array. + """ + + # Re-use or initialize your global Gemini client: + global _GEMINI_CLIENT + if _GEMINI_CLIENT is None: + _GEMINI_CLIENT = genai.Client(api_key=api_key) + client = _GEMINI_CLIENT + + # Send the prompt + image to Gemini: + resp = client.models.generate_content( + model="gemini-2.0-flash", + contents=[ + { + "parts": [ + {"text": prompt}, + { + "inline_data": { + "mime_type": "image/jpeg", + "data": base64.b64encode(image_data).decode("utf-8") + } + } + ] + } + ], + config=types.GenerateContentConfig(temperature=0.0) + ) + # if not resp or not resp.text: + # return {"title": "", "subtopics": []} + + raw = resp.text.strip().replace("```json", "").replace("```", "") + logger.info(f"== RAW == {raw}") + + # Attempt to parse JSON from Gemini’s response: + data = json.loads(raw) + title = data.get("title", "") + subtopics = data.get("subtopics", []) + if not isinstance(subtopics, list): + subtopics = [] + return {"title": title, "subtopics": subtopics} + + except Exception as e: + # Retry logic if you like: + if attempt < max_retries: + time.sleep(0.5) + else: + return {"title": "", "subtopics": []} + # fallback: + return {"title": "", "subtopics": []} + + + + +class S3ImageWriter(DataWriter): + def __init__(self, s3_writer: s3Writer, base_path: str, gemini_api_key: str): + self.s3_writer = s3_writer + self.base_path = base_path if base_path.endswith("/") else base_path + "/" + self.gemini_api_key = gemini_api_key + self.descriptions = {} + self._img_count = 0 + self.extracted_tables = {} + # New attribute to store final subtopic JSON + self.extracted_subtopics = {} + + def write(self, path: str, data: bytes) -> None: + self._img_count += 1 + unique_id = f"img_{self._img_count}.jpg" + s3_key = f"{self.base_path}{unique_id}" + self.s3_writer.write(s3_key, data) + self.descriptions[path] = { + "data": data, + "s3_path": s3_key, + "table_classification": "NO_TABLE", + "final_alt": "" + } + + async def post_process_async(self, key: str, md_content: str) -> str: + logger.info("Classifying images to detect tables.") + tasks = { + p: asyncio.create_task(classify_image_async(info["data"], self.gemini_api_key)) + for p, info in self.descriptions.items() + } + results = await asyncio.gather(*tasks.values(), return_exceptions=True) + for p, result in zip(tasks.keys(), results): + if isinstance(result, Exception): + logger.error(f"Table classification error for {p}: {result}") + self.descriptions[p]['table_classification'] = "NO_TABLE" + else: + self.descriptions[p]['table_classification'] = result + + # 2) Replace the original markdown references with alt text + for p, info in self.descriptions.items(): + cls = info['table_classification'] + if cls == "TWO_COLUMN": + info['final_alt'] = "HAS TO BE PROCESSED - two column table" + elif cls == "THREE_COLUMN": + info['final_alt'] = "HAS TO BE PROCESSED - three column table" + else: + info['final_alt'] = "NO_TABLE image" + md_content = md_content.replace(f"![]({key}{p})", f"![{info['final_alt']}]({info['s3_path']})") + + md_content = await self._process_table_images_in_markdown(key, md_content) + + # Filter final lines to keep only lines with images + final_lines = [ + line.strip() for line in md_content.split("\n") + if re.match(r"^\!\[.*\]\(.*\)", line.strip()) + ] + return "\n".join(final_lines) + + + async def _process_table_images_in_markdown(self, key: str, md_content: str) -> str: + pat = r"!\[HAS TO BE PROCESSED - (two|three) column table\]\(([^)]+)\)" + matches = re.findall(pat, md_content, flags=re.IGNORECASE) + if not matches: + return md_content + + for (col_type, s3_key) in matches: + logger.info(f"Processing table image: {s3_key}, columns={col_type}") + img_data = None + for desc in self.descriptions.values(): + if desc.get("s3_path") == s3_key: + img_data = desc.get("data") + break + if img_data is None: + logger.warning(f"No image data found for S3 key {s3_key}. Skipping.") + continue + + with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: + temp_file.write(img_data) + temp_path = temp_file.name + + try: + if col_type.lower() == 'two': + extractor = TableExtractor( + skip_header=True, + merge_two_col_rows=True, + enable_subtopic_merge=True, + subtopic_threshold=0.2 + ) + else: + extractor = TableExtractor( + skip_header=True, + merge_two_col_rows=False, + enable_subtopic_merge=False, + subtopic_threshold=0.2 + ) + row_boxes = extractor.process_image(temp_path) + + #save cell images to S3 or local + out_folder = temp_path + "_rows" + os.makedirs(out_folder, exist_ok=True) + extractor.save_extracted_cells(temp_path, row_boxes, out_folder) + + recognized_main_topic = None + recognized_subtopics = [] + + for i, row in enumerate(row_boxes): + row_dir = os.path.join(out_folder, f"row_{i}") + for j, _ in enumerate(row): + cell_path = os.path.join(row_dir, f"col_{j}.jpg") + if not os.path.isfile(cell_path): + continue + + with open(cell_path, "rb") as cf: + cell_image_data = cf.read() + + # store that cell image to S3 + cell_key = f"{self.base_path}cells/{os.path.basename(s3_key)}_r{i}_c{j}.jpg" + self.s3_writer.write(cell_key, cell_image_data) + + # Call Gemini with the cell image + info = call_gemini_for_subtopic_identification_image(cell_image_data, self.gemini_api_key) + logger.info(f"== INFO == {info}") + # e.g. info = {"title": "2 Algebra and functions", "subtopics": ["2.5"]} + + # 3d) Merge the recognized topic/subtopics + if info["title"]: + recognized_main_topic = info["title"] + if info["subtopics"]: + recognized_subtopics.extend(info["subtopics"]) + + snippet = ["**Extracted table cells:**"] + cell_texts = [] + for i, row in enumerate(row_boxes): + for j, box in enumerate(row): + cell_key = f"{self.base_path}cells/table_s3_{os.path.basename(s3_key)}_r{i}_c{j}.jpg" + self.s3_writer.write(cell_key, img_data) # or cell_data if you truly cropped + + text = "..." # placeholder + cell_texts.append(text) + + snippet.append(f"![Row {i} Col {j}]({cell_key})") + + final_json = { + "title": recognized_main_topic, + "contents": [ + { + "type": "image", + "key": s3_key + } + ], + "children": [] + } + for st in recognized_subtopics: + final_json["children"].append({ + "title": st, + "contents": [ + {"type": "image", "key": f"subtopic_{st}_example.jpg"} + ] + }) + + self.extracted_subtopics[s3_key] = final_json + + + # Replace the original table image line in the markdown with the snippet + new_snip = "\n".join(snippet) + old_line = f"![HAS TO BE PROCESSED - {col_type} column table]({s3_key})" + md_content = md_content.replace(old_line, new_snip) + + snippet = ["**Extracted table cells:**"] + for i, row in enumerate(row_boxes): + for j, _ in enumerate(row): + snippet.append(f"![Row {i} Col {j}]({self.base_path}cells/{os.path.basename(s3_key)}_r{i}_c{j}.jpg)") + new_snip = "\n".join(snippet) + old_line = f"![HAS TO BE PROCESSED - {col_type} column table]({s3_key})" + md_content = md_content.replace(old_line, new_snip) + + except Exception as e: + logger.error(f"Error processing table image {s3_key}: {e}") + finally: + os.remove(temp_path) + + return md_content + + def post_process(self, key: str, md_content: str) -> str: + return asyncio.run(self.post_process_async(key, md_content)) + + +class LocalImageWriter(DataWriter): + def __init__(self, output_folder: str, gemini_api_key: str): + self.output_folder = output_folder + os.makedirs(self.output_folder, exist_ok=True) + self.descriptions = {} + self._img_count = 0 + self.gemini_api_key = gemini_api_key + + self.extracted_tables = {} + + def write(self, path: str, data: bytes) -> None: + self._img_count += 1 + unique_id = f"img_{self._img_count}.jpg" + self.descriptions[path] = { + "data": data, + "relative_path": unique_id, + "table_classification": "NO_TABLE", + "final_alt": "" + } + # Also save the original image locally for testing. + image_path = os.path.join(self.output_folder, unique_id) + with open(image_path, "wb") as f: + f.write(data) + + async def post_process_async(self, key: str, md_content: str) -> str: + logger.info("Classifying images to detect tables.") + tasks = [] + for p, info in self.descriptions.items(): + tasks.append((p, classify_image_async(info["data"], self.gemini_api_key))) + for p, task in tasks: + try: + classification = await task + self.descriptions[p]['table_classification'] = classification + except Exception as e: + logger.error(f"Table classification error: {e}") + self.descriptions[p]['table_classification'] = "NO_TABLE" + for p, info in self.descriptions.items(): + cls = info['table_classification'] + if cls == "TWO_COLUMN": + info['final_alt'] = "HAS TO BE PROCESSED - two column table" + elif cls == "THREE_COLUMN": + info['final_alt'] = "HAS TO BE PROCESSED - three column table" + else: + info['final_alt'] = "NO_TABLE image" + md_content = md_content.replace(f"![]({key}{p})", f"![{info['final_alt']}]({info['relative_path']})") + md_content = self._process_table_images_in_markdown(md_content) + final_lines = [] + for line in md_content.split("\n"): + if re.match(r"^\!\[.*\]\(.*\)", line.strip()): + final_lines.append(line.strip()) + return "\n".join(final_lines) + + def _process_table_images_in_markdown(self, md_content: str) -> str: + pat = r"!\[HAS TO BE PROCESSED - (two|three) column table\]\(([^)]+)\)" + matches = re.findall(pat, md_content, flags=re.IGNORECASE) + if not matches: + return md_content + for (col_type, image_id) in matches: + logger.info(f"Processing table image => {image_id}, columns={col_type}") + temp_path = os.path.join(self.output_folder, image_id) + desc_item = None + for k, val in self.descriptions.items(): + if val["relative_path"] == image_id: + desc_item = val + break + if not desc_item: + logger.warning(f"No matching image data for {image_id}, skipping extraction.") + continue + if not os.path.exists(temp_path): + with open(temp_path, "wb") as f: + f.write(desc_item["data"]) + try: + if col_type.lower() == 'two': #check for table_row_extr script for more details + extractor = TableExtractor( + skip_header=True, + merge_two_col_rows=True, + enable_subtopic_merge=True, + subtopic_threshold=0.2 + ) + else: + extractor = TableExtractor( + skip_header=True, + merge_two_col_rows=False, + enable_subtopic_merge=False, + subtopic_threshold=0.2 + ) + row_boxes = extractor.process_image(temp_path) + out_folder = temp_path + "_rows" + os.makedirs(out_folder, exist_ok=True) + extractor.save_extracted_cells(temp_path, row_boxes, out_folder) + # List all extracted cell images relative to the output folder. + extracted_cells = [] + for root, dirs, files in os.walk(out_folder): + for file in files: + rel_path = os.path.relpath(os.path.join(root, file), self.output_folder) + extracted_cells.append(rel_path) + # Save mapping for testing. + self.extracted_tables[image_id] = extracted_cells + snippet = ["**Extracted table cells:**"] + for i, row in enumerate(row_boxes): + row_dir = os.path.join(out_folder, f"row_{i}") + for j, _ in enumerate(row): + cell_file = f"col_{j}.jpg" + cell_path = os.path.join(row_dir, cell_file) + relp = os.path.relpath(cell_path, self.output_folder) + snippet.append(f"![Row {i} Col {j}]({relp})") + new_snip = "\n".join(snippet) + old_line = f"![HAS TO BE PROCESSED - {col_type} column table]({image_id})" + md_content = md_content.replace(old_line, new_snip) + except Exception as e: + logger.error(f"Error processing table image {image_id}: {e}") + finally: + if os.path.exists(temp_path): + os.remove(temp_path) + return md_content + + def post_process(self, key: str, md_content: str) -> str: + return asyncio.run(self.post_process_async(key, md_content)) + +class GeminiTopicExtractor: + def __init__(self, api_key: str = None, num_pages: int = 14): + self.api_key = api_key or os.getenv("GEMINI_API_KEY", "") + self.num_pages = num_pages + + def extract_subtopics(self, pdf_path: str) -> Dict[str, List[int]]: + first_pages_text = self._read_first_pages_raw(pdf_path, self.num_pages) + if not first_pages_text.strip(): + logger.error("No text from first pages => cannot extract subtopics.") + return {} + prompt = f""" +You have the first pages of a PDF specification, including a table of contents. +Instructions: +1. Identify the 'Contents' section listing all topics, subtopics, and their corresponding pages. +2. Identify the major academic subtopics (common desired topic names "Paper X", "Theme X", "Content of X", "AS Unit X", "A2 Unit X", or similar headings). +3. For each subtopic, give the range of pages [start_page, end_page] (1-based) from the table of contents. +4. Output only valid JSON of the form: + {{ + "Subtopic A": [start_page, end_page], + "Subtopic B": [start_page, end_page] + }} +5. If you can't find any subtopics, return an empty JSON. +Important notes: +- The correct "end_page" must be the page number of the next topic or subtopic minus 1. +- The final output must be valid JSON only, with no extra text or code blocks. +Examples: +1. Given this table of contents: +1 Introduction – 2 + Why choose Edexcel A Level Mathematics? - 2 + Supporting you in planning and implementing this qualification - 3 + Qualification at a glance - 5 +2 Subject content and assessment information – 7 + Paper 1 and Paper 2: Pure Mathematics - 11 + Paper 3: Statistics and Mechanics - 30 + Assessment Objectives - 40 +3 Administration and general information – 42 + Entries - 42 + Access arrangements, reasonable adjustments, special consideration and malpractice - 42 + Student recruitment and progression - 45 +Appendix 1: Formulae – 49 +Appendix 2: Notation – 53 +Appendix 3: Use of calculators – 59 +Appendix 4: Assessment Objectives – 60 +Appendix 5: The context for the development of this qualification – 62 +Appendix 6: Transferable skills – 64 +Appendix 7: Level 3 Extended Project qualification – 65 +Appendix 8: Codes – 67 +The correct output should be: +{{ + "Paper 1 and Paper 2: Pure Mathematics": [11, 29], + "Paper 3: Statistics and Mechanics": [30, 42] +}} +2. Given this table of contents: +Qualification at a glance – 1 + Assessment Objectives and weightings - 4 +Knowledge, skills and understanding – 5 + Theme 1: Introduction to markets and market failure - 5 + Theme 2: The UK economy – performance and policies - 11 + Theme 3: Business behaviour and the labour market - 21 + Theme 4: A global perspective - 29 +Assessment – 39 + Assessment summary - 39 + Assessment objectives - 41 + Assessment overview - 42 + Breakdown of assessment objectives - 42 + Synoptic assessment - 43 + Discount code and performance tables - 43 + Access arrangements, reasonable adjustments and special consideration - 44 + Malpractice - 45 + Equality Act 2010 and Pearson equality policy - 45 + Synoptic assessment - 46 + Awarding and reporting - 47 +Other information – 49 + Student recruitment -49 + Prior learning and other requirements -49 + Progression - 49 +Appendix 1: Transferable skills – 53 +Appendix 2: Level 3 Extended Project qualification – 55 +Appendix 3: Quantitative skills – 59 +Appendix 4: Codes – 61 +Appendix 5: Index – 63 +The correct output should be: +{{ + "Theme 1: Introduction to markets and market failure": [5, 10], + "Theme 2: The UK economy – performance and policies": [11, 20], + "Theme 3: Business behaviour and the labour market": [21, 28], + "Theme 4: A global perspective": [29, 38] +}} +3. You might also see sections like: +2.1 AS Unit 1 11 +2.2 AS Unit 2 18 +2.3 A2 Unit 3 24 +2.4 A2 Unit 4 31 +In that scenario, your output might look like: +{{ + "2.1 AS Unit 1": [11, 17], + "2.2 AS Unit 2": [18, 23], + "2.3 A2 Unit 3": [24, 30], + "2.4 A2 Unit 4": [31, 35] +}} +4. Another example might list subtopics: +3.1 Overarching themes 11 +3.2 A: Proof 12 +3.3 B: Algebra and functions 13 +3.4 C: Coordinate geometry in the ( x , y ) plane 14 +3.5 D: Sequences and series 15 +3.6 E: Trigonometry 16 +3.7 F: Exponentials and logarithms 17 +3.8 G: Differentiation 18 +3.9 H: Integration 19 +3.10 I: Numerical methods 20 +3.11 J: Vectors 20 +3.12 K: Statistical sampling 21 +3.13 L: Data presentation and interpretation 21 +3.14 M: Probability 22 +3.15 N: Statistical distributions 23 +3.16 O: Statistical hypothesis testing 23 +3.17 P: Quantities and units in mechanics 24 +3.18 Q: Kinematics 24 +3.19 R: Forces and Newton’s laws 24 +3.20 S: Moments 25 +3.21 Use of data in statistics 26 +Here the correct output might look like: +{{ + "A: Proof": [12, 12], + "B: Algebra and functions": [13, 13], + ... +}} +Now, extract topics from this text: +{first_pages_text} +""" + global _GEMINI_CLIENT + if _GEMINI_CLIENT is None: + _GEMINI_CLIENT = genai.Client(api_key=self.api_key) + client = _GEMINI_CLIENT + try: + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=[prompt], + config=types.GenerateContentConfig(temperature=0.0) + ) + if not response or not response.text: + logger.warning("No text from LLM => returning empty subtopics.") + return {} + raw_json = response.text.strip() + cleaned = raw_json.replace("```json", "").replace("```", "") + try: + data = json.loads(cleaned) + except Exception as json_err: + logger.error(f"JSON parsing error: {json_err}") + return {} + final_dict = {} + found_sub_dict = None + for k, v in data.items(): + if isinstance(v, dict): + found_sub_dict = v + break + if found_sub_dict is not None: + for subk, rng in found_sub_dict.items(): + if isinstance(rng, list) and len(rng) == 2: + final_dict[subk] = rng + else: + for subk, rng in data.items(): + if isinstance(rng, list) and len(rng) == 2: + final_dict[subk] = rng + return final_dict + except Exception as e: + logger.error(f"Gemini subtopic extraction error: {e}") + return {} + + def _read_first_pages_raw(self, pdf_path: str, num_pages: int) -> str: + text_parts = [] + try: + if pdf_path.startswith("http://") or pdf_path.startswith("https://"): + response = requests.get(pdf_path) + if response.status_code != 200: + logger.error("Failed to download PDF from %s. Status code: %d", pdf_path, response.status_code) + return "" + pdf_bytes = response.content + else: + with open(pdf_path, "rb") as f: + pdf_bytes = f.read() + doc = fitz.open(stream=pdf_bytes, filetype="pdf") + pages_to_read = min(num_pages, doc.page_count) + for i in range(pages_to_read): + raw_text = doc[i].get_text("raw") + text_parts.append(raw_text) + doc.close() + except Exception as e: + logger.error(f"Could not open PDF: {e}") + return "\n".join(text_parts) + + +class MineruNoTextProcessor: + def __init__(self, output_folder: str, gemini_api_key: str): + self.output_folder = output_folder + os.makedirs(self.output_folder, exist_ok=True) + self.layout_model = "doclayout_yolo" + self.formula_enable = True + self.table_enable = False + self.language = "en" + + self.subtopic_extractor = GeminiTopicExtractor(api_key=gemini_api_key, num_pages=20) + self.gemini_api_key = gemini_api_key or os.getenv("GEMINI_API_KEY", "") + + self.use_s3 = True + self.s3_writer = s3Writer( + ak=os.getenv("S3_ACCESS_KEY"), + sk=os.getenv("S3_SECRET_KEY"), + bucket="quextro-resources", + endpoint_url=os.getenv("S3_ENDPOINT") + ) + + def cleanup_gpu(self): + try: + gc.collect() + torch.cuda.empty_cache() + logger.info("GPU memory cleaned up.") + except Exception as e: + logger.error(f"Error during GPU cleanup: {e}") + + def process(self, pdf_path: str) -> Dict[str, Any]: + logger.info(f"Processing PDF: {pdf_path}") + try: + # 1) Possibly call subtopic_extractor on first pages to find subtopics in the PDF as a whole + subtopics = self.subtopic_extractor.extract_subtopics(pdf_path) + logger.info(f"Gemini returned subtopics: {subtopics}") + + if pdf_path.startswith("http://") or pdf_path.startswith("https://"): + response = requests.get(pdf_path) + if response.status_code != 200: + logger.error("Failed to download PDF from %s. Status code: %d", pdf_path, response.status_code) + raise Exception(f"Failed to download PDF: {pdf_path}") + pdf_bytes = response.content + logger.info("Downloaded %d bytes for pdf_url='%s'", len(pdf_bytes), pdf_path) + else: + with open(pdf_path, "rb") as f: + pdf_bytes = f.read() + logger.info("Loaded %d bytes from local file '%s'", len(pdf_bytes), pdf_path) + + doc = fitz.open(stream=pdf_bytes, filetype="pdf") + total_pages = doc.page_count + doc.close() + + # 3) Decide which pages to process + final_pages = set() + if not subtopics: + # fallback + final_pages = set(range(total_pages)) + else: + offset_candidates = [] + for subname, rng in subtopics.items(): + start_p, _ = rng + occs = find_all_occurrences(pdf_bytes, subname) + for p in occs: + candidate = p - (start_p - 1) + if candidate > 0: + offset_candidates.append(candidate) + if offset_candidates: + try: + from statistics import mode + global_offset = mode(offset_candidates) + except: + from statistics import median + global_offset = int(median(offset_candidates)) + else: + global_offset = 0 + + logger.info(f"Computed global offset: {global_offset}") + for subname, rng in subtopics.items(): + if not (isinstance(rng, list) and len(rng) == 2): + continue + start_p, end_p = rng + if start_p > end_p: + continue + s0 = (start_p - 1) + global_offset + e0 = (end_p - 1) + global_offset + for pp in range(s0, e0 + 1): + final_pages.add(pp) + + if not final_pages: + final_pages = set(range(total_pages)) + + logger.info(f"Processing pages (0-based): {sorted(final_pages)}") + subset_pdf_bytes = create_subset_pdf(pdf_bytes, sorted(final_pages)) + + # 4) Analyze and produce markdown + dataset = PymuDocDataset(subset_pdf_bytes) + inference = doc_analyze( + dataset, + ocr=True, + lang=self.language, + layout_model=self.layout_model, + formula_enable=self.formula_enable, + table_enable=self.table_enable + ) + writer = S3ImageWriter(self.s3_writer, "/topic-extraction", self.gemini_api_key) + md_prefix = "/topic-extraction/" + pipe_result = inference.pipe_ocr_mode(writer, lang=self.language) + md_content = pipe_result.get_markdown(md_prefix) + final_markdown = writer.post_process(md_prefix, md_content) + + subtopic_list = list(writer.extracted_subtopics.values()) + + out_path = os.path.join(self.output_folder, "final_subtopics.json") + with open(out_path, "w", encoding="utf-8") as f: + json.dump(subtopic_list, f, indent=2) + logger.info(f"Final subtopics JSON saved locally at {out_path}") + + return { + "final_markdown": final_markdown, + "subtopics_extracted": subtopic_list + } + finally: + self.cleanup_gpu() + +if __name__ == "__main__": + input_pdf = "/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf" + output_dir = "/home/user/app/we/we_ars" + gemini_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU") + try: + processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=gemini_key) + result = processor.process(input_pdf) + logger.info("Processing completed successfully.") + # The result includes final_markdown and subtopics_extracted + except Exception as e: + logger.error(f"Processing failed: {e}") \ No newline at end of file diff --git a/topic_extraction.log b/topic_extraction.log index 759fbd980d718cde9203dce56b27041c1acbf760..ba2f33eb98b145bc8cceaf7522fe013b9817ce36 100644 --- a/topic_extraction.log +++ b/topic_extraction.log @@ -1192,3 +1192,2259 @@ and series'. Using page 7. 2025-03-03 14:13:31,102 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/final_subtopics.json 2025-03-03 14:13:31,434 [INFO] __main__ - GPU memory cleaned up. 2025-03-03 14:13:31,442 [INFO] __main__ - Processing completed successfully. +2025-03-03 14:35:18,303 [INFO] __main__ - Running in test mode: using local image writer. +2025-03-03 14:35:18,304 [INFO] __main__ - Processing PDF: /home/user/app/input_output/wjec-gce-maths-spec-from-2017-e.pdf +2025-03-03 14:35:18,304 [ERROR] __main__ - Could not open PDF: [Errno 2] No such file or directory: '/home/user/app/input_output/wjec-gce-maths-spec-from-2017-e.pdf' +2025-03-03 14:35:18,304 [ERROR] __main__ - No text from first pages => cannot extract subtopics. +2025-03-03 14:35:18,304 [INFO] __main__ - Gemini returned subtopics: {} +2025-03-03 14:35:18,576 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 14:35:18,576 [ERROR] __main__ - Processing failed: [Errno 2] No such file or directory: '/home/user/app/input_output/wjec-gce-maths-spec-from-2017-e.pdf' +2025-03-03 14:36:15,476 [INFO] __main__ - Running in test mode: using local image writer. +2025-03-03 14:36:15,476 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 14:36:16,171 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 14:36:16,172 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 14:36:16,326 [INFO] __main__ - Occurrences of subtopic 'Paper 1 and Paper 2: Pure Mathematics': [2, 3, 4, 14] +2025-03-03 14:36:16,513 [INFO] __main__ - Occurrences of subtopic 'Paper 3: Statistics and Mechanics': [3, 4, 9, 13, 33, 44] +2025-03-03 14:36:16,514 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 14:37:10,283 [INFO] __main__ - doc_analyze complete. Extracting images. +2025-03-03 14:37:18,985 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 14:37:47,492 [INFO] __main__ - Processing table image => img_1.jpg, columns=three +2025-03-03 14:37:50,068 [INFO] __main__ - Processing table image => img_2.jpg, columns=three +2025-03-03 14:37:52,978 [INFO] __main__ - Processing table image => img_3.jpg, columns=three +2025-03-03 14:37:56,011 [INFO] __main__ - Processing table image => img_4.jpg, columns=three +2025-03-03 14:37:58,952 [INFO] __main__ - Processing table image => img_5.jpg, columns=three +2025-03-03 14:38:02,311 [INFO] __main__ - Processing table image => img_6.jpg, columns=three +2025-03-03 14:38:05,509 [INFO] __main__ - Processing table image => img_7.jpg, columns=three +2025-03-03 14:38:08,662 [INFO] __main__ - Processing table image => img_8.jpg, columns=three +2025-03-03 14:38:11,711 [INFO] __main__ - Processing table image => img_9.jpg, columns=three +2025-03-03 14:38:15,102 [INFO] __main__ - Processing table image => img_10.jpg, columns=three +2025-03-03 14:38:18,262 [INFO] __main__ - Processing table image => img_11.jpg, columns=two +2025-03-03 14:38:21,059 [INFO] __main__ - Processing table image => img_12.jpg, columns=three +2025-03-03 14:38:23,975 [INFO] __main__ - Processing table image => img_13.jpg, columns=three +2025-03-03 14:38:26,770 [INFO] __main__ - Processing table image => img_14.jpg, columns=three +2025-03-03 14:38:30,620 [INFO] __main__ - Processing table image => img_15.jpg, columns=three +2025-03-03 14:38:33,945 [INFO] __main__ - Processing table image => img_16.jpg, columns=three +2025-03-03 14:38:37,404 [INFO] __main__ - Processing table image => img_17.jpg, columns=three +2025-03-03 14:38:40,601 [INFO] __main__ - Processing table image => img_18.jpg, columns=three +2025-03-03 14:38:41,459 [INFO] __main__ - Processing table image => img_19.jpg, columns=three +2025-03-03 14:38:44,087 [INFO] __main__ - Processing table image => img_20.jpg, columns=three +2025-03-03 14:38:47,631 [INFO] __main__ - Processing table image => img_21.jpg, columns=three +2025-03-03 14:38:50,716 [INFO] __main__ - Processing table image => img_22.jpg, columns=three +2025-03-03 14:38:54,125 [INFO] __main__ - Processing table image => img_23.jpg, columns=three +2025-03-03 14:38:57,256 [INFO] __main__ - Processing table image => img_24.jpg, columns=three +2025-03-03 14:39:00,533 [INFO] __main__ - Processing table image => img_25.jpg, columns=two +2025-03-03 14:39:03,631 [INFO] __main__ - Processing table image => img_26.jpg, columns=three +2025-03-03 14:39:06,279 [INFO] __main__ - Processing table image => img_27.jpg, columns=three +2025-03-03 14:39:08,942 [INFO] __main__ - Processing table image => img_28.jpg, columns=two +2025-03-03 14:39:11,877 [INFO] __main__ - Final JSON saved locally at /home/user/app/wje/final_output.json +2025-03-03 14:39:11,877 [INFO] __main__ - Final JSON saved locally at /home/user/app/wje/final_output_local.json +2025-03-03 14:39:12,200 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 14:39:12,207 [INFO] __main__ - Processing completed successfully. +2025-03-03 14:40:32,170 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 14:40:32,970 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 14:40:32,971 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 14:40:33,339 [INFO] __main__ - Computed global offset: 4 +2025-03-03 14:40:33,340 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 14:41:29,053 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 14:41:30,998 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 14:41:31,535 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 14:41:32,063 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 14:41:32,706 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 14:41:33,299 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 14:41:33,822 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 14:41:34,284 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 14:41:34,858 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 14:41:35,387 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 14:41:35,909 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 14:41:36,445 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 14:41:36,898 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 14:41:37,505 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 14:41:38,061 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 14:41:38,617 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 14:41:39,193 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 14:41:39,510 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 14:41:39,998 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 14:41:40,612 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 14:41:41,206 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 14:41:41,798 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 14:41:42,242 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 14:41:42,723 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 14:41:43,337 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 14:41:43,795 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 14:41:44,246 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 14:41:44,848 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 14:41:45,258 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 14:41:49,188 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 14:41:52,437 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 14:41:52,752 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 14:41:52,968 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 14:41:53,195 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 14:41:53,696 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 14:41:57,147 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 14:41:57,453 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 14:41:57,682 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 14:41:57,910 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 14:41:58,166 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 14:41:58,667 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 14:42:02,140 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 14:42:02,430 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 14:42:02,663 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 14:42:03,164 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 14:42:06,264 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 14:42:06,558 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 14:42:06,783 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 14:42:06,996 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 14:42:07,497 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 14:42:11,013 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 14:42:11,312 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 14:42:11,503 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 14:42:11,721 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 14:42:11,982 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 14:42:12,484 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 14:42:16,389 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 14:42:16,683 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 14:42:16,900 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 14:42:17,133 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 14:42:17,634 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 14:42:21,186 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 14:42:21,499 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 14:42:21,717 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 14:42:22,011 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 14:42:22,248 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 14:42:22,750 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 14:42:26,069 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 14:42:26,374 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 14:42:26,616 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 14:42:26,848 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 14:42:27,062 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 14:42:27,295 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 14:42:27,507 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 14:42:27,736 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 14:42:27,960 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 14:42:28,170 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 14:42:28,377 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 14:42:28,613 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 14:42:28,846 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 14:42:29,068 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 14:42:29,569 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 14:42:33,545 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 14:42:33,843 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 14:42:34,053 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 14:42:34,267 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 14:42:34,488 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 14:42:34,706 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 14:42:34,914 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 14:42:35,159 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 14:42:35,367 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 14:42:35,867 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 14:42:39,999 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 14:42:40,351 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 14:42:40,572 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 14:42:40,781 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 14:42:40,969 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 14:42:41,470 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 14:42:44,724 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 14:42:45,053 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 14:42:45,266 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 14:42:45,496 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 14:42:45,715 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 14:42:45,925 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 14:42:46,426 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 14:42:50,046 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 14:42:50,340 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 14:42:50,548 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 14:42:50,760 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 14:42:50,979 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 14:42:51,207 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 14:42:51,708 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 14:42:55,058 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 14:42:55,385 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 14:42:55,616 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 14:42:55,838 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 14:42:56,071 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 14:42:56,293 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 14:42:56,794 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 14:43:01,227 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 14:43:01,523 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 14:43:01,761 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 14:43:01,984 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 14:43:02,209 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 14:43:02,433 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 14:43:02,677 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 14:43:02,871 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 14:43:03,091 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 14:43:03,592 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 14:43:07,715 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 14:43:07,982 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 14:43:08,169 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 14:43:08,360 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 14:43:08,623 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 14:43:08,855 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 14:43:09,088 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 14:43:09,589 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 14:43:13,265 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 14:43:13,580 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 14:43:13,802 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 14:43:14,017 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 14:43:14,240 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 14:43:14,444 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 14:43:14,664 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 14:43:14,882 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 14:43:15,692 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 14:43:16,193 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 14:43:20,159 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg +2025-03-03 14:43:20,446 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg +2025-03-03 14:43:20,634 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg +2025-03-03 14:43:20,849 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg +2025-03-03 14:43:21,057 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg +2025-03-03 14:43:21,296 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg +2025-03-03 14:43:21,521 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg +2025-03-03 14:43:21,731 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r5_c0.jpg +2025-03-03 14:43:22,232 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 14:43:23,394 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg +2025-03-03 14:43:23,649 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg +2025-03-03 14:43:23,830 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg +2025-03-03 14:43:24,014 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg +2025-03-03 14:43:24,515 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 14:43:27,381 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg +2025-03-03 14:43:27,677 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg +2025-03-03 14:43:27,862 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg +2025-03-03 14:43:28,099 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg +2025-03-03 14:43:28,327 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg +2025-03-03 14:43:28,532 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg +2025-03-03 14:43:29,033 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 14:43:33,082 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg +2025-03-03 14:43:33,375 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c1.jpg +2025-03-03 14:43:33,565 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c0.jpg +2025-03-03 14:43:33,779 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c1.jpg +2025-03-03 14:43:34,280 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 14:43:37,916 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg +2025-03-03 14:43:38,204 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg +2025-03-03 14:43:38,389 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg +2025-03-03 14:43:38,602 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg +2025-03-03 14:43:39,103 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 14:43:43,240 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg +2025-03-03 14:43:43,525 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg +2025-03-03 14:43:43,724 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg +2025-03-03 14:43:43,948 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c1.jpg +2025-03-03 14:43:44,175 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c0.jpg +2025-03-03 14:43:44,382 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c1.jpg +2025-03-03 14:43:44,580 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r3_c0.jpg +2025-03-03 14:43:45,081 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 14:43:48,504 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg +2025-03-03 14:43:48,761 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg +2025-03-03 14:43:48,988 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg +2025-03-03 14:43:49,194 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg +2025-03-03 14:43:49,695 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 14:43:53,228 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg +2025-03-03 14:43:53,517 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg +2025-03-03 14:43:53,728 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg +2025-03-03 14:43:53,961 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg +2025-03-03 14:43:54,180 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg +2025-03-03 14:43:54,681 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 14:43:57,957 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c0.jpg +2025-03-03 14:43:58,230 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c0.jpg +2025-03-03 14:43:58,415 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r2_c0.jpg +2025-03-03 14:43:58,672 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r3_c0.jpg +2025-03-03 14:43:58,857 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r4_c0.jpg +2025-03-03 14:43:59,067 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r5_c0.jpg +2025-03-03 14:43:59,568 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 14:44:02,774 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c0.jpg +2025-03-03 14:44:03,053 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c1.jpg +2025-03-03 14:44:03,283 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c0.jpg +2025-03-03 14:44:03,506 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r2_c0.jpg +2025-03-03 14:44:04,007 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 14:44:06,977 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c0.jpg +2025-03-03 14:44:07,259 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c1.jpg +2025-03-03 14:44:07,476 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c0.jpg +2025-03-03 14:44:07,696 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c1.jpg +2025-03-03 14:44:07,914 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r2_c0.jpg +2025-03-03 14:44:08,123 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r3_c0.jpg +2025-03-03 14:44:08,350 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c0.jpg +2025-03-03 14:44:08,561 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c1.jpg +2025-03-03 14:44:09,062 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 14:44:12,602 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r0_c0.jpg +2025-03-03 14:44:12,906 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r1_c0.jpg +2025-03-03 14:44:13,091 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c0.jpg +2025-03-03 14:44:13,323 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r3_c0.jpg +2025-03-03 14:44:13,828 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/final_subtopics.json +2025-03-03 14:44:14,194 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 14:44:14,201 [INFO] __main__ - Processing completed successfully. +2025-03-03 15:06:28,358 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:06:29,190 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:06:29,191 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:06:29,684 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:06:29,684 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:07:26,351 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:07:28,271 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:07:28,880 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:07:29,380 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:07:29,978 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:07:30,515 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:07:31,001 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:07:31,467 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:07:32,038 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:07:32,648 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:07:33,107 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:07:33,683 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:07:34,203 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:07:34,803 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:07:35,241 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:07:35,821 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:07:36,441 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:07:36,793 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:07:37,345 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:07:37,943 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:07:38,490 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:07:39,145 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:07:39,623 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:07:40,159 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:07:40,778 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:07:41,328 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:07:41,899 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:07:42,663 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:07:43,118 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:07:46,985 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:07:52,030 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 15:07:52,359 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 15:07:52,583 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 15:07:52,818 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 15:07:52,819 [ERROR] __main__ - Error processing table image /topic-extraction/img_1.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:07:52,819 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:07:56,276 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 15:07:56,585 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 15:07:56,773 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 15:07:57,004 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 15:07:57,234 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 15:07:57,234 [ERROR] __main__ - Error processing table image /topic-extraction/img_2.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:07:57,235 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:08:00,963 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 15:08:01,254 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 15:08:01,442 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 15:08:01,442 [ERROR] __main__ - Error processing table image /topic-extraction/img_3.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:01,443 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:08:04,900 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 15:08:05,196 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 15:08:05,413 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 15:08:05,624 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 15:08:05,624 [ERROR] __main__ - Error processing table image /topic-extraction/img_4.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:05,624 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:08:09,549 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 15:08:09,860 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 15:08:10,084 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 15:08:10,315 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 15:08:10,529 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 15:08:10,529 [ERROR] __main__ - Error processing table image /topic-extraction/img_5.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:10,529 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:08:14,256 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 15:08:14,539 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 15:08:14,764 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 15:08:14,973 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 15:08:14,973 [ERROR] __main__ - Error processing table image /topic-extraction/img_6.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:14,973 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:08:18,857 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 15:08:19,176 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 15:08:19,406 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 15:08:19,628 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 15:08:19,815 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 15:08:19,815 [ERROR] __main__ - Error processing table image /topic-extraction/img_7.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:19,815 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:08:23,570 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 15:08:23,858 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 15:08:24,110 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 15:08:24,345 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 15:08:24,563 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 15:08:24,779 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 15:08:25,006 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 15:08:25,260 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 15:08:25,479 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 15:08:25,695 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 15:08:25,909 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 15:08:26,095 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 15:08:26,331 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 15:08:26,541 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 15:08:26,541 [ERROR] __main__ - Error processing table image /topic-extraction/img_8.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:26,541 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:08:30,374 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 15:08:30,676 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 15:08:30,888 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 15:08:31,120 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 15:08:31,338 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 15:08:31,579 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 15:08:31,801 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 15:08:32,004 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 15:08:32,213 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 15:08:32,214 [ERROR] __main__ - Error processing table image /topic-extraction/img_9.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:32,214 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:08:35,917 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 15:08:36,212 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 15:08:36,464 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 15:08:36,689 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 15:08:36,933 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 15:08:36,934 [ERROR] __main__ - Error processing table image /topic-extraction/img_10.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:36,934 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:08:40,244 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 15:08:40,533 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 15:08:40,756 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 15:08:40,996 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 15:08:41,209 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 15:08:41,440 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 15:08:41,440 [ERROR] __main__ - Error processing table image /topic-extraction/img_11.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:41,440 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:08:44,731 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 15:08:45,033 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 15:08:45,244 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 15:08:45,427 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 15:08:45,678 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 15:08:45,919 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 15:08:45,919 [ERROR] __main__ - Error processing table image /topic-extraction/img_12.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:45,919 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:08:49,575 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 15:08:49,833 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 15:08:50,062 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 15:08:50,268 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 15:08:50,492 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 15:08:50,681 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 15:08:50,681 [ERROR] __main__ - Error processing table image /topic-extraction/img_13.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:50,682 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:08:54,968 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 15:08:55,283 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 15:08:55,502 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 15:08:55,718 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 15:08:55,913 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 15:08:56,148 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 15:08:56,395 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 15:08:56,628 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 15:08:56,846 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 15:08:56,847 [ERROR] __main__ - Error processing table image /topic-extraction/img_14.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:08:56,847 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:09:00,646 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 15:09:00,938 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 15:09:01,168 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 15:09:01,395 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 15:09:01,594 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 15:09:01,818 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 15:09:02,037 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 15:09:02,037 [ERROR] __main__ - Error processing table image /topic-extraction/img_15.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:02,037 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:09:06,565 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 15:09:06,824 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 15:09:07,056 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 15:09:07,270 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 15:09:07,488 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 15:09:07,703 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 15:09:07,891 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 15:09:08,117 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 15:09:08,993 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 15:09:08,993 [ERROR] __main__ - Error processing table image /topic-extraction/img_16.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:08,994 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:09:12,825 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg +2025-03-03 15:09:13,119 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg +2025-03-03 15:09:13,357 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg +2025-03-03 15:09:13,563 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg +2025-03-03 15:09:13,882 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg +2025-03-03 15:09:14,081 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg +2025-03-03 15:09:14,306 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg +2025-03-03 15:09:14,501 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r5_c0.jpg +2025-03-03 15:09:14,502 [ERROR] __main__ - Error processing table image /topic-extraction/img_17.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:14,502 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:09:15,797 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg +2025-03-03 15:09:15,990 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg +2025-03-03 15:09:16,192 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg +2025-03-03 15:09:16,383 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg +2025-03-03 15:09:16,383 [ERROR] __main__ - Error processing table image /topic-extraction/img_18.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:16,384 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:09:19,388 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg +2025-03-03 15:09:19,717 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg +2025-03-03 15:09:19,935 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg +2025-03-03 15:09:20,147 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg +2025-03-03 15:09:20,371 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg +2025-03-03 15:09:20,584 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg +2025-03-03 15:09:20,584 [ERROR] __main__ - Error processing table image /topic-extraction/img_19.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:20,585 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:09:25,259 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg +2025-03-03 15:09:25,550 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c1.jpg +2025-03-03 15:09:25,773 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c0.jpg +2025-03-03 15:09:25,987 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c1.jpg +2025-03-03 15:09:25,988 [ERROR] __main__ - Error processing table image /topic-extraction/img_20.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:25,988 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 15:09:29,922 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg +2025-03-03 15:09:30,236 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg +2025-03-03 15:09:30,470 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg +2025-03-03 15:09:30,696 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg +2025-03-03 15:09:30,697 [ERROR] __main__ - Error processing table image /topic-extraction/img_21.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:30,697 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 15:09:34,601 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg +2025-03-03 15:09:34,919 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg +2025-03-03 15:09:35,162 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg +2025-03-03 15:09:35,391 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c1.jpg +2025-03-03 15:09:35,635 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c0.jpg +2025-03-03 15:09:35,880 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c1.jpg +2025-03-03 15:09:36,103 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r3_c0.jpg +2025-03-03 15:09:36,103 [ERROR] __main__ - Error processing table image /topic-extraction/img_22.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:36,103 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 15:09:39,775 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg +2025-03-03 15:09:40,053 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg +2025-03-03 15:09:40,311 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg +2025-03-03 15:09:40,556 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg +2025-03-03 15:09:40,557 [ERROR] __main__ - Error processing table image /topic-extraction/img_23.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:40,557 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 15:09:44,517 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg +2025-03-03 15:09:44,816 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg +2025-03-03 15:09:45,025 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg +2025-03-03 15:09:45,216 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg +2025-03-03 15:09:45,474 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg +2025-03-03 15:09:45,475 [ERROR] __main__ - Error processing table image /topic-extraction/img_24.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:09:45,475 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 15:09:48,747 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:11:02,660 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:11:03,435 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:11:03,436 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:11:03,810 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:11:03,810 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:12:00,303 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:12:02,229 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:12:02,838 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:12:03,387 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:12:03,934 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:12:04,493 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:12:04,974 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:12:05,462 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:12:05,948 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:12:06,494 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:12:06,959 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:12:07,507 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:12:08,001 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:12:08,622 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:12:09,113 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:12:09,687 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:12:10,253 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:12:10,551 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:12:11,029 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:12:11,646 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:12:12,199 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:12:12,773 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:12:13,230 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:12:13,744 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:12:14,334 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:12:14,814 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:12:15,247 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:12:15,865 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:12:16,255 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:12:20,255 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:12:23,921 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 15:12:24,217 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 15:12:24,450 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 15:12:24,703 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 15:12:24,703 [ERROR] __main__ - Error processing table image /topic-extraction/img_1.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:12:24,703 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:12:28,056 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 15:12:28,350 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 15:12:28,608 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 15:12:28,802 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 15:12:29,021 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 15:12:29,021 [ERROR] __main__ - Error processing table image /topic-extraction/img_2.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:12:29,021 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:12:32,674 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 15:12:32,963 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 15:12:33,184 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 15:12:33,184 [ERROR] __main__ - Error processing table image /topic-extraction/img_3.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:12:33,184 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:12:36,359 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 15:12:36,659 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 15:12:36,883 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 15:12:37,114 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 15:12:37,114 [ERROR] __main__ - Error processing table image /topic-extraction/img_4.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:12:37,115 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:12:40,889 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 15:12:41,192 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 15:12:41,416 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 15:12:41,662 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 15:12:41,866 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 15:12:41,867 [ERROR] __main__ - Error processing table image /topic-extraction/img_5.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:12:41,867 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:12:46,953 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 15:12:47,267 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 15:12:47,502 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 15:12:47,770 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 15:12:47,771 [ERROR] __main__ - Error processing table image /topic-extraction/img_6.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:12:47,772 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:12:52,731 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 15:12:53,003 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 15:12:53,198 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 15:12:53,431 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 15:12:53,655 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 15:12:53,655 [ERROR] __main__ - Error processing table image /topic-extraction/img_7.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:12:53,655 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:12:57,299 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 15:12:57,597 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 15:12:57,822 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 15:12:58,055 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 15:12:58,290 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 15:12:58,518 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 15:12:58,753 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 15:12:58,987 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 15:12:59,218 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 15:12:59,434 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 15:12:59,632 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 15:12:59,877 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 15:13:00,135 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 15:13:00,374 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 15:13:00,374 [ERROR] __main__ - Error processing table image /topic-extraction/img_8.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:00,375 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:13:04,350 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 15:13:04,658 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 15:13:04,896 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 15:13:05,132 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 15:13:05,358 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 15:13:05,590 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 15:13:05,817 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 15:13:06,048 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 15:13:06,274 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 15:13:06,275 [ERROR] __main__ - Error processing table image /topic-extraction/img_9.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:06,275 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:13:09,774 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 15:13:10,098 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 15:13:10,328 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 15:13:10,570 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 15:13:10,816 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 15:13:10,817 [ERROR] __main__ - Error processing table image /topic-extraction/img_10.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:10,818 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:13:14,111 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 15:13:14,417 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 15:13:14,656 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 15:13:14,910 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 15:13:15,126 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 15:13:15,344 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 15:13:15,344 [ERROR] __main__ - Error processing table image /topic-extraction/img_11.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:15,344 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:13:18,945 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 15:13:19,257 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 15:13:19,469 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 15:13:19,698 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 15:13:19,919 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 15:13:20,141 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 15:13:20,141 [ERROR] __main__ - Error processing table image /topic-extraction/img_12.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:20,141 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:13:23,436 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 15:13:23,732 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 15:13:23,960 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 15:13:24,187 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 15:13:24,424 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 15:13:24,698 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 15:13:24,699 [ERROR] __main__ - Error processing table image /topic-extraction/img_13.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:24,699 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:13:29,158 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 15:13:29,447 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 15:13:29,679 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 15:13:29,915 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 15:13:30,141 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 15:13:30,365 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 15:13:30,597 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 15:13:30,815 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 15:13:31,054 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 15:13:31,055 [ERROR] __main__ - Error processing table image /topic-extraction/img_14.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:31,055 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:13:34,757 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 15:13:35,068 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 15:13:35,311 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 15:13:35,553 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 15:13:35,772 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 15:13:35,967 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 15:13:36,164 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 15:13:36,164 [ERROR] __main__ - Error processing table image /topic-extraction/img_15.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:36,165 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:13:40,188 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 15:13:40,459 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 15:13:40,671 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 15:13:40,881 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 15:13:41,105 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 15:13:41,348 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 15:13:41,563 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 15:13:41,786 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 15:13:42,769 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 15:13:42,770 [ERROR] __main__ - Error processing table image /topic-extraction/img_16.jpg: local variable 'recognized_main_topic' referenced before assignment +2025-03-03 15:13:42,770 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:13:46,506 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:17:44,033 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:17:44,943 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:17:44,944 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:17:45,443 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:17:45,444 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:17:46,698 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:19:23,940 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:19:24,695 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:19:24,696 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:19:25,034 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:19:25,035 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:20:21,942 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:20:23,866 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:20:24,397 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:20:24,943 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:20:25,502 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:20:26,063 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:20:26,555 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:20:27,020 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:20:27,549 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:20:28,121 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:20:28,682 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:20:29,233 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:20:29,708 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:20:30,309 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:20:30,792 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:20:31,381 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:20:32,012 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:20:32,303 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:20:32,775 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:20:33,407 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:20:34,029 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:20:34,610 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:20:35,055 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:20:35,568 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:20:36,120 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:20:36,549 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:20:37,025 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:20:37,655 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:20:38,043 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:20:42,180 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:20:45,639 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 15:20:45,932 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 15:20:46,117 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 15:20:46,338 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 15:20:46,338 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:20:50,116 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 15:20:50,422 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 15:20:50,660 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 15:20:50,871 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 15:20:51,056 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 15:20:51,057 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:20:54,596 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 15:20:54,864 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 15:20:55,053 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 15:20:55,054 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:20:58,584 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 15:20:58,873 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 15:20:59,086 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 15:20:59,317 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 15:20:59,318 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:21:02,998 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 15:21:03,281 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 15:21:03,502 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 15:21:03,709 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 15:21:03,943 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 15:21:03,944 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:21:08,925 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 15:21:09,224 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 15:21:09,448 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 15:21:09,681 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 15:21:09,682 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:21:13,431 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 15:21:13,739 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 15:21:13,972 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 15:21:14,183 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 15:21:14,399 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 15:21:14,400 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:21:18,274 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 15:21:18,533 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 15:21:18,720 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 15:21:18,946 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 15:21:19,154 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 15:21:19,378 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 15:21:19,588 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 15:21:19,821 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 15:21:20,049 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 15:21:20,274 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 15:21:20,509 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 15:21:20,739 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 15:21:20,959 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 15:21:21,179 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 15:21:21,179 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:21:25,070 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 15:21:25,356 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 15:21:25,544 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 15:21:25,732 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 15:21:25,948 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 15:21:26,140 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 15:21:26,348 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 15:21:26,564 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 15:21:26,791 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 15:21:26,791 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:21:30,393 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 15:21:30,695 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 15:21:30,914 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 15:21:31,189 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 15:21:31,375 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 15:21:31,375 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:21:34,741 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 15:21:35,030 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 15:21:35,254 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 15:21:35,473 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 15:21:35,698 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 15:21:35,898 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 15:21:35,898 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:21:39,541 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 15:21:39,846 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 15:21:40,073 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 15:21:40,285 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 15:21:40,512 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 15:21:40,767 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 15:21:40,767 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:21:44,049 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 15:21:44,342 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 15:21:44,563 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 15:21:44,750 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 15:21:44,999 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 15:21:45,216 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 15:21:45,217 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:21:50,026 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 15:21:50,324 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 15:21:50,539 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 15:21:50,748 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 15:21:50,963 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 15:21:51,203 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 15:21:51,459 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 15:21:51,671 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 15:21:51,898 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 15:21:51,898 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:21:56,013 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 15:21:56,332 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 15:21:56,521 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 15:21:56,768 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 15:21:57,016 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 15:21:57,229 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 15:21:57,413 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 15:21:57,414 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:22:03,240 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 15:22:03,526 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 15:22:03,838 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 15:22:04,072 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 15:22:04,303 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 15:22:04,510 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 15:22:04,737 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 15:22:04,963 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 15:22:05,842 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 15:22:05,843 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:22:06,029 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:22:07,171 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:22:07,172 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:22:07,700 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:22:07,701 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:22:10,835 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg +2025-03-03 15:22:11,166 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg +2025-03-03 15:22:11,383 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg +2025-03-03 15:22:11,616 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg +2025-03-03 15:22:11,852 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg +2025-03-03 15:22:12,098 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg +2025-03-03 15:22:12,327 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg +2025-03-03 15:22:12,518 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r5_c0.jpg +2025-03-03 15:22:12,519 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:22:14,316 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg +2025-03-03 15:22:14,503 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg +2025-03-03 15:22:14,698 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg +2025-03-03 15:22:14,894 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg +2025-03-03 15:22:14,895 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:22:18,689 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg +2025-03-03 15:22:19,052 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg +2025-03-03 15:22:19,269 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg +2025-03-03 15:22:19,533 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg +2025-03-03 15:22:19,730 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg +2025-03-03 15:22:19,924 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg +2025-03-03 15:22:19,925 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:22:25,628 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg +2025-03-03 15:22:25,959 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c1.jpg +2025-03-03 15:22:26,188 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c0.jpg +2025-03-03 15:22:26,423 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c1.jpg +2025-03-03 15:22:26,424 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 15:22:31,077 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg +2025-03-03 15:22:31,343 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg +2025-03-03 15:22:31,603 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg +2025-03-03 15:22:31,835 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg +2025-03-03 15:22:31,836 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 15:22:36,950 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg +2025-03-03 15:22:37,234 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg +2025-03-03 15:22:37,450 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg +2025-03-03 15:22:37,664 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c1.jpg +2025-03-03 15:22:37,882 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c0.jpg +2025-03-03 15:22:38,078 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c1.jpg +2025-03-03 15:22:38,308 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r3_c0.jpg +2025-03-03 15:22:38,309 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 15:22:43,854 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg +2025-03-03 15:22:44,170 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg +2025-03-03 15:22:44,367 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg +2025-03-03 15:22:44,601 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg +2025-03-03 15:22:44,602 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 15:22:50,835 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg +2025-03-03 15:22:51,129 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg +2025-03-03 15:22:51,352 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg +2025-03-03 15:22:51,568 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg +2025-03-03 15:22:51,799 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg +2025-03-03 15:22:51,800 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 15:22:57,515 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c0.jpg +2025-03-03 15:22:57,832 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c0.jpg +2025-03-03 15:22:58,035 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r2_c0.jpg +2025-03-03 15:22:58,281 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r3_c0.jpg +2025-03-03 15:22:58,507 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r4_c0.jpg +2025-03-03 15:22:58,694 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r5_c0.jpg +2025-03-03 15:22:58,694 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 15:23:02,977 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c0.jpg +2025-03-03 15:23:03,247 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c1.jpg +2025-03-03 15:23:03,496 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c0.jpg +2025-03-03 15:23:03,727 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r2_c0.jpg +2025-03-03 15:23:03,728 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 15:23:07,668 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c0.jpg +2025-03-03 15:23:07,990 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c1.jpg +2025-03-03 15:23:08,181 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c0.jpg +2025-03-03 15:23:08,440 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c1.jpg +2025-03-03 15:23:08,675 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r2_c0.jpg +2025-03-03 15:23:08,902 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r3_c0.jpg +2025-03-03 15:23:09,119 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c0.jpg +2025-03-03 15:23:09,308 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c1.jpg +2025-03-03 15:23:09,309 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 15:23:13,622 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r0_c0.jpg +2025-03-03 15:23:13,931 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r1_c0.jpg +2025-03-03 15:23:14,164 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c0.jpg +2025-03-03 15:23:14,394 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r3_c0.jpg +2025-03-03 15:23:14,399 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/pearson_json/final_subtopics.json +2025-03-03 15:23:14,792 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:23:14,800 [INFO] __main__ - Processing completed successfully. +2025-03-03 15:23:16,376 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:23:18,490 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:23:19,071 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:23:19,624 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:23:20,184 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:23:20,749 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:23:21,303 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:23:21,747 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:23:22,263 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:23:22,793 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:23:23,254 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:23:23,812 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:23:24,292 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:23:24,919 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:23:25,357 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:23:25,966 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:23:26,540 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:23:26,834 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:23:27,323 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:23:27,870 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:23:28,392 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:23:28,972 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:23:29,421 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:23:30,016 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:23:30,682 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:23:31,146 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:23:31,597 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:23:32,202 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:23:32,634 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:23:36,443 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:23:40,026 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 15:23:40,315 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 15:23:40,527 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 15:23:40,744 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 15:23:40,744 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:23:44,104 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 15:23:44,391 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 15:23:44,640 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 15:23:44,885 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 15:23:45,134 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 15:23:45,134 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:23:48,906 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 15:23:49,222 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 15:23:49,474 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 15:23:49,475 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:23:52,653 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 15:23:52,949 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 15:23:53,167 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 15:23:53,382 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 15:23:53,382 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:23:57,190 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 15:23:57,451 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 15:23:57,676 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 15:23:57,889 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 15:23:58,128 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 15:23:58,129 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:24:01,879 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 15:24:02,174 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 15:24:02,391 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 15:24:02,624 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 15:24:02,625 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:24:06,022 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 15:24:06,307 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 15:24:06,540 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 15:24:06,746 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 15:24:06,963 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 15:24:06,963 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:24:10,802 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 15:24:11,123 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 15:24:11,313 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 15:24:11,526 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 15:24:11,743 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 15:24:11,929 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 15:24:12,151 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 15:24:12,370 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 15:24:12,555 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 15:24:12,745 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 15:24:12,972 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 15:24:13,156 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 15:24:13,390 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 15:24:13,623 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 15:24:13,623 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:24:17,854 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 15:24:18,114 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 15:24:18,319 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 15:24:18,550 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 15:24:18,738 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 15:24:18,982 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 15:24:19,223 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 15:24:19,479 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 15:24:19,686 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 15:24:19,686 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:24:23,403 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 15:24:23,661 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 15:24:23,846 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 15:24:24,033 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 15:24:24,275 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 15:24:24,276 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:24:27,485 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 15:24:27,743 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 15:24:27,985 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 15:24:28,194 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 15:24:28,430 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 15:24:28,650 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 15:24:28,650 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:24:31,948 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 15:24:32,241 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 15:24:32,469 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 15:24:32,704 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 15:24:32,890 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 15:24:33,110 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 15:24:33,110 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:24:36,659 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 15:24:36,949 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 15:24:37,171 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 15:24:37,383 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 15:24:37,590 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 15:24:37,799 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 15:24:37,799 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:24:42,224 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 15:24:42,511 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 15:24:42,696 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 15:24:42,914 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 15:24:43,154 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 15:24:43,401 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 15:24:43,589 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 15:24:43,784 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 15:24:44,012 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 15:24:44,013 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:24:47,657 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 15:24:47,954 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 15:24:48,182 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 15:24:48,370 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 15:24:48,582 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 15:24:48,813 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 15:24:49,044 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 15:24:49,044 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:24:53,044 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 15:24:53,303 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 15:24:53,541 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 15:24:53,764 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 15:24:53,980 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 15:24:54,195 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 15:24:54,420 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 15:24:54,647 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 15:24:55,449 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 15:24:55,450 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:24:59,334 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg +2025-03-03 15:24:59,611 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg +2025-03-03 15:24:59,820 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg +2025-03-03 15:25:00,017 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg +2025-03-03 15:25:00,225 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg +2025-03-03 15:25:00,426 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg +2025-03-03 15:25:00,629 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg +2025-03-03 15:25:00,813 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r5_c0.jpg +2025-03-03 15:25:00,813 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:25:02,260 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg +2025-03-03 15:25:02,437 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg +2025-03-03 15:25:02,616 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg +2025-03-03 15:25:02,794 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg +2025-03-03 15:25:02,795 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:25:05,728 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg +2025-03-03 15:25:06,016 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg +2025-03-03 15:25:06,220 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg +2025-03-03 15:25:06,407 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg +2025-03-03 15:25:06,614 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg +2025-03-03 15:25:06,823 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg +2025-03-03 15:25:06,824 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:25:10,984 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg +2025-03-03 15:25:11,271 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c1.jpg +2025-03-03 15:25:11,476 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c0.jpg +2025-03-03 15:25:11,689 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c1.jpg +2025-03-03 15:25:11,690 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 15:25:15,864 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg +2025-03-03 15:25:16,151 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg +2025-03-03 15:25:16,361 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg +2025-03-03 15:25:16,580 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg +2025-03-03 15:25:16,581 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 15:25:20,753 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg +2025-03-03 15:25:21,035 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg +2025-03-03 15:25:21,255 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg +2025-03-03 15:25:21,463 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c1.jpg +2025-03-03 15:25:21,683 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c0.jpg +2025-03-03 15:25:21,905 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c1.jpg +2025-03-03 15:25:22,123 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r3_c0.jpg +2025-03-03 15:25:22,123 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 15:25:25,343 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg +2025-03-03 15:25:25,658 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg +2025-03-03 15:25:25,875 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg +2025-03-03 15:25:26,081 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg +2025-03-03 15:25:26,082 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 15:25:29,857 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg +2025-03-03 15:25:30,146 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg +2025-03-03 15:25:30,363 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg +2025-03-03 15:25:30,549 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg +2025-03-03 15:25:30,775 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg +2025-03-03 15:25:30,776 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 15:25:34,058 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c0.jpg +2025-03-03 15:25:34,360 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c0.jpg +2025-03-03 15:25:34,543 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r2_c0.jpg +2025-03-03 15:25:34,781 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r3_c0.jpg +2025-03-03 15:25:34,989 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r4_c0.jpg +2025-03-03 15:25:35,200 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r5_c0.jpg +2025-03-03 15:25:35,200 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 15:25:38,544 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c0.jpg +2025-03-03 15:25:38,844 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c1.jpg +2025-03-03 15:25:39,043 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c0.jpg +2025-03-03 15:25:39,275 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r2_c0.jpg +2025-03-03 15:25:39,275 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 15:25:42,304 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c0.jpg +2025-03-03 15:25:42,588 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c1.jpg +2025-03-03 15:25:42,837 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c0.jpg +2025-03-03 15:25:43,076 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c1.jpg +2025-03-03 15:25:43,300 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r2_c0.jpg +2025-03-03 15:25:43,545 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r3_c0.jpg +2025-03-03 15:25:43,758 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c0.jpg +2025-03-03 15:25:43,941 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c1.jpg +2025-03-03 15:25:43,941 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 15:25:47,406 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r0_c0.jpg +2025-03-03 15:25:47,702 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r1_c0.jpg +2025-03-03 15:25:47,938 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c0.jpg +2025-03-03 15:25:48,147 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r3_c0.jpg +2025-03-03 15:25:48,150 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/we_ars/final_subtopics.json +2025-03-03 15:25:48,440 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:25:48,447 [INFO] __main__ - Processing completed successfully. +2025-03-03 15:35:13,179 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:35:13,975 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:35:13,975 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:35:14,274 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:35:14,275 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:36:08,757 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:36:10,548 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:36:11,124 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:36:11,652 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:36:12,292 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:36:12,807 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:36:13,299 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:36:13,756 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:36:14,268 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:36:14,851 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:36:15,306 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:36:15,825 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:36:16,252 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:36:16,864 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:36:17,409 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:36:17,982 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:36:18,557 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:36:18,855 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:36:19,385 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:36:19,960 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:36:20,618 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:36:21,229 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:36:21,682 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:36:22,202 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:36:22,746 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:36:23,199 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:36:23,616 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:36:24,228 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:36:24,610 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:36:28,771 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:36:32,613 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 15:36:33,042 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 15:36:33,360 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 15:36:33,578 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 15:36:33,578 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:36:36,960 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 15:36:37,258 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 15:36:37,495 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 15:36:37,741 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 15:36:37,966 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 15:36:37,966 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:36:42,132 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 15:36:42,430 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 15:36:42,677 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 15:36:42,678 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:36:45,924 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 15:36:46,226 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 15:36:46,454 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 15:36:46,653 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 15:36:46,654 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:36:50,162 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 15:36:50,480 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 15:36:50,684 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 15:36:50,903 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 15:36:51,121 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 15:36:51,121 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:36:54,777 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 15:36:55,062 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 15:36:55,316 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 15:36:55,563 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 15:36:55,564 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:36:59,067 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 15:36:59,360 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 15:36:59,579 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 15:36:59,812 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 15:37:00,022 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 15:37:00,022 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:37:03,516 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 15:37:03,844 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 15:37:04,081 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 15:37:04,297 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 15:37:04,519 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 15:37:04,713 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 15:37:04,938 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 15:37:05,173 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 15:37:05,378 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 15:37:05,606 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 15:37:05,813 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 15:37:06,035 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 15:37:06,230 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 15:37:06,465 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 15:37:06,466 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:37:10,452 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 15:37:10,769 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 15:37:10,994 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 15:37:11,186 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 15:37:11,386 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 15:37:11,614 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 15:37:11,854 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 15:37:12,079 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 15:37:12,325 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 15:37:12,326 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:37:15,839 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 15:37:16,156 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 15:37:16,364 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 15:37:16,599 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 15:37:16,836 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 15:37:16,836 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:37:20,234 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 15:37:20,533 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 15:37:20,755 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 15:37:20,986 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 15:37:21,212 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 15:37:21,460 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 15:37:21,461 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:37:24,854 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 15:37:25,123 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 15:37:25,321 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 15:37:25,564 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 15:37:25,784 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 15:37:26,011 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 15:37:26,011 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:37:29,369 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 15:37:29,639 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 15:37:29,833 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 15:37:30,067 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 15:37:30,300 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 15:37:30,521 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 15:37:30,522 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:37:34,845 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 15:37:35,143 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 15:37:35,369 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 15:37:35,584 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 15:37:35,812 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 15:37:36,047 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 15:37:36,263 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 15:37:36,499 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 15:37:36,731 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 15:37:36,731 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:37:40,443 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 15:37:40,739 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 15:37:40,976 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 15:37:41,215 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 15:37:41,432 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 15:37:41,680 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 15:37:41,875 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 15:37:41,875 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:37:47,207 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 15:37:47,496 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 15:37:47,709 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 15:37:47,933 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 15:37:48,157 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 15:37:48,378 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 15:37:48,606 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 15:37:48,843 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 15:37:49,659 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 15:37:49,660 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:37:54,496 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg +2025-03-03 15:37:54,789 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg +2025-03-03 15:37:55,013 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg +2025-03-03 15:37:55,227 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg +2025-03-03 15:37:55,435 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg +2025-03-03 15:37:55,621 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg +2025-03-03 15:37:55,841 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg +2025-03-03 15:37:56,031 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r5_c0.jpg +2025-03-03 15:37:56,032 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:37:57,656 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg +2025-03-03 15:37:57,843 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg +2025-03-03 15:37:58,025 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg +2025-03-03 15:37:58,205 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg +2025-03-03 15:37:58,206 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:38:01,173 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg +2025-03-03 15:38:01,468 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg +2025-03-03 15:38:01,717 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg +2025-03-03 15:38:01,957 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg +2025-03-03 15:38:02,169 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg +2025-03-03 15:38:02,390 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg +2025-03-03 15:38:02,391 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:38:06,804 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg +2025-03-03 15:38:07,126 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c1.jpg +2025-03-03 15:38:07,371 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c0.jpg +2025-03-03 15:38:07,619 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c1.jpg +2025-03-03 15:38:07,620 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 15:38:11,257 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg +2025-03-03 15:38:11,521 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg +2025-03-03 15:38:11,778 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg +2025-03-03 15:38:11,997 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg +2025-03-03 15:38:11,997 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 15:38:15,768 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg +2025-03-03 15:38:16,060 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg +2025-03-03 15:38:16,291 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg +2025-03-03 15:38:16,510 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c1.jpg +2025-03-03 15:38:16,735 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c0.jpg +2025-03-03 15:38:16,954 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c1.jpg +2025-03-03 15:38:17,167 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r3_c0.jpg +2025-03-03 15:38:17,167 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 15:38:20,451 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg +2025-03-03 15:38:20,752 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg +2025-03-03 15:38:20,977 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg +2025-03-03 15:38:21,189 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg +2025-03-03 15:38:21,189 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 15:38:24,928 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg +2025-03-03 15:38:25,223 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg +2025-03-03 15:38:25,467 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg +2025-03-03 15:38:25,682 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg +2025-03-03 15:38:25,891 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg +2025-03-03 15:38:25,892 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 15:38:29,433 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c0.jpg +2025-03-03 15:38:29,727 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c0.jpg +2025-03-03 15:38:29,962 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r2_c0.jpg +2025-03-03 15:38:30,193 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r3_c0.jpg +2025-03-03 15:38:30,418 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r4_c0.jpg +2025-03-03 15:38:30,663 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r5_c0.jpg +2025-03-03 15:38:30,663 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 15:38:33,787 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c0.jpg +2025-03-03 15:38:34,093 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c1.jpg +2025-03-03 15:38:34,340 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c0.jpg +2025-03-03 15:38:34,527 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r2_c0.jpg +2025-03-03 15:38:34,528 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 15:38:37,830 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c0.jpg +2025-03-03 15:38:38,135 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c1.jpg +2025-03-03 15:38:38,348 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c0.jpg +2025-03-03 15:38:38,557 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c1.jpg +2025-03-03 15:38:38,779 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r2_c0.jpg +2025-03-03 15:38:38,985 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r3_c0.jpg +2025-03-03 15:38:39,202 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c0.jpg +2025-03-03 15:38:39,412 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c1.jpg +2025-03-03 15:38:39,412 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 15:38:42,913 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r0_c0.jpg +2025-03-03 15:38:43,203 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r1_c0.jpg +2025-03-03 15:38:43,415 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c0.jpg +2025-03-03 15:38:43,632 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r3_c0.jpg +2025-03-03 15:38:43,635 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/we_ars/final_subtopics.json +2025-03-03 15:38:43,937 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:38:43,944 [INFO] __main__ - Processing completed successfully. +2025-03-03 15:42:34,738 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:42:35,614 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:42:35,614 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:42:35,901 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:42:35,901 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:43:32,916 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:43:34,890 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:43:35,454 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:43:35,979 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:43:36,599 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:43:37,168 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:43:37,654 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:43:38,122 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:43:38,625 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:43:39,217 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:43:39,653 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:43:40,219 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:43:40,688 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:43:41,333 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:43:41,810 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:43:42,373 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:43:42,930 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:43:43,215 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:43:43,761 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:43:44,343 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:43:44,916 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:43:45,497 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:43:46,030 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:43:46,531 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:43:47,102 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:43:47,527 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:43:47,978 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:43:48,600 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:43:48,979 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:43:53,041 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:43:55,635 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:43:58,390 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:44:01,626 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:44:04,265 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:44:07,390 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:44:10,407 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:44:13,406 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:44:16,584 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:44:19,774 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:44:22,710 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:44:25,348 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:44:28,209 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:44:30,871 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:44:34,586 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:44:37,728 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:44:41,186 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:44:44,225 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:44:45,215 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:44:47,826 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:44:51,247 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 15:44:54,347 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 15:44:58,189 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 15:45:01,536 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 15:45:04,841 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 15:45:07,739 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 15:45:10,526 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 15:45:13,345 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 15:45:16,242 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/pearson_json/final_subtopics.json +2025-03-03 15:45:16,577 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:45:16,586 [INFO] __main__ - Processing completed successfully. +2025-03-03 15:57:41,431 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:57:42,258 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:57:42,259 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:57:42,742 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:57:42,743 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:58:39,849 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:58:41,633 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:58:42,153 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:58:42,662 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:58:43,172 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:58:43,731 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:58:44,204 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:58:44,670 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:58:45,137 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:58:45,759 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:58:46,209 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:58:46,777 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:58:47,219 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:58:47,782 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:58:48,283 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:58:48,810 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:58:49,338 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:58:49,627 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:58:50,141 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:58:50,715 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:58:51,276 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:58:51,914 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:58:52,367 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:58:52,816 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:58:53,315 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:58:53,749 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:58:54,300 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:58:54,855 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:58:55,224 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:58:59,109 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:59:02,125 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:59:04,863 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:59:07,805 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:59:10,471 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:59:13,381 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:59:16,869 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:59:20,177 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:59:23,119 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:59:26,242 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:59:29,179 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:59:32,005 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:59:34,843 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:59:37,550 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:59:41,218 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:59:44,176 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:59:48,175 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:59:51,345 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:59:52,204 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:59:54,653 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:59:58,328 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 16:00:01,476 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 16:00:04,927 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 16:00:07,802 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 16:00:10,839 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 16:00:13,583 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 16:00:16,261 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 16:00:18,834 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 16:00:21,585 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/pearson_json/final_subtopics.json +2025-03-03 16:00:21,894 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 16:00:21,904 [INFO] __main__ - Processing completed successfully. +2025-03-03 16:10:28,834 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 16:10:29,650 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 16:10:29,652 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 16:10:30,160 [INFO] __main__ - Computed global offset: 4 +2025-03-03 16:10:30,161 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 16:11:28,663 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 16:11:30,405 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 16:11:30,930 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 16:11:31,457 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 16:11:32,063 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 16:11:32,602 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 16:11:33,092 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 16:11:33,567 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 16:11:34,048 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 16:11:34,608 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 16:11:35,064 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 16:11:35,615 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 16:11:36,193 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 16:11:36,888 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 16:11:37,467 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 16:11:38,018 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 16:11:38,628 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 16:11:38,959 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 16:11:39,505 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 16:11:40,087 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 16:11:40,633 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 16:11:41,250 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 16:11:41,747 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 16:11:42,227 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 16:11:42,777 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 16:11:43,219 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 16:11:43,659 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 16:11:44,281 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 16:11:44,761 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 16:11:48,542 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 16:11:51,341 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 16:11:54,134 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 16:11:57,533 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 16:12:00,316 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 16:12:03,403 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 16:12:06,708 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 16:12:09,743 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 16:12:12,739 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 16:12:15,944 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 16:12:19,014 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 16:12:22,037 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 16:12:25,060 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 16:12:27,839 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 16:12:31,482 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 16:12:34,595 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 16:12:38,218 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 16:12:41,340 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 16:12:42,340 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 16:12:44,692 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 16:12:48,729 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 16:12:51,870 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 16:12:55,261 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 16:12:58,310 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 16:13:01,583 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 16:13:04,585 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 16:13:07,386 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 16:13:10,093 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 16:13:12,995 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/pearson_json/final_subtopics.json +2025-03-03 16:13:13,319 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 16:13:13,326 [INFO] __main__ - Processing completed successfully. +2025-03-03 16:17:39,572 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 16:17:40,359 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 16:17:40,360 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 16:17:40,832 [INFO] __main__ - Computed global offset: 4 +2025-03-03 16:17:40,832 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 16:18:36,191 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 16:18:38,048 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 16:18:38,583 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 16:18:39,096 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 16:18:39,618 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 16:18:40,200 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 16:18:40,691 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 16:18:41,167 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 16:18:41,618 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 16:18:42,191 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 16:18:42,630 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 16:18:43,147 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 16:18:43,615 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 16:18:44,242 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 16:18:44,697 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 16:18:45,344 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 16:18:45,897 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 16:18:46,196 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 16:18:46,687 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 16:18:47,347 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 16:18:47,886 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 16:18:48,468 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 16:18:48,884 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 16:18:49,349 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 16:18:49,906 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 16:18:50,347 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 16:18:50,747 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 16:18:51,318 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 16:18:51,718 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 16:18:55,669 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 16:18:58,365 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 16:19:01,432 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 16:19:04,431 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 16:19:07,043 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 16:19:09,947 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 16:19:13,069 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 16:19:16,110 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 16:19:19,225 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 16:19:22,435 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 16:19:25,263 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 16:19:27,914 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 16:19:30,818 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 16:19:33,439 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 16:19:37,263 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 16:19:40,284 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 16:19:43,672 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 16:19:46,752 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 16:19:47,590 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 16:19:49,890 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 16:19:53,362 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 16:19:56,514 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 16:19:59,760 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 16:20:02,816 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 16:20:05,964 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 16:20:08,853 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 16:20:11,414 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 16:20:14,113 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 16:20:17,013 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/pearson_json/final_subtopics.json +2025-03-03 16:20:17,302 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 16:20:17,310 [INFO] __main__ - Processing completed successfully. +2025-03-03 16:29:55,533 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 16:29:56,335 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 16:29:56,335 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 16:29:56,683 [INFO] __main__ - Computed global offset: 4 +2025-03-03 16:29:56,684 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 16:30:54,842 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 16:30:56,704 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 16:30:57,248 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 16:30:57,760 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 16:30:58,328 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 16:30:58,896 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 16:30:59,381 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 16:30:59,848 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 16:31:00,350 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 16:31:00,906 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 16:31:01,322 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 16:31:01,858 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 16:31:02,304 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 16:31:02,900 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 16:31:03,362 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 16:31:03,956 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 16:31:04,503 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 16:31:04,806 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 16:31:05,281 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 16:31:05,889 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 16:31:06,437 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 16:31:07,001 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 16:31:07,493 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 16:31:07,962 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 16:31:08,501 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 16:31:09,004 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 16:31:09,512 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 16:31:10,086 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 16:31:10,503 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 16:31:14,364 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 16:31:17,142 [INFO] __main__ - Extracted 2 rows from /tmp/tmp3fu7msc1.jpg +2025-03-03 16:31:17,142 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:17,142 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:31:17,240 [WARNING] __main__ - Cell image not found: /tmp/tmp3fu7msc1.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:17,241 [WARNING] __main__ - Cell image not found: /tmp/tmp3fu7msc1.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:17,241 [WARNING] __main__ - Cell image not found: /tmp/tmp3fu7msc1.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:17,241 [WARNING] __main__ - Cell image not found: /tmp/tmp3fu7msc1.jpg_rows/row_1/col_1.jpg +2025-03-03 16:31:17,241 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 16:31:19,966 [INFO] __main__ - Extracted 4 rows from /tmp/tmp24x1tzyf.jpg +2025-03-03 16:31:19,967 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:19,967 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:31:19,967 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:31:19,967 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:31:20,069 [WARNING] __main__ - Cell image not found: /tmp/tmp24x1tzyf.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:20,069 [WARNING] __main__ - Cell image not found: /tmp/tmp24x1tzyf.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:20,070 [WARNING] __main__ - Cell image not found: /tmp/tmp24x1tzyf.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:20,070 [WARNING] __main__ - Cell image not found: /tmp/tmp24x1tzyf.jpg_rows/row_2/col_0.jpg +2025-03-03 16:31:20,070 [WARNING] __main__ - Cell image not found: /tmp/tmp24x1tzyf.jpg_rows/row_3/col_0.jpg +2025-03-03 16:31:20,070 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 16:31:22,860 [INFO] __main__ - Extracted 2 rows from /tmp/tmpqj8kzjlh.jpg +2025-03-03 16:31:22,860 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:22,861 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:31:22,974 [WARNING] __main__ - Cell image not found: /tmp/tmpqj8kzjlh.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:22,974 [WARNING] __main__ - Cell image not found: /tmp/tmpqj8kzjlh.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:22,974 [WARNING] __main__ - Cell image not found: /tmp/tmpqj8kzjlh.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:22,975 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 16:31:25,631 [INFO] __main__ - Extracted 2 rows from /tmp/tmp3i00mmdz.jpg +2025-03-03 16:31:25,632 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:25,632 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:31:25,740 [WARNING] __main__ - Cell image not found: /tmp/tmp3i00mmdz.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:25,740 [WARNING] __main__ - Cell image not found: /tmp/tmp3i00mmdz.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:25,741 [WARNING] __main__ - Cell image not found: /tmp/tmp3i00mmdz.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:25,741 [WARNING] __main__ - Cell image not found: /tmp/tmp3i00mmdz.jpg_rows/row_1/col_1.jpg +2025-03-03 16:31:25,741 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 16:31:29,048 [INFO] __main__ - Extracted 3 rows from /tmp/tmpo6lnusuq.jpg +2025-03-03 16:31:29,048 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:29,049 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:31:29,049 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:31:29,152 [WARNING] __main__ - Cell image not found: /tmp/tmpo6lnusuq.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:29,152 [WARNING] __main__ - Cell image not found: /tmp/tmpo6lnusuq.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:29,152 [WARNING] __main__ - Cell image not found: /tmp/tmpo6lnusuq.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:29,152 [WARNING] __main__ - Cell image not found: /tmp/tmpo6lnusuq.jpg_rows/row_1/col_1.jpg +2025-03-03 16:31:29,152 [WARNING] __main__ - Cell image not found: /tmp/tmpo6lnusuq.jpg_rows/row_2/col_0.jpg +2025-03-03 16:31:29,152 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 16:31:32,089 [INFO] __main__ - Extracted 2 rows from /tmp/tmpk3rl8rco.jpg +2025-03-03 16:31:32,089 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:32,089 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:31:32,201 [WARNING] __main__ - Cell image not found: /tmp/tmpk3rl8rco.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:32,201 [WARNING] __main__ - Cell image not found: /tmp/tmpk3rl8rco.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:32,201 [WARNING] __main__ - Cell image not found: /tmp/tmpk3rl8rco.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:32,201 [WARNING] __main__ - Cell image not found: /tmp/tmpk3rl8rco.jpg_rows/row_1/col_1.jpg +2025-03-03 16:31:32,202 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 16:31:34,953 [INFO] __main__ - Extracted 3 rows from /tmp/tmp3iuybv9f.jpg +2025-03-03 16:31:34,953 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:34,953 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:31:34,953 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:31:35,071 [WARNING] __main__ - Cell image not found: /tmp/tmp3iuybv9f.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:35,072 [WARNING] __main__ - Cell image not found: /tmp/tmp3iuybv9f.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:35,072 [WARNING] __main__ - Cell image not found: /tmp/tmp3iuybv9f.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:35,072 [WARNING] __main__ - Cell image not found: /tmp/tmp3iuybv9f.jpg_rows/row_2/col_0.jpg +2025-03-03 16:31:35,072 [WARNING] __main__ - Cell image not found: /tmp/tmp3iuybv9f.jpg_rows/row_2/col_1.jpg +2025-03-03 16:31:35,073 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 16:31:37,913 [INFO] __main__ - Extracted 6 rows from /tmp/tmpbg2n4l31.jpg +2025-03-03 16:31:37,914 [INFO] __main__ - Row 0 has 3 cells +2025-03-03 16:31:37,914 [INFO] __main__ - Row 1 has 3 cells +2025-03-03 16:31:37,914 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:31:37,914 [INFO] __main__ - Row 3 has 2 cells +2025-03-03 16:31:37,914 [INFO] __main__ - Row 4 has 2 cells +2025-03-03 16:31:37,914 [INFO] __main__ - Row 5 has 2 cells +2025-03-03 16:31:38,027 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:38,028 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:38,028 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_0/col_2.jpg +2025-03-03 16:31:38,028 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:38,028 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_1/col_1.jpg +2025-03-03 16:31:38,029 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_1/col_2.jpg +2025-03-03 16:31:38,029 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_2/col_0.jpg +2025-03-03 16:31:38,029 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_2/col_1.jpg +2025-03-03 16:31:38,029 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_3/col_0.jpg +2025-03-03 16:31:38,029 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_3/col_1.jpg +2025-03-03 16:31:38,030 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_4/col_0.jpg +2025-03-03 16:31:38,030 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_4/col_1.jpg +2025-03-03 16:31:38,030 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_5/col_0.jpg +2025-03-03 16:31:38,030 [WARNING] __main__ - Cell image not found: /tmp/tmpbg2n4l31.jpg_rows/row_5/col_1.jpg +2025-03-03 16:31:38,030 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 16:31:41,370 [INFO] __main__ - Extracted 4 rows from /tmp/tmpuozb49tl.jpg +2025-03-03 16:31:41,370 [INFO] __main__ - Row 0 has 3 cells +2025-03-03 16:31:41,370 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:31:41,370 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:31:41,370 [INFO] __main__ - Row 3 has 2 cells +2025-03-03 16:31:41,490 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:41,490 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:41,491 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_0/col_2.jpg +2025-03-03 16:31:41,491 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:41,491 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_1/col_1.jpg +2025-03-03 16:31:41,491 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_2/col_0.jpg +2025-03-03 16:31:41,491 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_2/col_1.jpg +2025-03-03 16:31:41,491 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_3/col_0.jpg +2025-03-03 16:31:41,491 [WARNING] __main__ - Cell image not found: /tmp/tmpuozb49tl.jpg_rows/row_3/col_1.jpg +2025-03-03 16:31:41,492 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 16:31:44,447 [INFO] __main__ - Extracted 4 rows from /tmp/tmplrwra3bt.jpg +2025-03-03 16:31:44,448 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:31:44,448 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:31:44,448 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:31:44,448 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:31:44,554 [WARNING] __main__ - Cell image not found: /tmp/tmplrwra3bt.jpg_rows/row_0/col_0.jpg +2025-03-03 16:31:44,555 [WARNING] __main__ - Cell image not found: /tmp/tmplrwra3bt.jpg_rows/row_0/col_1.jpg +2025-03-03 16:31:44,555 [WARNING] __main__ - Cell image not found: /tmp/tmplrwra3bt.jpg_rows/row_1/col_0.jpg +2025-03-03 16:31:44,555 [WARNING] __main__ - Cell image not found: /tmp/tmplrwra3bt.jpg_rows/row_2/col_0.jpg +2025-03-03 16:31:44,555 [WARNING] __main__ - Cell image not found: /tmp/tmplrwra3bt.jpg_rows/row_3/col_0.jpg +2025-03-03 16:31:44,555 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 16:31:47,726 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 16:38:00,505 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 16:38:01,273 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 16:38:01,274 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 16:38:01,586 [INFO] __main__ - Computed global offset: 4 +2025-03-03 16:38:01,587 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 16:38:55,658 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 16:38:57,514 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 16:38:58,085 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 16:38:58,674 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 16:38:59,259 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 16:38:59,865 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 16:39:00,442 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 16:39:00,917 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 16:39:01,415 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 16:39:01,970 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 16:39:02,391 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 16:39:02,884 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 16:39:03,352 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 16:39:03,934 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 16:39:04,441 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 16:39:04,988 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 16:39:05,540 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 16:39:05,840 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 16:39:06,313 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 16:39:06,928 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 16:39:07,507 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 16:39:08,099 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 16:39:08,546 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 16:39:09,044 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 16:39:09,667 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 16:39:10,150 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 16:39:10,586 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 16:39:11,171 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 16:39:11,553 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 16:39:16,188 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 16:39:18,827 [INFO] __main__ - Extracted 2 rows from /tmp/tmpsadbyp6s.jpg +2025-03-03 16:39:18,828 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:18,828 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:18,925 [INFO] __main__ - Files in /tmp/tmpsadbyp6s.jpg_rows: +2025-03-03 16:39:18,925 [INFO] __main__ - /tmp/tmpsadbyp6s.jpg_rows: [] +2025-03-03 16:39:18,926 [INFO] __main__ - /tmp/tmpsadbyp6s.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:18,926 [INFO] __main__ - /tmp/tmpsadbyp6s.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:18,926 [WARNING] __main__ - Cell image not found: /tmp/tmpsadbyp6s.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:18,926 [WARNING] __main__ - Cell image not found: /tmp/tmpsadbyp6s.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:18,926 [WARNING] __main__ - Cell image not found: /tmp/tmpsadbyp6s.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:18,926 [WARNING] __main__ - Cell image not found: /tmp/tmpsadbyp6s.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:18,927 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 16:39:21,658 [INFO] __main__ - Extracted 4 rows from /tmp/tmpbis7s1y9.jpg +2025-03-03 16:39:21,658 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:21,658 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:39:21,659 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:39:21,659 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:39:21,761 [INFO] __main__ - Files in /tmp/tmpbis7s1y9.jpg_rows: +2025-03-03 16:39:21,762 [INFO] __main__ - /tmp/tmpbis7s1y9.jpg_rows: [] +2025-03-03 16:39:21,762 [INFO] __main__ - /tmp/tmpbis7s1y9.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:21,763 [INFO] __main__ - /tmp/tmpbis7s1y9.jpg_rows/row_1: ['col_0.png'] +2025-03-03 16:39:21,763 [INFO] __main__ - /tmp/tmpbis7s1y9.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:39:21,763 [INFO] __main__ - /tmp/tmpbis7s1y9.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:39:21,763 [WARNING] __main__ - Cell image not found: /tmp/tmpbis7s1y9.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:21,764 [WARNING] __main__ - Cell image not found: /tmp/tmpbis7s1y9.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:21,764 [WARNING] __main__ - Cell image not found: /tmp/tmpbis7s1y9.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:21,764 [WARNING] __main__ - Cell image not found: /tmp/tmpbis7s1y9.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:21,765 [WARNING] __main__ - Cell image not found: /tmp/tmpbis7s1y9.jpg_rows/row_3/col_0.jpg +2025-03-03 16:39:21,765 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 16:39:24,543 [INFO] __main__ - Extracted 2 rows from /tmp/tmpv_qjs9zo.jpg +2025-03-03 16:39:24,544 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:24,544 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:39:24,656 [INFO] __main__ - Files in /tmp/tmpv_qjs9zo.jpg_rows: +2025-03-03 16:39:24,656 [INFO] __main__ - /tmp/tmpv_qjs9zo.jpg_rows: [] +2025-03-03 16:39:24,657 [INFO] __main__ - /tmp/tmpv_qjs9zo.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:24,657 [INFO] __main__ - /tmp/tmpv_qjs9zo.jpg_rows/row_1: ['col_0.png'] +2025-03-03 16:39:24,657 [WARNING] __main__ - Cell image not found: /tmp/tmpv_qjs9zo.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:24,657 [WARNING] __main__ - Cell image not found: /tmp/tmpv_qjs9zo.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:24,658 [WARNING] __main__ - Cell image not found: /tmp/tmpv_qjs9zo.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:24,658 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 16:39:27,236 [INFO] __main__ - Extracted 2 rows from /tmp/tmp4wmm7x9f.jpg +2025-03-03 16:39:27,236 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:27,237 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:27,342 [INFO] __main__ - Files in /tmp/tmp4wmm7x9f.jpg_rows: +2025-03-03 16:39:27,343 [INFO] __main__ - /tmp/tmp4wmm7x9f.jpg_rows: [] +2025-03-03 16:39:27,343 [INFO] __main__ - /tmp/tmp4wmm7x9f.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:27,343 [INFO] __main__ - /tmp/tmp4wmm7x9f.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:27,343 [WARNING] __main__ - Cell image not found: /tmp/tmp4wmm7x9f.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:27,344 [WARNING] __main__ - Cell image not found: /tmp/tmp4wmm7x9f.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:27,344 [WARNING] __main__ - Cell image not found: /tmp/tmp4wmm7x9f.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:27,344 [WARNING] __main__ - Cell image not found: /tmp/tmp4wmm7x9f.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:27,344 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 16:39:30,319 [INFO] __main__ - Extracted 3 rows from /tmp/tmpl2shhsgo.jpg +2025-03-03 16:39:30,319 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:30,319 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:30,319 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:39:30,419 [INFO] __main__ - Files in /tmp/tmpl2shhsgo.jpg_rows: +2025-03-03 16:39:30,419 [INFO] __main__ - /tmp/tmpl2shhsgo.jpg_rows: [] +2025-03-03 16:39:30,420 [INFO] __main__ - /tmp/tmpl2shhsgo.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:30,420 [INFO] __main__ - /tmp/tmpl2shhsgo.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:30,420 [INFO] __main__ - /tmp/tmpl2shhsgo.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:39:30,420 [WARNING] __main__ - Cell image not found: /tmp/tmpl2shhsgo.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:30,421 [WARNING] __main__ - Cell image not found: /tmp/tmpl2shhsgo.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:30,421 [WARNING] __main__ - Cell image not found: /tmp/tmpl2shhsgo.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:30,421 [WARNING] __main__ - Cell image not found: /tmp/tmpl2shhsgo.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:30,421 [WARNING] __main__ - Cell image not found: /tmp/tmpl2shhsgo.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:30,421 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 16:39:33,398 [INFO] __main__ - Extracted 2 rows from /tmp/tmpqlryvzi3.jpg +2025-03-03 16:39:33,399 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:33,399 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:33,490 [INFO] __main__ - Files in /tmp/tmpqlryvzi3.jpg_rows: +2025-03-03 16:39:33,490 [INFO] __main__ - /tmp/tmpqlryvzi3.jpg_rows: [] +2025-03-03 16:39:33,490 [INFO] __main__ - /tmp/tmpqlryvzi3.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:33,491 [INFO] __main__ - /tmp/tmpqlryvzi3.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:33,491 [WARNING] __main__ - Cell image not found: /tmp/tmpqlryvzi3.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:33,491 [WARNING] __main__ - Cell image not found: /tmp/tmpqlryvzi3.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:33,491 [WARNING] __main__ - Cell image not found: /tmp/tmpqlryvzi3.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:33,491 [WARNING] __main__ - Cell image not found: /tmp/tmpqlryvzi3.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:33,491 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 16:39:36,216 [INFO] __main__ - Extracted 3 rows from /tmp/tmpvpurws23.jpg +2025-03-03 16:39:36,216 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:36,216 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:39:36,216 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:39:36,355 [INFO] __main__ - Files in /tmp/tmpvpurws23.jpg_rows: +2025-03-03 16:39:36,355 [INFO] __main__ - /tmp/tmpvpurws23.jpg_rows: [] +2025-03-03 16:39:36,356 [INFO] __main__ - /tmp/tmpvpurws23.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:36,356 [INFO] __main__ - /tmp/tmpvpurws23.jpg_rows/row_1: ['col_0.png'] +2025-03-03 16:39:36,356 [INFO] __main__ - /tmp/tmpvpurws23.jpg_rows/row_2: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:36,356 [WARNING] __main__ - Cell image not found: /tmp/tmpvpurws23.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:36,357 [WARNING] __main__ - Cell image not found: /tmp/tmpvpurws23.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:36,357 [WARNING] __main__ - Cell image not found: /tmp/tmpvpurws23.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:36,357 [WARNING] __main__ - Cell image not found: /tmp/tmpvpurws23.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:36,357 [WARNING] __main__ - Cell image not found: /tmp/tmpvpurws23.jpg_rows/row_2/col_1.jpg +2025-03-03 16:39:36,358 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 16:39:39,285 [INFO] __main__ - Extracted 6 rows from /tmp/tmp0m93om1m.jpg +2025-03-03 16:39:39,285 [INFO] __main__ - Row 0 has 3 cells +2025-03-03 16:39:39,286 [INFO] __main__ - Row 1 has 3 cells +2025-03-03 16:39:39,286 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:39:39,286 [INFO] __main__ - Row 3 has 2 cells +2025-03-03 16:39:39,286 [INFO] __main__ - Row 4 has 2 cells +2025-03-03 16:39:39,286 [INFO] __main__ - Row 5 has 2 cells +2025-03-03 16:39:39,398 [INFO] __main__ - Files in /tmp/tmp0m93om1m.jpg_rows: +2025-03-03 16:39:39,399 [INFO] __main__ - /tmp/tmp0m93om1m.jpg_rows: [] +2025-03-03 16:39:39,399 [INFO] __main__ - /tmp/tmp0m93om1m.jpg_rows/row_0: ['col_0.png', 'col_1.png', 'col_2.png'] +2025-03-03 16:39:39,399 [INFO] __main__ - /tmp/tmp0m93om1m.jpg_rows/row_1: ['col_0.png', 'col_1.png', 'col_2.png'] +2025-03-03 16:39:39,400 [INFO] __main__ - /tmp/tmp0m93om1m.jpg_rows/row_2: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:39,400 [INFO] __main__ - /tmp/tmp0m93om1m.jpg_rows/row_3: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:39,400 [INFO] __main__ - /tmp/tmp0m93om1m.jpg_rows/row_4: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:39,400 [INFO] __main__ - /tmp/tmp0m93om1m.jpg_rows/row_5: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:39,401 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:39,401 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:39,401 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_0/col_2.jpg +2025-03-03 16:39:39,402 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:39,402 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:39,402 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_1/col_2.jpg +2025-03-03 16:39:39,402 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:39,403 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_2/col_1.jpg +2025-03-03 16:39:39,403 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_3/col_0.jpg +2025-03-03 16:39:39,403 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_3/col_1.jpg +2025-03-03 16:39:39,403 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_4/col_0.jpg +2025-03-03 16:39:39,404 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_4/col_1.jpg +2025-03-03 16:39:39,404 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_5/col_0.jpg +2025-03-03 16:39:39,404 [WARNING] __main__ - Cell image not found: /tmp/tmp0m93om1m.jpg_rows/row_5/col_1.jpg +2025-03-03 16:39:39,405 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 16:39:42,590 [INFO] __main__ - Extracted 4 rows from /tmp/tmpt381bxab.jpg +2025-03-03 16:39:42,590 [INFO] __main__ - Row 0 has 3 cells +2025-03-03 16:39:42,590 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:42,591 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:39:42,591 [INFO] __main__ - Row 3 has 2 cells +2025-03-03 16:39:42,704 [INFO] __main__ - Files in /tmp/tmpt381bxab.jpg_rows: +2025-03-03 16:39:42,705 [INFO] __main__ - /tmp/tmpt381bxab.jpg_rows: [] +2025-03-03 16:39:42,705 [INFO] __main__ - /tmp/tmpt381bxab.jpg_rows/row_0: ['col_0.png', 'col_1.png', 'col_2.png'] +2025-03-03 16:39:42,705 [INFO] __main__ - /tmp/tmpt381bxab.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:42,705 [INFO] __main__ - /tmp/tmpt381bxab.jpg_rows/row_2: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:42,706 [INFO] __main__ - /tmp/tmpt381bxab.jpg_rows/row_3: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:42,706 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:42,706 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:42,706 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_0/col_2.jpg +2025-03-03 16:39:42,707 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:42,707 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:42,707 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:42,707 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_2/col_1.jpg +2025-03-03 16:39:42,708 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_3/col_0.jpg +2025-03-03 16:39:42,708 [WARNING] __main__ - Cell image not found: /tmp/tmpt381bxab.jpg_rows/row_3/col_1.jpg +2025-03-03 16:39:42,708 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 16:39:45,399 [INFO] __main__ - Extracted 4 rows from /tmp/tmpti8mweyh.jpg +2025-03-03 16:39:45,400 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:45,400 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:39:45,400 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:39:45,400 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:39:45,500 [INFO] __main__ - Files in /tmp/tmpti8mweyh.jpg_rows: +2025-03-03 16:39:45,501 [INFO] __main__ - /tmp/tmpti8mweyh.jpg_rows: [] +2025-03-03 16:39:45,501 [INFO] __main__ - /tmp/tmpti8mweyh.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:45,501 [INFO] __main__ - /tmp/tmpti8mweyh.jpg_rows/row_1: ['col_0.png'] +2025-03-03 16:39:45,501 [INFO] __main__ - /tmp/tmpti8mweyh.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:39:45,502 [INFO] __main__ - /tmp/tmpti8mweyh.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:39:45,502 [WARNING] __main__ - Cell image not found: /tmp/tmpti8mweyh.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:45,502 [WARNING] __main__ - Cell image not found: /tmp/tmpti8mweyh.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:45,502 [WARNING] __main__ - Cell image not found: /tmp/tmpti8mweyh.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:45,503 [WARNING] __main__ - Cell image not found: /tmp/tmpti8mweyh.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:45,503 [WARNING] __main__ - Cell image not found: /tmp/tmpti8mweyh.jpg_rows/row_3/col_0.jpg +2025-03-03 16:39:45,503 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 16:39:48,245 [INFO] __main__ - Extracted 6 rows from /tmp/tmp_8j1l0bq.jpg +2025-03-03 16:39:48,245 [INFO] __main__ - Row 0 has 1 cells +2025-03-03 16:39:48,245 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:39:48,246 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:39:48,246 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:39:48,246 [INFO] __main__ - Row 4 has 1 cells +2025-03-03 16:39:48,246 [INFO] __main__ - Row 5 has 1 cells +2025-03-03 16:39:48,360 [INFO] __main__ - Files in /tmp/tmp_8j1l0bq.jpg_rows: +2025-03-03 16:39:48,361 [INFO] __main__ - /tmp/tmp_8j1l0bq.jpg_rows: [] +2025-03-03 16:39:48,361 [INFO] __main__ - /tmp/tmp_8j1l0bq.jpg_rows/row_0: ['col_0.png'] +2025-03-03 16:39:48,361 [INFO] __main__ - /tmp/tmp_8j1l0bq.jpg_rows/row_1: ['col_0.png'] +2025-03-03 16:39:48,361 [INFO] __main__ - /tmp/tmp_8j1l0bq.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:39:48,362 [INFO] __main__ - /tmp/tmp_8j1l0bq.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:39:48,362 [INFO] __main__ - /tmp/tmp_8j1l0bq.jpg_rows/row_4: ['col_0.png'] +2025-03-03 16:39:48,362 [INFO] __main__ - /tmp/tmp_8j1l0bq.jpg_rows/row_5: ['col_0.png'] +2025-03-03 16:39:48,362 [WARNING] __main__ - Cell image not found: /tmp/tmp_8j1l0bq.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:48,363 [WARNING] __main__ - Cell image not found: /tmp/tmp_8j1l0bq.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:48,363 [WARNING] __main__ - Cell image not found: /tmp/tmp_8j1l0bq.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:48,363 [WARNING] __main__ - Cell image not found: /tmp/tmp_8j1l0bq.jpg_rows/row_3/col_0.jpg +2025-03-03 16:39:48,363 [WARNING] __main__ - Cell image not found: /tmp/tmp_8j1l0bq.jpg_rows/row_4/col_0.jpg +2025-03-03 16:39:48,363 [WARNING] __main__ - Cell image not found: /tmp/tmp_8j1l0bq.jpg_rows/row_5/col_0.jpg +2025-03-03 16:39:48,364 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 16:39:50,951 [INFO] __main__ - Extracted 3 rows from /tmp/tmp0x9tljbb.jpg +2025-03-03 16:39:50,951 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:50,951 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:50,952 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:39:51,123 [INFO] __main__ - Files in /tmp/tmp0x9tljbb.jpg_rows: +2025-03-03 16:39:51,123 [INFO] __main__ - /tmp/tmp0x9tljbb.jpg_rows: [] +2025-03-03 16:39:51,124 [INFO] __main__ - /tmp/tmp0x9tljbb.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:51,124 [INFO] __main__ - /tmp/tmp0x9tljbb.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:51,124 [INFO] __main__ - /tmp/tmp0x9tljbb.jpg_rows/row_2: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:51,125 [WARNING] __main__ - Cell image not found: /tmp/tmp0x9tljbb.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:51,125 [WARNING] __main__ - Cell image not found: /tmp/tmp0x9tljbb.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:51,125 [WARNING] __main__ - Cell image not found: /tmp/tmp0x9tljbb.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:51,126 [WARNING] __main__ - Cell image not found: /tmp/tmp0x9tljbb.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:51,126 [WARNING] __main__ - Cell image not found: /tmp/tmp0x9tljbb.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:51,126 [WARNING] __main__ - Cell image not found: /tmp/tmp0x9tljbb.jpg_rows/row_2/col_1.jpg +2025-03-03 16:39:51,127 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 16:39:53,832 [INFO] __main__ - Extracted 4 rows from /tmp/tmp0xq8v429.jpg +2025-03-03 16:39:53,832 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:53,832 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:53,833 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:39:53,833 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:39:53,932 [INFO] __main__ - Files in /tmp/tmp0xq8v429.jpg_rows: +2025-03-03 16:39:53,933 [INFO] __main__ - /tmp/tmp0xq8v429.jpg_rows: [] +2025-03-03 16:39:53,933 [INFO] __main__ - /tmp/tmp0xq8v429.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:53,933 [INFO] __main__ - /tmp/tmp0xq8v429.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:53,933 [INFO] __main__ - /tmp/tmp0xq8v429.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:39:53,934 [INFO] __main__ - /tmp/tmp0xq8v429.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:39:53,934 [WARNING] __main__ - Cell image not found: /tmp/tmp0xq8v429.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:53,934 [WARNING] __main__ - Cell image not found: /tmp/tmp0xq8v429.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:53,934 [WARNING] __main__ - Cell image not found: /tmp/tmp0xq8v429.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:53,934 [WARNING] __main__ - Cell image not found: /tmp/tmp0xq8v429.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:53,934 [WARNING] __main__ - Cell image not found: /tmp/tmp0xq8v429.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:53,935 [WARNING] __main__ - Cell image not found: /tmp/tmp0xq8v429.jpg_rows/row_3/col_0.jpg +2025-03-03 16:39:53,935 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 16:39:57,417 [INFO] __main__ - Extracted 6 rows from /tmp/tmp32obdcoa.jpg +2025-03-03 16:39:57,417 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:39:57,417 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:39:57,417 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:39:57,417 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:39:57,417 [INFO] __main__ - Row 4 has 2 cells +2025-03-03 16:39:57,418 [INFO] __main__ - Row 5 has 1 cells +2025-03-03 16:39:57,561 [INFO] __main__ - Files in /tmp/tmp32obdcoa.jpg_rows: +2025-03-03 16:39:57,561 [INFO] __main__ - /tmp/tmp32obdcoa.jpg_rows: [] +2025-03-03 16:39:57,561 [INFO] __main__ - /tmp/tmp32obdcoa.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:57,561 [INFO] __main__ - /tmp/tmp32obdcoa.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:57,562 [INFO] __main__ - /tmp/tmp32obdcoa.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:39:57,562 [INFO] __main__ - /tmp/tmp32obdcoa.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:39:57,562 [INFO] __main__ - /tmp/tmp32obdcoa.jpg_rows/row_4: ['col_0.png', 'col_1.png'] +2025-03-03 16:39:57,562 [INFO] __main__ - /tmp/tmp32obdcoa.jpg_rows/row_5: ['col_0.png'] +2025-03-03 16:39:57,562 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_0/col_0.jpg +2025-03-03 16:39:57,562 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_0/col_1.jpg +2025-03-03 16:39:57,562 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_1/col_0.jpg +2025-03-03 16:39:57,563 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_1/col_1.jpg +2025-03-03 16:39:57,563 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_2/col_0.jpg +2025-03-03 16:39:57,563 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_3/col_0.jpg +2025-03-03 16:39:57,563 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_4/col_0.jpg +2025-03-03 16:39:57,563 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_4/col_1.jpg +2025-03-03 16:39:57,563 [WARNING] __main__ - Cell image not found: /tmp/tmp32obdcoa.jpg_rows/row_5/col_0.jpg +2025-03-03 16:39:57,564 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 16:40:00,740 [INFO] __main__ - Extracted 5 rows from /tmp/tmpteq6tjo9.jpg +2025-03-03 16:40:00,741 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:40:00,741 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:40:00,741 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:40:00,741 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:40:00,741 [INFO] __main__ - Row 4 has 1 cells +2025-03-03 16:40:00,855 [INFO] __main__ - Files in /tmp/tmpteq6tjo9.jpg_rows: +2025-03-03 16:40:00,855 [INFO] __main__ - /tmp/tmpteq6tjo9.jpg_rows: [] +2025-03-03 16:40:00,855 [INFO] __main__ - /tmp/tmpteq6tjo9.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:00,855 [INFO] __main__ - /tmp/tmpteq6tjo9.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:00,856 [INFO] __main__ - /tmp/tmpteq6tjo9.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:40:00,856 [INFO] __main__ - /tmp/tmpteq6tjo9.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:40:00,856 [INFO] __main__ - /tmp/tmpteq6tjo9.jpg_rows/row_4: ['col_0.png'] +2025-03-03 16:40:00,856 [WARNING] __main__ - Cell image not found: /tmp/tmpteq6tjo9.jpg_rows/row_0/col_0.jpg +2025-03-03 16:40:00,856 [WARNING] __main__ - Cell image not found: /tmp/tmpteq6tjo9.jpg_rows/row_0/col_1.jpg +2025-03-03 16:40:00,856 [WARNING] __main__ - Cell image not found: /tmp/tmpteq6tjo9.jpg_rows/row_1/col_0.jpg +2025-03-03 16:40:00,856 [WARNING] __main__ - Cell image not found: /tmp/tmpteq6tjo9.jpg_rows/row_1/col_1.jpg +2025-03-03 16:40:00,857 [WARNING] __main__ - Cell image not found: /tmp/tmpteq6tjo9.jpg_rows/row_2/col_0.jpg +2025-03-03 16:40:00,857 [WARNING] __main__ - Cell image not found: /tmp/tmpteq6tjo9.jpg_rows/row_3/col_0.jpg +2025-03-03 16:40:00,857 [WARNING] __main__ - Cell image not found: /tmp/tmpteq6tjo9.jpg_rows/row_4/col_0.jpg +2025-03-03 16:40:00,857 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 16:40:03,992 [INFO] __main__ - Extracted 6 rows from /tmp/tmpcjn56rie.jpg +2025-03-03 16:40:03,993 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:40:03,993 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:40:03,993 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:40:03,993 [INFO] __main__ - Row 3 has 2 cells +2025-03-03 16:40:03,994 [INFO] __main__ - Row 4 has 1 cells +2025-03-03 16:40:03,994 [INFO] __main__ - Row 5 has 1 cells +2025-03-03 16:40:04,117 [INFO] __main__ - Files in /tmp/tmpcjn56rie.jpg_rows: +2025-03-03 16:40:04,117 [INFO] __main__ - /tmp/tmpcjn56rie.jpg_rows: [] +2025-03-03 16:40:04,117 [INFO] __main__ - /tmp/tmpcjn56rie.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:04,118 [INFO] __main__ - /tmp/tmpcjn56rie.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:04,118 [INFO] __main__ - /tmp/tmpcjn56rie.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:40:04,118 [INFO] __main__ - /tmp/tmpcjn56rie.jpg_rows/row_3: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:04,118 [INFO] __main__ - /tmp/tmpcjn56rie.jpg_rows/row_4: ['col_0.png'] +2025-03-03 16:40:04,119 [INFO] __main__ - /tmp/tmpcjn56rie.jpg_rows/row_5: ['col_0.png'] +2025-03-03 16:40:04,119 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_0/col_0.jpg +2025-03-03 16:40:04,119 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_0/col_1.jpg +2025-03-03 16:40:04,119 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_1/col_0.jpg +2025-03-03 16:40:04,119 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_1/col_1.jpg +2025-03-03 16:40:04,120 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_2/col_0.jpg +2025-03-03 16:40:04,120 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_3/col_0.jpg +2025-03-03 16:40:04,120 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_3/col_1.jpg +2025-03-03 16:40:04,120 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_4/col_0.jpg +2025-03-03 16:40:04,120 [WARNING] __main__ - Cell image not found: /tmp/tmpcjn56rie.jpg_rows/row_5/col_0.jpg +2025-03-03 16:40:04,121 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 16:40:07,145 [INFO] __main__ - Extracted 6 rows from /tmp/tmp_0pwqboj.jpg +2025-03-03 16:40:07,146 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:40:07,146 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:40:07,146 [INFO] __main__ - Row 2 has 2 cells +2025-03-03 16:40:07,147 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:40:07,147 [INFO] __main__ - Row 4 has 1 cells +2025-03-03 16:40:07,147 [INFO] __main__ - Row 5 has 1 cells +2025-03-03 16:40:07,269 [INFO] __main__ - Files in /tmp/tmp_0pwqboj.jpg_rows: +2025-03-03 16:40:07,270 [INFO] __main__ - /tmp/tmp_0pwqboj.jpg_rows: [] +2025-03-03 16:40:07,270 [INFO] __main__ - /tmp/tmp_0pwqboj.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:07,270 [INFO] __main__ - /tmp/tmp_0pwqboj.jpg_rows/row_1: ['col_0.png'] +2025-03-03 16:40:07,270 [INFO] __main__ - /tmp/tmp_0pwqboj.jpg_rows/row_2: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:07,271 [INFO] __main__ - /tmp/tmp_0pwqboj.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:40:07,271 [INFO] __main__ - /tmp/tmp_0pwqboj.jpg_rows/row_4: ['col_0.png'] +2025-03-03 16:40:07,271 [INFO] __main__ - /tmp/tmp_0pwqboj.jpg_rows/row_5: ['col_0.png'] +2025-03-03 16:40:07,271 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_0/col_0.jpg +2025-03-03 16:40:07,272 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_0/col_1.jpg +2025-03-03 16:40:07,272 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_1/col_0.jpg +2025-03-03 16:40:07,272 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_2/col_0.jpg +2025-03-03 16:40:07,272 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_2/col_1.jpg +2025-03-03 16:40:07,272 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_3/col_0.jpg +2025-03-03 16:40:07,273 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_4/col_0.jpg +2025-03-03 16:40:07,273 [WARNING] __main__ - Cell image not found: /tmp/tmp_0pwqboj.jpg_rows/row_5/col_0.jpg +2025-03-03 16:40:07,273 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 16:40:08,066 [INFO] __main__ - Extracted 2 rows from /tmp/tmpk5z84_tz.jpg +2025-03-03 16:40:08,066 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:40:08,066 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:40:08,100 [INFO] __main__ - Files in /tmp/tmpk5z84_tz.jpg_rows: +2025-03-03 16:40:08,100 [INFO] __main__ - /tmp/tmpk5z84_tz.jpg_rows: [] +2025-03-03 16:40:08,100 [INFO] __main__ - /tmp/tmpk5z84_tz.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:08,100 [INFO] __main__ - /tmp/tmpk5z84_tz.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:40:08,100 [WARNING] __main__ - Cell image not found: /tmp/tmpk5z84_tz.jpg_rows/row_0/col_0.jpg +2025-03-03 16:40:08,100 [WARNING] __main__ - Cell image not found: /tmp/tmpk5z84_tz.jpg_rows/row_0/col_1.jpg +2025-03-03 16:40:08,101 [WARNING] __main__ - Cell image not found: /tmp/tmpk5z84_tz.jpg_rows/row_1/col_0.jpg +2025-03-03 16:40:08,101 [WARNING] __main__ - Cell image not found: /tmp/tmpk5z84_tz.jpg_rows/row_1/col_1.jpg +2025-03-03 16:40:08,101 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 16:40:10,563 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 16:40:56,164 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 16:40:56,929 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 16:40:56,930 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 16:40:57,214 [INFO] __main__ - Computed global offset: 4 +2025-03-03 16:40:57,215 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 16:41:50,960 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 16:41:52,844 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 16:41:53,383 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 16:41:53,923 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 16:41:54,525 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 16:41:55,160 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 16:41:55,630 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 16:41:56,152 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 16:41:56,681 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 16:41:57,213 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 16:41:57,792 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 16:41:58,296 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 16:41:58,702 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 16:41:59,409 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 16:41:59,869 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 16:42:00,474 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 16:42:01,030 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 16:42:01,336 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 16:42:01,851 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 16:42:02,414 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 16:42:02,940 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 16:42:03,487 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 16:42:03,925 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 16:42:04,459 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 16:42:04,999 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 16:42:05,488 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 16:42:05,884 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 16:42:06,475 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 16:42:06,866 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 16:42:10,746 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 16:42:13,127 [INFO] __main__ - Extracted 2 rows from /tmp/tmp5g6lny8d.jpg +2025-03-03 16:42:13,127 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:42:13,128 [INFO] __main__ - Row 1 has 2 cells +2025-03-03 16:42:13,228 [INFO] __main__ - Files in /tmp/tmp5g6lny8d.jpg_rows: +2025-03-03 16:42:13,229 [INFO] __main__ - /tmp/tmp5g6lny8d.jpg_rows: [] +2025-03-03 16:42:13,229 [INFO] __main__ - /tmp/tmp5g6lny8d.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:42:13,229 [INFO] __main__ - /tmp/tmp5g6lny8d.jpg_rows/row_1: ['col_0.png', 'col_1.png'] +2025-03-03 16:42:13,636 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r0_c0.jpg +2025-03-03 16:42:14,677 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "Topics", + "subtopics": [] +} +``` +2025-03-03 16:42:14,678 [ERROR] __main__ - Gemini subtopic identification error on attempt 0: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:15,889 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "Topics", + "subtopics": [] +} +``` +2025-03-03 16:42:15,890 [ERROR] __main__ - Gemini subtopic identification error on attempt 1: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:15,890 [INFO] __main__ - Gemini subtopic extraction result for cell /tmp/tmp5g6lny8d.jpg_rows/row_0/col_0.png: {'title': '', 'subtopics': []} +2025-03-03 16:42:16,159 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r0_c1.jpg +2025-03-03 16:42:17,348 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "", + "subtopics": [] +} +``` +2025-03-03 16:42:17,349 [ERROR] __main__ - Gemini subtopic identification error on attempt 0: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:18,700 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "", + "subtopics": [] +} +``` +2025-03-03 16:42:18,704 [ERROR] __main__ - Gemini subtopic identification error on attempt 1: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:18,704 [INFO] __main__ - Gemini subtopic extraction result for cell /tmp/tmp5g6lny8d.jpg_rows/row_0/col_1.png: {'title': '', 'subtopics': []} +2025-03-03 16:42:18,961 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c0.jpg +2025-03-03 16:42:20,132 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "1 Proof", + "subtopics": [] +} +``` +2025-03-03 16:42:20,132 [ERROR] __main__ - Gemini subtopic identification error on attempt 0: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:21,478 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "1 Proof", + "subtopics": [] +} +``` +2025-03-03 16:42:21,478 [ERROR] __main__ - Gemini subtopic identification error on attempt 1: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:21,478 [INFO] __main__ - Gemini subtopic extraction result for cell /tmp/tmp5g6lny8d.jpg_rows/row_1/col_0.png: {'title': '', 'subtopics': []} +2025-03-03 16:42:22,094 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c1.jpg +2025-03-03 16:42:23,562 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "", + "subtopics": ["1.1"] +} +``` +2025-03-03 16:42:23,563 [ERROR] __main__ - Gemini subtopic identification error on attempt 0: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:25,127 [INFO] __main__ - Gemini subtopic extraction raw response: ```json +{ + "title": "", + "subtopics": ["1.1"] +} +``` +2025-03-03 16:42:25,127 [ERROR] __main__ - Gemini subtopic identification error on attempt 1: Expecting value: line 1 column 1 (char 0) +2025-03-03 16:42:25,127 [INFO] __main__ - Gemini subtopic extraction result for cell /tmp/tmp5g6lny8d.jpg_rows/row_1/col_1.png: {'title': '', 'subtopics': []} +2025-03-03 16:42:25,128 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 16:42:27,781 [INFO] __main__ - Extracted 4 rows from /tmp/tmp1l6kamqx.jpg +2025-03-03 16:42:27,781 [INFO] __main__ - Row 0 has 2 cells +2025-03-03 16:42:27,781 [INFO] __main__ - Row 1 has 1 cells +2025-03-03 16:42:27,781 [INFO] __main__ - Row 2 has 1 cells +2025-03-03 16:42:27,781 [INFO] __main__ - Row 3 has 1 cells +2025-03-03 16:42:27,883 [INFO] __main__ - Files in /tmp/tmp1l6kamqx.jpg_rows: +2025-03-03 16:42:27,883 [INFO] __main__ - /tmp/tmp1l6kamqx.jpg_rows: [] +2025-03-03 16:42:27,883 [INFO] __main__ - /tmp/tmp1l6kamqx.jpg_rows/row_0: ['col_0.png', 'col_1.png'] +2025-03-03 16:42:27,883 [INFO] __main__ - /tmp/tmp1l6kamqx.jpg_rows/row_1: ['col_0.png'] +2025-03-03 16:42:27,884 [INFO] __main__ - /tmp/tmp1l6kamqx.jpg_rows/row_2: ['col_0.png'] +2025-03-03 16:42:27,884 [INFO] __main__ - /tmp/tmp1l6kamqx.jpg_rows/row_3: ['col_0.png'] +2025-03-03 16:42:28,438 [INFO] __main__ - GPU memory cleaned up. diff --git a/topic_extraction.py b/topic_extraction.py index 3e8319b96e126ac4f8a0824d22cfe48d42312b99..8333cf0ce6c7fbbb20f50280e36858f31c87cd81 100644 --- a/topic_extraction.py +++ b/topic_extraction.py @@ -721,7 +721,7 @@ class MineruNoTextProcessor: self.cleanup_gpu() if __name__ == "__main__": - input_pdf = "/home/user/app/input_output/wjec-gce-maths-spec-from-2017-e.pdf" + input_pdf = "/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf" output_dir = "/home/user/app/wje" gemini_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU") try: diff --git a/topic_extraction_ars.log b/topic_extraction_ars.log new file mode 100644 index 0000000000000000000000000000000000000000..cbe8741d1b4e4910bba830899caa7e4716a0c536 --- /dev/null +++ b/topic_extraction_ars.log @@ -0,0 +1,460 @@ +2025-03-03 15:45:38,171 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:45:38,974 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:45:38,975 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:45:39,261 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:45:39,261 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:46:34,912 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:46:36,964 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:46:37,539 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:46:38,161 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:46:38,703 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:46:39,330 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:46:39,805 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:46:40,281 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:46:40,751 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:46:41,336 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:46:41,773 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:46:42,431 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:46:42,903 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:46:43,490 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:46:43,962 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:46:44,566 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:46:45,155 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:46:45,448 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:46:45,896 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:46:46,485 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:46:47,081 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:46:47,652 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:46:48,109 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:46:48,593 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:46:49,101 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:46:49,644 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:46:50,274 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:46:50,891 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:46:51,327 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:46:55,176 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:46:58,654 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 15:46:58,952 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 15:46:59,179 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 15:46:59,433 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 15:46:59,434 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:47:02,885 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 15:47:03,187 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 15:47:03,419 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 15:47:03,657 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 15:47:03,872 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 15:47:03,873 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:47:07,421 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 15:47:07,712 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 15:47:07,918 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 15:47:07,918 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:47:11,395 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 15:47:11,689 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 15:47:11,904 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 15:47:12,137 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 15:47:12,138 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:47:15,853 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 15:47:16,176 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 15:47:16,379 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 15:47:16,611 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 15:47:16,850 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 15:47:16,850 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:47:20,810 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 15:47:21,101 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 15:47:21,322 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 15:47:21,549 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 15:47:21,549 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:47:25,075 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 15:47:25,405 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 15:47:25,599 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 15:47:25,823 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 15:47:26,054 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 15:47:26,054 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:47:29,662 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 15:47:29,944 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 15:47:30,160 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 15:47:30,354 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 15:47:30,586 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 15:47:30,801 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 15:47:31,028 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 15:47:31,232 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 15:47:31,461 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 15:47:31,654 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 15:47:31,912 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 15:47:32,139 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 15:47:32,345 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 15:47:32,586 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 15:47:32,587 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:47:36,350 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 15:47:36,676 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 15:47:36,893 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 15:47:37,141 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 15:47:37,374 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 15:47:37,565 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 15:47:37,760 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 15:47:38,012 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 15:47:38,226 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 15:47:38,226 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:47:42,402 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 15:47:42,675 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 15:47:42,917 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 15:47:43,133 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 15:47:43,355 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 15:47:43,355 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:47:48,037 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 15:47:48,332 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 15:47:48,540 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 15:47:48,786 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 15:47:49,037 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 15:47:49,264 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 15:47:49,264 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:47:53,266 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 15:47:53,598 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 15:47:53,819 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 15:47:54,034 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 15:47:54,250 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 15:47:54,474 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 15:47:54,474 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:47:57,779 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 15:47:58,103 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 15:47:58,326 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 15:47:58,545 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 15:47:58,738 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 15:47:58,994 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 15:47:58,994 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:48:03,866 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 15:48:04,164 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 15:48:04,382 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 15:48:04,605 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 15:48:04,799 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 15:48:05,032 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 15:48:05,247 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 15:48:05,493 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 15:48:05,710 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 15:48:05,711 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:48:09,411 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 15:48:09,698 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 15:48:09,923 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 15:48:10,113 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 15:48:10,361 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 15:48:10,587 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 15:48:10,799 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 15:48:10,800 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:48:14,668 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 15:48:14,969 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 15:48:15,207 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 15:48:15,414 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 15:48:15,634 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 15:48:15,893 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 15:48:16,111 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 15:48:16,343 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 15:48:17,176 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 15:48:17,176 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:48:20,954 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg +2025-03-03 15:48:21,213 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg +2025-03-03 15:48:21,423 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg +2025-03-03 15:48:21,634 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg +2025-03-03 15:48:21,832 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg +2025-03-03 15:48:22,056 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg +2025-03-03 15:48:22,261 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg +2025-03-03 15:48:22,481 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r5_c0.jpg +2025-03-03 15:48:22,482 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:48:23,665 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg +2025-03-03 15:48:23,852 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg +2025-03-03 15:48:24,035 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg +2025-03-03 15:48:24,219 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg +2025-03-03 15:48:24,219 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:48:27,206 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg +2025-03-03 15:48:27,482 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg +2025-03-03 15:48:27,693 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg +2025-03-03 15:48:27,924 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg +2025-03-03 15:48:28,131 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg +2025-03-03 15:48:28,337 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg +2025-03-03 15:48:28,338 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:48:32,733 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg +2025-03-03 15:48:32,995 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c1.jpg +2025-03-03 15:48:33,221 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c0.jpg +2025-03-03 15:48:33,449 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c1.jpg +2025-03-03 15:48:33,449 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 15:48:37,495 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg +2025-03-03 15:48:37,802 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg +2025-03-03 15:48:38,060 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg +2025-03-03 15:48:38,267 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg +2025-03-03 15:48:38,267 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 15:48:42,539 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg +2025-03-03 15:48:42,847 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg +2025-03-03 15:48:43,064 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg +2025-03-03 15:48:43,280 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c1.jpg +2025-03-03 15:48:43,487 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c0.jpg +2025-03-03 15:48:43,716 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c1.jpg +2025-03-03 15:48:43,918 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r3_c0.jpg +2025-03-03 15:48:43,918 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 15:48:47,600 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg +2025-03-03 15:48:47,900 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg +2025-03-03 15:48:48,125 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg +2025-03-03 15:48:48,343 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg +2025-03-03 15:48:48,343 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 15:48:52,065 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg +2025-03-03 15:48:52,376 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg +2025-03-03 15:48:52,614 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg +2025-03-03 15:48:52,870 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg +2025-03-03 15:48:53,066 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg +2025-03-03 15:48:53,066 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 15:48:56,548 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c0.jpg +2025-03-03 15:48:56,863 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c0.jpg +2025-03-03 15:48:57,087 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r2_c0.jpg +2025-03-03 15:48:57,301 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r3_c0.jpg +2025-03-03 15:48:57,526 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r4_c0.jpg +2025-03-03 15:48:57,759 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r5_c0.jpg +2025-03-03 15:48:57,759 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 15:49:01,116 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c0.jpg +2025-03-03 15:49:01,407 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c1.jpg +2025-03-03 15:49:01,618 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c0.jpg +2025-03-03 15:49:01,847 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r2_c0.jpg +2025-03-03 15:49:01,847 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 15:49:04,977 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c0.jpg +2025-03-03 15:49:05,258 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c1.jpg +2025-03-03 15:49:05,498 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c0.jpg +2025-03-03 15:49:05,712 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c1.jpg +2025-03-03 15:49:05,934 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r2_c0.jpg +2025-03-03 15:49:06,162 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r3_c0.jpg +2025-03-03 15:49:06,385 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c0.jpg +2025-03-03 15:49:06,612 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c1.jpg +2025-03-03 15:49:06,613 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 15:49:10,036 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r0_c0.jpg +2025-03-03 15:49:10,328 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r1_c0.jpg +2025-03-03 15:49:10,548 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c0.jpg +2025-03-03 15:49:10,777 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r3_c0.jpg +2025-03-03 15:49:10,780 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/we_ars/final_subtopics.json +2025-03-03 15:49:11,098 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:49:11,106 [INFO] __main__ - Processing completed successfully. +2025-03-03 15:53:27,401 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf +2025-03-03 15:53:28,230 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]} +2025-03-03 15:53:28,231 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf' +2025-03-03 15:53:28,557 [INFO] __main__ - Computed global offset: 4 +2025-03-03 15:53:28,557 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43] +2025-03-03 15:54:23,423 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg +2025-03-03 15:54:25,210 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg +2025-03-03 15:54:25,742 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg +2025-03-03 15:54:26,250 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg +2025-03-03 15:54:26,794 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg +2025-03-03 15:54:27,347 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg +2025-03-03 15:54:27,803 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg +2025-03-03 15:54:28,391 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg +2025-03-03 15:54:28,891 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg +2025-03-03 15:54:29,437 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg +2025-03-03 15:54:29,870 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg +2025-03-03 15:54:30,421 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg +2025-03-03 15:54:30,852 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg +2025-03-03 15:54:31,438 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg +2025-03-03 15:54:32,029 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg +2025-03-03 15:54:32,600 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg +2025-03-03 15:54:33,157 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg +2025-03-03 15:54:33,444 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg +2025-03-03 15:54:33,920 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg +2025-03-03 15:54:34,554 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg +2025-03-03 15:54:35,147 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg +2025-03-03 15:54:35,680 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg +2025-03-03 15:54:36,094 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg +2025-03-03 15:54:36,554 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg +2025-03-03 15:54:37,089 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg +2025-03-03 15:54:37,502 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg +2025-03-03 15:54:38,008 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg +2025-03-03 15:54:38,585 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg +2025-03-03 15:54:39,068 [INFO] __main__ - Classifying images to detect tables. +2025-03-03 15:54:42,753 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three +2025-03-03 15:54:46,419 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg +2025-03-03 15:54:46,711 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg +2025-03-03 15:54:46,896 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg +2025-03-03 15:54:47,110 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg +2025-03-03 15:54:47,110 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three +2025-03-03 15:54:50,464 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg +2025-03-03 15:54:50,784 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg +2025-03-03 15:54:50,976 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg +2025-03-03 15:54:51,228 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg +2025-03-03 15:54:51,462 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg +2025-03-03 15:54:51,463 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three +2025-03-03 15:54:55,079 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg +2025-03-03 15:54:55,364 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg +2025-03-03 15:54:55,570 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r1_c0.jpg +2025-03-03 15:54:55,571 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three +2025-03-03 15:54:58,838 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg +2025-03-03 15:54:59,144 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg +2025-03-03 15:54:59,326 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg +2025-03-03 15:54:59,577 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg +2025-03-03 15:54:59,578 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three +2025-03-03 15:55:03,518 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg +2025-03-03 15:55:03,801 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg +2025-03-03 15:55:03,983 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg +2025-03-03 15:55:04,202 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg +2025-03-03 15:55:04,417 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg +2025-03-03 15:55:04,417 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three +2025-03-03 15:55:08,109 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg +2025-03-03 15:55:08,423 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg +2025-03-03 15:55:08,629 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg +2025-03-03 15:55:08,816 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg +2025-03-03 15:55:08,816 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three +2025-03-03 15:55:12,344 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg +2025-03-03 15:55:12,644 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg +2025-03-03 15:55:12,867 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg +2025-03-03 15:55:13,114 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg +2025-03-03 15:55:13,343 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c1.jpg +2025-03-03 15:55:13,344 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three +2025-03-03 15:55:16,823 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg +2025-03-03 15:55:17,140 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg +2025-03-03 15:55:17,422 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c2.jpg +2025-03-03 15:55:17,706 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg +2025-03-03 15:55:18,019 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg +2025-03-03 15:55:18,320 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c2.jpg +2025-03-03 15:55:18,619 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg +2025-03-03 15:55:18,911 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c1.jpg +2025-03-03 15:55:19,208 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg +2025-03-03 15:55:19,491 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c1.jpg +2025-03-03 15:55:19,806 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg +2025-03-03 15:55:20,093 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c1.jpg +2025-03-03 15:55:20,406 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c0.jpg +2025-03-03 15:55:20,689 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r5_c1.jpg +2025-03-03 15:55:20,690 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three +2025-03-03 15:55:24,558 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg +2025-03-03 15:55:24,859 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c1.jpg +2025-03-03 15:55:25,142 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c2.jpg +2025-03-03 15:55:25,422 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg +2025-03-03 15:55:25,738 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c1.jpg +2025-03-03 15:55:26,031 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c0.jpg +2025-03-03 15:55:26,335 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r2_c1.jpg +2025-03-03 15:55:26,616 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c0.jpg +2025-03-03 15:55:26,908 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r3_c1.jpg +2025-03-03 15:55:26,909 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three +2025-03-03 15:55:30,379 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg +2025-03-03 15:55:30,667 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg +2025-03-03 15:55:30,961 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg +2025-03-03 15:55:31,248 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg +2025-03-03 15:55:31,547 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg +2025-03-03 15:55:31,549 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=two +2025-03-03 15:55:34,706 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg +2025-03-03 15:55:34,994 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg +2025-03-03 15:55:35,254 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg +2025-03-03 15:55:35,558 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg +2025-03-03 15:55:35,852 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg +2025-03-03 15:55:36,137 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r5_c0.jpg +2025-03-03 15:55:36,137 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three +2025-03-03 15:55:39,497 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg +2025-03-03 15:55:39,757 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg +2025-03-03 15:55:40,062 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg +2025-03-03 15:55:40,345 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg +2025-03-03 15:55:40,666 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg +2025-03-03 15:55:40,976 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg +2025-03-03 15:55:40,977 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three +2025-03-03 15:55:44,159 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg +2025-03-03 15:55:44,436 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg +2025-03-03 15:55:44,643 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg +2025-03-03 15:55:44,853 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg +2025-03-03 15:55:45,041 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg +2025-03-03 15:55:45,254 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg +2025-03-03 15:55:45,255 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=three +2025-03-03 15:55:49,508 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg +2025-03-03 15:55:49,786 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c1.jpg +2025-03-03 15:55:50,075 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg +2025-03-03 15:55:50,355 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c1.jpg +2025-03-03 15:55:50,647 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r2_c0.jpg +2025-03-03 15:55:50,978 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r3_c0.jpg +2025-03-03 15:55:51,295 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c0.jpg +2025-03-03 15:55:51,582 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r4_c1.jpg +2025-03-03 15:55:51,855 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r5_c0.jpg +2025-03-03 15:55:51,856 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three +2025-03-03 15:55:55,882 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg +2025-03-03 15:55:56,182 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg +2025-03-03 15:55:56,463 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg +2025-03-03 15:55:56,727 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c1.jpg +2025-03-03 15:55:57,005 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg +2025-03-03 15:55:57,301 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg +2025-03-03 15:55:57,584 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r4_c0.jpg +2025-03-03 15:55:57,584 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three +2025-03-03 15:56:01,615 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg +2025-03-03 15:56:01,906 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg +2025-03-03 15:56:02,222 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg +2025-03-03 15:56:02,513 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c1.jpg +2025-03-03 15:56:02,801 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg +2025-03-03 15:56:03,083 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg +2025-03-03 15:56:03,393 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c1.jpg +2025-03-03 15:56:03,676 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg +2025-03-03 15:56:04,667 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r5_c0.jpg +2025-03-03 15:56:04,667 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three +2025-03-03 15:56:09,007 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg +2025-03-03 15:56:09,286 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg +2025-03-03 15:56:09,520 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg +2025-03-03 15:56:09,740 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg +2025-03-03 15:56:09,947 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg +2025-03-03 15:56:10,171 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg +2025-03-03 15:56:10,389 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg +2025-03-03 15:56:10,610 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r5_c0.jpg +2025-03-03 15:56:10,610 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three +2025-03-03 15:56:11,718 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg +2025-03-03 15:56:11,899 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg +2025-03-03 15:56:12,081 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg +2025-03-03 15:56:12,266 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg +2025-03-03 15:56:12,266 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three +2025-03-03 15:56:15,231 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg +2025-03-03 15:56:15,582 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg +2025-03-03 15:56:15,802 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg +2025-03-03 15:56:16,018 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg +2025-03-03 15:56:16,234 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg +2025-03-03 15:56:16,451 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg +2025-03-03 15:56:16,452 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=three +2025-03-03 15:56:20,970 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg +2025-03-03 15:56:21,300 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c1.jpg +2025-03-03 15:56:21,518 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c0.jpg +2025-03-03 15:56:21,742 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r1_c1.jpg +2025-03-03 15:56:21,742 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three +2025-03-03 15:56:25,577 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg +2025-03-03 15:56:25,883 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg +2025-03-03 15:56:26,108 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg +2025-03-03 15:56:26,319 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg +2025-03-03 15:56:26,320 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three +2025-03-03 15:56:30,722 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg +2025-03-03 15:56:31,018 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg +2025-03-03 15:56:31,267 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg +2025-03-03 15:56:31,455 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c1.jpg +2025-03-03 15:56:31,684 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c0.jpg +2025-03-03 15:56:31,904 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r2_c1.jpg +2025-03-03 15:56:32,136 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r3_c0.jpg +2025-03-03 15:56:32,136 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three +2025-03-03 15:56:35,410 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg +2025-03-03 15:56:35,689 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg +2025-03-03 15:56:35,917 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg +2025-03-03 15:56:36,143 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg +2025-03-03 15:56:36,144 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three +2025-03-03 15:56:39,869 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg +2025-03-03 15:56:40,150 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg +2025-03-03 15:56:40,387 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg +2025-03-03 15:56:40,608 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg +2025-03-03 15:56:40,828 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg +2025-03-03 15:56:40,829 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=two +2025-03-03 15:56:44,221 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c0.jpg +2025-03-03 15:56:44,522 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c0.jpg +2025-03-03 15:56:44,728 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r2_c0.jpg +2025-03-03 15:56:44,929 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r3_c0.jpg +2025-03-03 15:56:45,153 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r4_c0.jpg +2025-03-03 15:56:45,372 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r5_c0.jpg +2025-03-03 15:56:45,372 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three +2025-03-03 15:56:48,485 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c0.jpg +2025-03-03 15:56:48,806 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c1.jpg +2025-03-03 15:56:49,036 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c0.jpg +2025-03-03 15:56:49,282 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r2_c0.jpg +2025-03-03 15:56:49,282 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three +2025-03-03 15:56:52,374 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c0.jpg +2025-03-03 15:56:52,664 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c1.jpg +2025-03-03 15:56:52,887 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c0.jpg +2025-03-03 15:56:53,103 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c1.jpg +2025-03-03 15:56:53,329 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r2_c0.jpg +2025-03-03 15:56:53,543 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r3_c0.jpg +2025-03-03 15:56:53,759 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c0.jpg +2025-03-03 15:56:53,978 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c1.jpg +2025-03-03 15:56:53,979 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two +2025-03-03 15:56:57,389 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r0_c0.jpg +2025-03-03 15:56:57,690 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r1_c0.jpg +2025-03-03 15:56:57,897 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c0.jpg +2025-03-03 15:56:58,126 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r3_c0.jpg +2025-03-03 15:56:58,131 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/we_ars/final_subtopics.json +2025-03-03 15:56:58,438 [INFO] __main__ - GPU memory cleaned up. +2025-03-03 15:56:58,445 [INFO] __main__ - Processing completed successfully. diff --git a/we/final_subtopics.json b/we/final_subtopics.json index 0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc..571bb23a3bb544f16ba9f5c4041f3d79f92d9a86 100644 --- a/we/final_subtopics.json +++ b/we/final_subtopics.json @@ -1 +1,282 @@ -[] \ No newline at end of file +[ + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_1.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_2.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_3.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_4.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_5.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_6.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_7.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_8.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_9.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_10.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_11.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_12.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_13.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_14.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_15.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_16.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_17.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_18.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_19.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_20.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_21.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_22.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_23.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_24.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_25.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_26.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_27.jpg" + } + ], + "children": [] + }, + { + "title": "", + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_28.jpg" + } + ], + "children": [] + } +] \ No newline at end of file diff --git a/we/we_ars/final_subtopics.json b/we/we_ars/final_subtopics.json new file mode 100644 index 0000000000000000000000000000000000000000..8e20a5fa31df0c64e8bcfdcad27c66ed29afa522 --- /dev/null +++ b/we/we_ars/final_subtopics.json @@ -0,0 +1,282 @@ +[ + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_1.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_2.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_3.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_4.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_5.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_6.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_7.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_8.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_9.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_10.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_11.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_12.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_13.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_14.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_15.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_16.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_17.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_18.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_19.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_20.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_21.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_22.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_23.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_24.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_25.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_26.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_27.jpg" + } + ], + "children": [] + }, + { + "title": null, + "contents": [ + { + "type": "image", + "key": "/topic-extraction/img_28.jpg" + } + ], + "children": [] + } +] \ No newline at end of file diff --git a/wje/final_output.json b/wje/final_output.json new file mode 100644 index 0000000000000000000000000000000000000000..e752424d16981291929a091f6b560361e8e5d559 --- /dev/null +++ b/wje/final_output.json @@ -0,0 +1,265 @@ +{ + "subtopics": { + "Paper 1 and Paper 2: Pure Mathematics": [ + 11, + 29 + ], + "Paper 3: Statistics and Mechanics": [ + 30, + 40 + ] + }, + "local_images": { + "e7e5c8f3c0a6316c2b50698c45ebe05b49bfd8bbe47a07b7b1929dd3cfd3e609.jpg": "img_1.jpg", + "b243ef738ec2465b1cc00f4dd8dd0e5f5e10a91debf7762903ac6c023dd238c4.jpg": "img_2.jpg", + "5e22a8a8c5bc23ee4d16bda9cce4a6ab4bb53854074fd4d691531d5adb9f3ebe.jpg": "img_3.jpg", + "0e70645e72eadab75c88846b7947fc1216cf31d325febf02fbdf4898b430465d.jpg": "img_4.jpg", + "52484f429af5d74ef75e96bf132b15fdc4acd2ed46accb981d670592dcc57ff2.jpg": "img_5.jpg", + "5a153708e7a128d8f6477cb294d2f902d3a9bc57af709c81ccc3937b96580137.jpg": "img_6.jpg", + "fd3a52607bef204e6998e09db82d195de76d929399c2cb1a63e26f87054eec6f.jpg": "img_7.jpg", + "93885318f77c148b9fb1cd162cb9938d6f6cd795d000d5b997f2297198462fcf.jpg": "img_8.jpg", + "133a659582f49fb71dc5fcae918278e6659a257026e35741ba8e6b94fcdb9de6.jpg": "img_9.jpg", + "860d10a56a7e892c674f74fd030592339e629fb80d6e6dbfc343f95ec65a7c16.jpg": "img_10.jpg", + "ae5ee4479ae736ff433ca9b2a1c3f753bbc8cc11a384e27cb710b426757c31e9.jpg": "img_11.jpg", + "39ac9ccc8cd681e552fb1ae08341b4a2dcb33ea8fe6c787daf99fb993d29e57f.jpg": "img_12.jpg", + "6d67beb5c0bf2168a87ad6b7c179ff9c7de8bbd7e720f77f7bf206080cceb589.jpg": "img_13.jpg", + "b89d31200bc06fda181bd2538b5f3274de3e52b0adc7dd023ca676e168e6d487.jpg": "img_14.jpg", + "78907967ba7a56221a0987e6e696e361c82fcf057f41659e4aa77943a62b6763.jpg": "img_15.jpg", + "bd3eb31469dd7b72e9773564915dc768e2e152878d887dcab34e83875e0625bb.jpg": "img_16.jpg", + "f1f1acb21df3d785fa3120fbae5fc74f7064769d9b38524bb991cfaa110177f6.jpg": "img_17.jpg", + "b8b803d008ec9053c40f4a9c2c265a8a0b15742059331dc7997336c94ab74dc4.jpg": "img_18.jpg", + "9cbb4e3b89d75d1d5da2fe8c6ccc4c1d3f612779abaccf3322f8b78b2db8a161.jpg": "img_19.jpg", + "c6c4dfd8d7d1b83ef05d0ad30d4d09e75fe1d1152099b976eef7aededb872873.jpg": "img_20.jpg", + "7eaeb5261341b3dbe0554989b2681f87c4b7a418e21445f3e88aa873e16db0df.jpg": "img_21.jpg", + "22cbebb54b25ccf620ab043fc977fcc709fd5692d1e74b02267b8f689284225d.jpg": "img_22.jpg", + "7a3f07a668cfc19e26c35fb1421908638d5a233723942301eda2764a1e81374d.jpg": "img_23.jpg", + "42b9e068a3fddcc2adaa6736e0ccee448c0302349547c8eaed8a07c870d29b17.jpg": "img_24.jpg", + "2efcd74e6c9447686d3e08d2dca6998ffd44f5cf0323d7d93b4213a2337b32ab.jpg": "img_25.jpg", + "6ba16781c7909a8a47a6a51e520e739320c22791147ad6bbd482473cf5c96717.jpg": "img_26.jpg", + "3d3cdfbca59671749e9d93714510a36441a10769f6b43720f9f3e733d893ea3a.jpg": "img_27.jpg", + "35394d307566e17440ab0322a3c915a4537db1db85628b38f2fe7827d19d719d.jpg": "img_28.jpg" + }, + "tables_extracted": { + "img_1.jpg": [ + "img_1.jpg_rows/row_0/col_0.png", + "img_1.jpg_rows/row_0/col_1.png", + "img_1.jpg_rows/row_1/col_0.png", + "img_1.jpg_rows/row_1/col_1.png" + ], + "img_2.jpg": [ + "img_2.jpg_rows/row_0/col_0.png", + "img_2.jpg_rows/row_0/col_1.png", + "img_2.jpg_rows/row_1/col_0.png", + "img_2.jpg_rows/row_2/col_0.png", + "img_2.jpg_rows/row_3/col_0.png" + ], + "img_3.jpg": [ + "img_3.jpg_rows/row_0/col_0.png", + "img_3.jpg_rows/row_0/col_1.png", + "img_3.jpg_rows/row_1/col_0.png" + ], + "img_4.jpg": [ + "img_4.jpg_rows/row_0/col_0.png", + "img_4.jpg_rows/row_0/col_1.png", + "img_4.jpg_rows/row_1/col_0.png", + "img_4.jpg_rows/row_1/col_1.png" + ], + "img_5.jpg": [ + "img_5.jpg_rows/row_0/col_0.png", + "img_5.jpg_rows/row_0/col_1.png", + "img_5.jpg_rows/row_1/col_0.png", + "img_5.jpg_rows/row_1/col_1.png", + "img_5.jpg_rows/row_2/col_0.png" + ], + "img_6.jpg": [ + "img_6.jpg_rows/row_0/col_0.png", + "img_6.jpg_rows/row_0/col_1.png", + "img_6.jpg_rows/row_1/col_0.png", + "img_6.jpg_rows/row_1/col_1.png" + ], + "img_7.jpg": [ + "img_7.jpg_rows/row_0/col_0.png", + "img_7.jpg_rows/row_0/col_1.png", + "img_7.jpg_rows/row_1/col_0.png", + "img_7.jpg_rows/row_2/col_0.png", + "img_7.jpg_rows/row_2/col_1.png" + ], + "img_8.jpg": [ + "img_8.jpg_rows/row_0/col_0.png", + "img_8.jpg_rows/row_0/col_1.png", + "img_8.jpg_rows/row_0/col_2.png", + "img_8.jpg_rows/row_1/col_0.png", + "img_8.jpg_rows/row_1/col_1.png", + "img_8.jpg_rows/row_1/col_2.png", + "img_8.jpg_rows/row_2/col_0.png", + "img_8.jpg_rows/row_2/col_1.png", + "img_8.jpg_rows/row_3/col_0.png", + "img_8.jpg_rows/row_3/col_1.png", + "img_8.jpg_rows/row_4/col_0.png", + "img_8.jpg_rows/row_4/col_1.png", + "img_8.jpg_rows/row_5/col_0.png", + "img_8.jpg_rows/row_5/col_1.png" + ], + "img_9.jpg": [ + "img_9.jpg_rows/row_0/col_0.png", + "img_9.jpg_rows/row_0/col_1.png", + "img_9.jpg_rows/row_0/col_2.png", + "img_9.jpg_rows/row_1/col_0.png", + "img_9.jpg_rows/row_1/col_1.png", + "img_9.jpg_rows/row_2/col_0.png", + "img_9.jpg_rows/row_2/col_1.png", + "img_9.jpg_rows/row_3/col_0.png", + "img_9.jpg_rows/row_3/col_1.png" + ], + "img_10.jpg": [ + "img_10.jpg_rows/row_0/col_0.png", + "img_10.jpg_rows/row_0/col_1.png", + "img_10.jpg_rows/row_1/col_0.png", + "img_10.jpg_rows/row_2/col_0.png", + "img_10.jpg_rows/row_3/col_0.png" + ], + "img_11.jpg": [ + "img_11.jpg_rows/row_0/col_0.png", + "img_11.jpg_rows/row_1/col_0.png", + "img_11.jpg_rows/row_2/col_0.png", + "img_11.jpg_rows/row_3/col_0.png", + "img_11.jpg_rows/row_4/col_0.png", + "img_11.jpg_rows/row_5/col_0.png" + ], + "img_12.jpg": [ + "img_12.jpg_rows/row_0/col_0.png", + "img_12.jpg_rows/row_0/col_1.png", + "img_12.jpg_rows/row_1/col_0.png", + "img_12.jpg_rows/row_1/col_1.png", + "img_12.jpg_rows/row_2/col_0.png", + "img_12.jpg_rows/row_2/col_1.png" + ], + "img_13.jpg": [ + "img_13.jpg_rows/row_0/col_0.png", + "img_13.jpg_rows/row_0/col_1.png", + "img_13.jpg_rows/row_1/col_0.png", + "img_13.jpg_rows/row_1/col_1.png", + "img_13.jpg_rows/row_2/col_0.png", + "img_13.jpg_rows/row_3/col_0.png" + ], + "img_14.jpg": [ + "img_14.jpg_rows/row_0/col_0.png", + "img_14.jpg_rows/row_0/col_1.png", + "img_14.jpg_rows/row_1/col_0.png", + "img_14.jpg_rows/row_1/col_1.png", + "img_14.jpg_rows/row_2/col_0.png", + "img_14.jpg_rows/row_3/col_0.png", + "img_14.jpg_rows/row_4/col_0.png", + "img_14.jpg_rows/row_4/col_1.png", + "img_14.jpg_rows/row_5/col_0.png" + ], + "img_15.jpg": [ + "img_15.jpg_rows/row_0/col_0.png", + "img_15.jpg_rows/row_0/col_1.png", + "img_15.jpg_rows/row_1/col_0.png", + "img_15.jpg_rows/row_1/col_1.png", + "img_15.jpg_rows/row_2/col_0.png", + "img_15.jpg_rows/row_3/col_0.png", + "img_15.jpg_rows/row_4/col_0.png" + ], + "img_16.jpg": [ + "img_16.jpg_rows/row_0/col_0.png", + "img_16.jpg_rows/row_0/col_1.png", + "img_16.jpg_rows/row_1/col_0.png", + "img_16.jpg_rows/row_1/col_1.png", + "img_16.jpg_rows/row_2/col_0.png", + "img_16.jpg_rows/row_3/col_0.png", + "img_16.jpg_rows/row_3/col_1.png", + "img_16.jpg_rows/row_4/col_0.png", + "img_16.jpg_rows/row_5/col_0.png" + ], + "img_17.jpg": [ + "img_17.jpg_rows/row_0/col_0.png", + "img_17.jpg_rows/row_0/col_1.png", + "img_17.jpg_rows/row_1/col_0.png", + "img_17.jpg_rows/row_2/col_0.png", + "img_17.jpg_rows/row_2/col_1.png", + "img_17.jpg_rows/row_3/col_0.png", + "img_17.jpg_rows/row_4/col_0.png", + "img_17.jpg_rows/row_5/col_0.png" + ], + "img_18.jpg": [ + "img_18.jpg_rows/row_0/col_0.png", + "img_18.jpg_rows/row_0/col_1.png", + "img_18.jpg_rows/row_1/col_0.png", + "img_18.jpg_rows/row_1/col_1.png" + ], + "img_19.jpg": [ + "img_19.jpg_rows/row_0/col_0.png", + "img_19.jpg_rows/row_0/col_1.png", + "img_19.jpg_rows/row_1/col_0.png", + "img_19.jpg_rows/row_1/col_1.png", + "img_19.jpg_rows/row_2/col_0.png", + "img_19.jpg_rows/row_2/col_1.png" + ], + "img_20.jpg": [ + "img_20.jpg_rows/row_0/col_0.png", + "img_20.jpg_rows/row_0/col_1.png", + "img_20.jpg_rows/row_1/col_0.png", + "img_20.jpg_rows/row_1/col_1.png" + ], + "img_21.jpg": [ + "img_21.jpg_rows/row_0/col_0.png", + "img_21.jpg_rows/row_0/col_1.png", + "img_21.jpg_rows/row_1/col_0.png", + "img_21.jpg_rows/row_1/col_1.png" + ], + "img_22.jpg": [ + "img_22.jpg_rows/row_0/col_0.png", + "img_22.jpg_rows/row_0/col_1.png", + "img_22.jpg_rows/row_1/col_0.png", + "img_22.jpg_rows/row_1/col_1.png", + "img_22.jpg_rows/row_2/col_0.png", + "img_22.jpg_rows/row_2/col_1.png", + "img_22.jpg_rows/row_3/col_0.png" + ], + "img_23.jpg": [ + "img_23.jpg_rows/row_0/col_0.png", + "img_23.jpg_rows/row_0/col_1.png", + "img_23.jpg_rows/row_1/col_0.png", + "img_23.jpg_rows/row_1/col_1.png" + ], + "img_24.jpg": [ + "img_24.jpg_rows/row_0/col_0.png", + "img_24.jpg_rows/row_0/col_1.png", + "img_24.jpg_rows/row_1/col_0.png", + "img_24.jpg_rows/row_1/col_1.png", + "img_24.jpg_rows/row_2/col_0.png" + ], + "img_25.jpg": [ + "img_25.jpg_rows/row_0/col_0.png", + "img_25.jpg_rows/row_1/col_0.png", + "img_25.jpg_rows/row_2/col_0.png", + "img_25.jpg_rows/row_3/col_0.png", + "img_25.jpg_rows/row_4/col_0.png", + "img_25.jpg_rows/row_5/col_0.png" + ], + "img_26.jpg": [ + "img_26.jpg_rows/row_0/col_0.png", + "img_26.jpg_rows/row_0/col_1.png", + "img_26.jpg_rows/row_1/col_0.png", + "img_26.jpg_rows/row_2/col_0.png" + ], + "img_27.jpg": [ + "img_27.jpg_rows/row_0/col_0.png", + "img_27.jpg_rows/row_0/col_1.png", + "img_27.jpg_rows/row_1/col_0.png", + "img_27.jpg_rows/row_1/col_1.png", + "img_27.jpg_rows/row_2/col_0.png", + "img_27.jpg_rows/row_3/col_0.png", + "img_27.jpg_rows/row_4/col_0.png", + "img_27.jpg_rows/row_4/col_1.png" + ], + "img_28.jpg": [ + "img_28.jpg_rows/row_0/col_0.png", + "img_28.jpg_rows/row_1/col_0.png", + "img_28.jpg_rows/row_2/col_0.png", + "img_28.jpg_rows/row_3/col_0.png" + ] + } +} \ No newline at end of file diff --git a/wje/final_output_local.json b/wje/final_output_local.json new file mode 100644 index 0000000000000000000000000000000000000000..e752424d16981291929a091f6b560361e8e5d559 --- /dev/null +++ b/wje/final_output_local.json @@ -0,0 +1,265 @@ +{ + "subtopics": { + "Paper 1 and Paper 2: Pure Mathematics": [ + 11, + 29 + ], + "Paper 3: Statistics and Mechanics": [ + 30, + 40 + ] + }, + "local_images": { + "e7e5c8f3c0a6316c2b50698c45ebe05b49bfd8bbe47a07b7b1929dd3cfd3e609.jpg": "img_1.jpg", + "b243ef738ec2465b1cc00f4dd8dd0e5f5e10a91debf7762903ac6c023dd238c4.jpg": "img_2.jpg", + "5e22a8a8c5bc23ee4d16bda9cce4a6ab4bb53854074fd4d691531d5adb9f3ebe.jpg": "img_3.jpg", + "0e70645e72eadab75c88846b7947fc1216cf31d325febf02fbdf4898b430465d.jpg": "img_4.jpg", + "52484f429af5d74ef75e96bf132b15fdc4acd2ed46accb981d670592dcc57ff2.jpg": "img_5.jpg", + "5a153708e7a128d8f6477cb294d2f902d3a9bc57af709c81ccc3937b96580137.jpg": "img_6.jpg", + "fd3a52607bef204e6998e09db82d195de76d929399c2cb1a63e26f87054eec6f.jpg": "img_7.jpg", + "93885318f77c148b9fb1cd162cb9938d6f6cd795d000d5b997f2297198462fcf.jpg": "img_8.jpg", + "133a659582f49fb71dc5fcae918278e6659a257026e35741ba8e6b94fcdb9de6.jpg": "img_9.jpg", + "860d10a56a7e892c674f74fd030592339e629fb80d6e6dbfc343f95ec65a7c16.jpg": "img_10.jpg", + "ae5ee4479ae736ff433ca9b2a1c3f753bbc8cc11a384e27cb710b426757c31e9.jpg": "img_11.jpg", + "39ac9ccc8cd681e552fb1ae08341b4a2dcb33ea8fe6c787daf99fb993d29e57f.jpg": "img_12.jpg", + "6d67beb5c0bf2168a87ad6b7c179ff9c7de8bbd7e720f77f7bf206080cceb589.jpg": "img_13.jpg", + "b89d31200bc06fda181bd2538b5f3274de3e52b0adc7dd023ca676e168e6d487.jpg": "img_14.jpg", + "78907967ba7a56221a0987e6e696e361c82fcf057f41659e4aa77943a62b6763.jpg": "img_15.jpg", + "bd3eb31469dd7b72e9773564915dc768e2e152878d887dcab34e83875e0625bb.jpg": "img_16.jpg", + "f1f1acb21df3d785fa3120fbae5fc74f7064769d9b38524bb991cfaa110177f6.jpg": "img_17.jpg", + "b8b803d008ec9053c40f4a9c2c265a8a0b15742059331dc7997336c94ab74dc4.jpg": "img_18.jpg", + "9cbb4e3b89d75d1d5da2fe8c6ccc4c1d3f612779abaccf3322f8b78b2db8a161.jpg": "img_19.jpg", + "c6c4dfd8d7d1b83ef05d0ad30d4d09e75fe1d1152099b976eef7aededb872873.jpg": "img_20.jpg", + "7eaeb5261341b3dbe0554989b2681f87c4b7a418e21445f3e88aa873e16db0df.jpg": "img_21.jpg", + "22cbebb54b25ccf620ab043fc977fcc709fd5692d1e74b02267b8f689284225d.jpg": "img_22.jpg", + "7a3f07a668cfc19e26c35fb1421908638d5a233723942301eda2764a1e81374d.jpg": "img_23.jpg", + "42b9e068a3fddcc2adaa6736e0ccee448c0302349547c8eaed8a07c870d29b17.jpg": "img_24.jpg", + "2efcd74e6c9447686d3e08d2dca6998ffd44f5cf0323d7d93b4213a2337b32ab.jpg": "img_25.jpg", + "6ba16781c7909a8a47a6a51e520e739320c22791147ad6bbd482473cf5c96717.jpg": "img_26.jpg", + "3d3cdfbca59671749e9d93714510a36441a10769f6b43720f9f3e733d893ea3a.jpg": "img_27.jpg", + "35394d307566e17440ab0322a3c915a4537db1db85628b38f2fe7827d19d719d.jpg": "img_28.jpg" + }, + "tables_extracted": { + "img_1.jpg": [ + "img_1.jpg_rows/row_0/col_0.png", + "img_1.jpg_rows/row_0/col_1.png", + "img_1.jpg_rows/row_1/col_0.png", + "img_1.jpg_rows/row_1/col_1.png" + ], + "img_2.jpg": [ + "img_2.jpg_rows/row_0/col_0.png", + "img_2.jpg_rows/row_0/col_1.png", + "img_2.jpg_rows/row_1/col_0.png", + "img_2.jpg_rows/row_2/col_0.png", + "img_2.jpg_rows/row_3/col_0.png" + ], + "img_3.jpg": [ + "img_3.jpg_rows/row_0/col_0.png", + "img_3.jpg_rows/row_0/col_1.png", + "img_3.jpg_rows/row_1/col_0.png" + ], + "img_4.jpg": [ + "img_4.jpg_rows/row_0/col_0.png", + "img_4.jpg_rows/row_0/col_1.png", + "img_4.jpg_rows/row_1/col_0.png", + "img_4.jpg_rows/row_1/col_1.png" + ], + "img_5.jpg": [ + "img_5.jpg_rows/row_0/col_0.png", + "img_5.jpg_rows/row_0/col_1.png", + "img_5.jpg_rows/row_1/col_0.png", + "img_5.jpg_rows/row_1/col_1.png", + "img_5.jpg_rows/row_2/col_0.png" + ], + "img_6.jpg": [ + "img_6.jpg_rows/row_0/col_0.png", + "img_6.jpg_rows/row_0/col_1.png", + "img_6.jpg_rows/row_1/col_0.png", + "img_6.jpg_rows/row_1/col_1.png" + ], + "img_7.jpg": [ + "img_7.jpg_rows/row_0/col_0.png", + "img_7.jpg_rows/row_0/col_1.png", + "img_7.jpg_rows/row_1/col_0.png", + "img_7.jpg_rows/row_2/col_0.png", + "img_7.jpg_rows/row_2/col_1.png" + ], + "img_8.jpg": [ + "img_8.jpg_rows/row_0/col_0.png", + "img_8.jpg_rows/row_0/col_1.png", + "img_8.jpg_rows/row_0/col_2.png", + "img_8.jpg_rows/row_1/col_0.png", + "img_8.jpg_rows/row_1/col_1.png", + "img_8.jpg_rows/row_1/col_2.png", + "img_8.jpg_rows/row_2/col_0.png", + "img_8.jpg_rows/row_2/col_1.png", + "img_8.jpg_rows/row_3/col_0.png", + "img_8.jpg_rows/row_3/col_1.png", + "img_8.jpg_rows/row_4/col_0.png", + "img_8.jpg_rows/row_4/col_1.png", + "img_8.jpg_rows/row_5/col_0.png", + "img_8.jpg_rows/row_5/col_1.png" + ], + "img_9.jpg": [ + "img_9.jpg_rows/row_0/col_0.png", + "img_9.jpg_rows/row_0/col_1.png", + "img_9.jpg_rows/row_0/col_2.png", + "img_9.jpg_rows/row_1/col_0.png", + "img_9.jpg_rows/row_1/col_1.png", + "img_9.jpg_rows/row_2/col_0.png", + "img_9.jpg_rows/row_2/col_1.png", + "img_9.jpg_rows/row_3/col_0.png", + "img_9.jpg_rows/row_3/col_1.png" + ], + "img_10.jpg": [ + "img_10.jpg_rows/row_0/col_0.png", + "img_10.jpg_rows/row_0/col_1.png", + "img_10.jpg_rows/row_1/col_0.png", + "img_10.jpg_rows/row_2/col_0.png", + "img_10.jpg_rows/row_3/col_0.png" + ], + "img_11.jpg": [ + "img_11.jpg_rows/row_0/col_0.png", + "img_11.jpg_rows/row_1/col_0.png", + "img_11.jpg_rows/row_2/col_0.png", + "img_11.jpg_rows/row_3/col_0.png", + "img_11.jpg_rows/row_4/col_0.png", + "img_11.jpg_rows/row_5/col_0.png" + ], + "img_12.jpg": [ + "img_12.jpg_rows/row_0/col_0.png", + "img_12.jpg_rows/row_0/col_1.png", + "img_12.jpg_rows/row_1/col_0.png", + "img_12.jpg_rows/row_1/col_1.png", + "img_12.jpg_rows/row_2/col_0.png", + "img_12.jpg_rows/row_2/col_1.png" + ], + "img_13.jpg": [ + "img_13.jpg_rows/row_0/col_0.png", + "img_13.jpg_rows/row_0/col_1.png", + "img_13.jpg_rows/row_1/col_0.png", + "img_13.jpg_rows/row_1/col_1.png", + "img_13.jpg_rows/row_2/col_0.png", + "img_13.jpg_rows/row_3/col_0.png" + ], + "img_14.jpg": [ + "img_14.jpg_rows/row_0/col_0.png", + "img_14.jpg_rows/row_0/col_1.png", + "img_14.jpg_rows/row_1/col_0.png", + "img_14.jpg_rows/row_1/col_1.png", + "img_14.jpg_rows/row_2/col_0.png", + "img_14.jpg_rows/row_3/col_0.png", + "img_14.jpg_rows/row_4/col_0.png", + "img_14.jpg_rows/row_4/col_1.png", + "img_14.jpg_rows/row_5/col_0.png" + ], + "img_15.jpg": [ + "img_15.jpg_rows/row_0/col_0.png", + "img_15.jpg_rows/row_0/col_1.png", + "img_15.jpg_rows/row_1/col_0.png", + "img_15.jpg_rows/row_1/col_1.png", + "img_15.jpg_rows/row_2/col_0.png", + "img_15.jpg_rows/row_3/col_0.png", + "img_15.jpg_rows/row_4/col_0.png" + ], + "img_16.jpg": [ + "img_16.jpg_rows/row_0/col_0.png", + "img_16.jpg_rows/row_0/col_1.png", + "img_16.jpg_rows/row_1/col_0.png", + "img_16.jpg_rows/row_1/col_1.png", + "img_16.jpg_rows/row_2/col_0.png", + "img_16.jpg_rows/row_3/col_0.png", + "img_16.jpg_rows/row_3/col_1.png", + "img_16.jpg_rows/row_4/col_0.png", + "img_16.jpg_rows/row_5/col_0.png" + ], + "img_17.jpg": [ + "img_17.jpg_rows/row_0/col_0.png", + "img_17.jpg_rows/row_0/col_1.png", + "img_17.jpg_rows/row_1/col_0.png", + "img_17.jpg_rows/row_2/col_0.png", + "img_17.jpg_rows/row_2/col_1.png", + "img_17.jpg_rows/row_3/col_0.png", + "img_17.jpg_rows/row_4/col_0.png", + "img_17.jpg_rows/row_5/col_0.png" + ], + "img_18.jpg": [ + "img_18.jpg_rows/row_0/col_0.png", + "img_18.jpg_rows/row_0/col_1.png", + "img_18.jpg_rows/row_1/col_0.png", + "img_18.jpg_rows/row_1/col_1.png" + ], + "img_19.jpg": [ + "img_19.jpg_rows/row_0/col_0.png", + "img_19.jpg_rows/row_0/col_1.png", + "img_19.jpg_rows/row_1/col_0.png", + "img_19.jpg_rows/row_1/col_1.png", + "img_19.jpg_rows/row_2/col_0.png", + "img_19.jpg_rows/row_2/col_1.png" + ], + "img_20.jpg": [ + "img_20.jpg_rows/row_0/col_0.png", + "img_20.jpg_rows/row_0/col_1.png", + "img_20.jpg_rows/row_1/col_0.png", + "img_20.jpg_rows/row_1/col_1.png" + ], + "img_21.jpg": [ + "img_21.jpg_rows/row_0/col_0.png", + "img_21.jpg_rows/row_0/col_1.png", + "img_21.jpg_rows/row_1/col_0.png", + "img_21.jpg_rows/row_1/col_1.png" + ], + "img_22.jpg": [ + "img_22.jpg_rows/row_0/col_0.png", + "img_22.jpg_rows/row_0/col_1.png", + "img_22.jpg_rows/row_1/col_0.png", + "img_22.jpg_rows/row_1/col_1.png", + "img_22.jpg_rows/row_2/col_0.png", + "img_22.jpg_rows/row_2/col_1.png", + "img_22.jpg_rows/row_3/col_0.png" + ], + "img_23.jpg": [ + "img_23.jpg_rows/row_0/col_0.png", + "img_23.jpg_rows/row_0/col_1.png", + "img_23.jpg_rows/row_1/col_0.png", + "img_23.jpg_rows/row_1/col_1.png" + ], + "img_24.jpg": [ + "img_24.jpg_rows/row_0/col_0.png", + "img_24.jpg_rows/row_0/col_1.png", + "img_24.jpg_rows/row_1/col_0.png", + "img_24.jpg_rows/row_1/col_1.png", + "img_24.jpg_rows/row_2/col_0.png" + ], + "img_25.jpg": [ + "img_25.jpg_rows/row_0/col_0.png", + "img_25.jpg_rows/row_1/col_0.png", + "img_25.jpg_rows/row_2/col_0.png", + "img_25.jpg_rows/row_3/col_0.png", + "img_25.jpg_rows/row_4/col_0.png", + "img_25.jpg_rows/row_5/col_0.png" + ], + "img_26.jpg": [ + "img_26.jpg_rows/row_0/col_0.png", + "img_26.jpg_rows/row_0/col_1.png", + "img_26.jpg_rows/row_1/col_0.png", + "img_26.jpg_rows/row_2/col_0.png" + ], + "img_27.jpg": [ + "img_27.jpg_rows/row_0/col_0.png", + "img_27.jpg_rows/row_0/col_1.png", + "img_27.jpg_rows/row_1/col_0.png", + "img_27.jpg_rows/row_1/col_1.png", + "img_27.jpg_rows/row_2/col_0.png", + "img_27.jpg_rows/row_3/col_0.png", + "img_27.jpg_rows/row_4/col_0.png", + "img_27.jpg_rows/row_4/col_1.png" + ], + "img_28.jpg": [ + "img_28.jpg_rows/row_0/col_0.png", + "img_28.jpg_rows/row_1/col_0.png", + "img_28.jpg_rows/row_2/col_0.png", + "img_28.jpg_rows/row_3/col_0.png" + ] + } +} \ No newline at end of file diff --git a/wje/img_1.jpg_rows/row_0/col_0.png b/wje/img_1.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..89e913c42dfb6bbc021f668a8e25b2a91d8a7895 Binary files /dev/null and b/wje/img_1.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_1.jpg_rows/row_0/col_1.png b/wje/img_1.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0388229256c3325d728ed9b8ad9c683940dc43f0 Binary files /dev/null and b/wje/img_1.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_1.jpg_rows/row_1/col_0.png b/wje/img_1.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..eece6d1fe11464358b7c7c9ffb20827ba38d3dad Binary files /dev/null and b/wje/img_1.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_1.jpg_rows/row_1/col_1.png b/wje/img_1.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..38a09260f541eeaa4fa5fe9cb7cb64ca0625f1b8 Binary files /dev/null and b/wje/img_1.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_10.jpg_rows/row_0/col_0.png b/wje/img_10.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..26537b046972529528899d31bc4400f298bd22b0 Binary files /dev/null and b/wje/img_10.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_10.jpg_rows/row_0/col_1.png b/wje/img_10.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..dc7a83f30102732b630e03d6a2e61e6ce321b994 Binary files /dev/null and b/wje/img_10.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_10.jpg_rows/row_1/col_0.png b/wje/img_10.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..cfdd9efe4bf99a6a83411e9b94b98eedf626ac54 Binary files /dev/null and b/wje/img_10.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_10.jpg_rows/row_2/col_0.png b/wje/img_10.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..3b45bf1a45620aca8f27e8c7daf89ae919a38af4 Binary files /dev/null and b/wje/img_10.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_10.jpg_rows/row_3/col_0.png b/wje/img_10.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7b28fc9c8ae796c35442a9352552386006ed42f1 Binary files /dev/null and b/wje/img_10.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_11.jpg_rows/row_0/col_0.png b/wje/img_11.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7f852785ff31389c4c143d9b38c33c92d584f17b Binary files /dev/null and b/wje/img_11.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_11.jpg_rows/row_1/col_0.png b/wje/img_11.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..5b9b0ea20a6fa3b878f3450634eab82d445f190d Binary files /dev/null and b/wje/img_11.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_11.jpg_rows/row_2/col_0.png b/wje/img_11.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d7e08e40ddc5463301f55f3bf81f819276fb3e0a Binary files /dev/null and b/wje/img_11.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_11.jpg_rows/row_3/col_0.png b/wje/img_11.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..ac389b4f121b3ea01fcf29ad14cddf6d615f180b Binary files /dev/null and b/wje/img_11.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_11.jpg_rows/row_4/col_0.png b/wje/img_11.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0af51308d6e73eb3ad78dad9afbc6c6dc5f9a4c8 Binary files /dev/null and b/wje/img_11.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_11.jpg_rows/row_5/col_0.png b/wje/img_11.jpg_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..470b619e96352c26a400c0f5e41320904c77efba Binary files /dev/null and b/wje/img_11.jpg_rows/row_5/col_0.png differ diff --git a/wje/img_12.jpg_rows/row_0/col_0.png b/wje/img_12.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1775bde2170a565216b62b821cbe612ff8a5a9ab Binary files /dev/null and b/wje/img_12.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_12.jpg_rows/row_0/col_1.png b/wje/img_12.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..682f7963b4e84196b7f449ece97f228b22e893cc Binary files /dev/null and b/wje/img_12.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_12.jpg_rows/row_1/col_0.png b/wje/img_12.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4ca08524c8c80015e252be243d08d8c2dddf43e8 Binary files /dev/null and b/wje/img_12.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_12.jpg_rows/row_1/col_1.png b/wje/img_12.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..ab3c69b0c0524d0a04c6f9cce3391400b41a0ca2 Binary files /dev/null and b/wje/img_12.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_12.jpg_rows/row_2/col_0.png b/wje/img_12.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..25edc6ae617976a3b147b957f073f54fe3523f7e Binary files /dev/null and b/wje/img_12.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_12.jpg_rows/row_2/col_1.png b/wje/img_12.jpg_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9cf1cd5c2fd92fb11e7024c4138e1a67d1d6cb5a Binary files /dev/null and b/wje/img_12.jpg_rows/row_2/col_1.png differ diff --git a/wje/img_13.jpg_rows/row_0/col_0.png b/wje/img_13.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/wje/img_13.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_13.jpg_rows/row_0/col_1.png b/wje/img_13.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..a800de22d2e8583dd9f72a5de826d64cc6934548 Binary files /dev/null and b/wje/img_13.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_13.jpg_rows/row_1/col_0.png b/wje/img_13.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..6995e9eda17f7344e635d8fa6a665553967e67bf Binary files /dev/null and b/wje/img_13.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_13.jpg_rows/row_1/col_1.png b/wje/img_13.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..cb83b1971ce1cd6af17c8ebf6077966235aaf6d4 Binary files /dev/null and b/wje/img_13.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_13.jpg_rows/row_2/col_0.png b/wje/img_13.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..10356ac1f58485a3c71facfd31aeab0abc7d3d02 Binary files /dev/null and b/wje/img_13.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_13.jpg_rows/row_3/col_0.png b/wje/img_13.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0159a2d1a73906347ce9906b8c218736134e8f54 Binary files /dev/null and b/wje/img_13.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_14.jpg_rows/row_0/col_0.png b/wje/img_14.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..50c6e51ed7dd096db7325aff304f4f6ffd86107d Binary files /dev/null and b/wje/img_14.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_14.jpg_rows/row_0/col_1.png b/wje/img_14.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1477e2ba7cb40c82f06451d6493eb2b81bbe7b95 Binary files /dev/null and b/wje/img_14.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_14.jpg_rows/row_1/col_0.png b/wje/img_14.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..08f9ab77eab81088823b924f88dc297cfb5b2d82 Binary files /dev/null and b/wje/img_14.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_14.jpg_rows/row_1/col_1.png b/wje/img_14.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0cbc6f9597361c588b12e3ec5b4756b6ecafbdf6 Binary files /dev/null and b/wje/img_14.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_14.jpg_rows/row_2/col_0.png b/wje/img_14.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..3770dde72e1879a179f1f55a0b9ae8f78198281a Binary files /dev/null and b/wje/img_14.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_14.jpg_rows/row_3/col_0.png b/wje/img_14.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..2fc24d45b98c5135cfda0594a883c7abac23f046 Binary files /dev/null and b/wje/img_14.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_14.jpg_rows/row_4/col_0.png b/wje/img_14.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f1cf0db022ba7108229ce76e337bfd434c5f44f3 Binary files /dev/null and b/wje/img_14.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_14.jpg_rows/row_4/col_1.png b/wje/img_14.jpg_rows/row_4/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..442ded075c10bfc147f513cc8aa1c96457144f1e Binary files /dev/null and b/wje/img_14.jpg_rows/row_4/col_1.png differ diff --git a/wje/img_14.jpg_rows/row_5/col_0.png b/wje/img_14.jpg_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bbfcc51c21e3daa2e2a994ab8b966f59488062c7 Binary files /dev/null and b/wje/img_14.jpg_rows/row_5/col_0.png differ diff --git a/wje/img_15.jpg_rows/row_0/col_0.png b/wje/img_15.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/wje/img_15.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_15.jpg_rows/row_0/col_1.png b/wje/img_15.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..c2f19c57d8268d7a3937fe6edf0ebc0b84f694ff Binary files /dev/null and b/wje/img_15.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_15.jpg_rows/row_1/col_0.png b/wje/img_15.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1a54e55461bd2545391c43f8b3735aa2c1be8832 Binary files /dev/null and b/wje/img_15.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_15.jpg_rows/row_1/col_1.png b/wje/img_15.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..df85e02c599bc4c53b15a07172375f46c24032f5 Binary files /dev/null and b/wje/img_15.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_15.jpg_rows/row_2/col_0.png b/wje/img_15.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..a80f09fd932d3373e8461e620c23a790ef2c8112 Binary files /dev/null and b/wje/img_15.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_15.jpg_rows/row_3/col_0.png b/wje/img_15.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..27e00f6d1e649901b041aedc3bfee9a1c125ff72 Binary files /dev/null and b/wje/img_15.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_15.jpg_rows/row_4/col_0.png b/wje/img_15.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..48b1b79cd6b31320ee8b8f542632fbe75f8ac691 Binary files /dev/null and b/wje/img_15.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_16.jpg_rows/row_0/col_0.png b/wje/img_16.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b9daca8820847089338fad518fa021684ef3c02f Binary files /dev/null and b/wje/img_16.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_16.jpg_rows/row_0/col_1.png b/wje/img_16.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..2cccb9e0fe5934acb673aba90920f2fbe2710d9a Binary files /dev/null and b/wje/img_16.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_16.jpg_rows/row_1/col_0.png b/wje/img_16.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..90557039a5dfea53a0d22ec5f97ae23c4a1fa84f Binary files /dev/null and b/wje/img_16.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_16.jpg_rows/row_1/col_1.png b/wje/img_16.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b6d323cae60d093d7782adcc31c18d356344e365 Binary files /dev/null and b/wje/img_16.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_16.jpg_rows/row_2/col_0.png b/wje/img_16.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c4b9f4443e52ada4fe2833532df203aef61edca7 Binary files /dev/null and b/wje/img_16.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_16.jpg_rows/row_3/col_0.png b/wje/img_16.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c7fbdd450463483bda559e2097f108894c415018 Binary files /dev/null and b/wje/img_16.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_16.jpg_rows/row_3/col_1.png b/wje/img_16.jpg_rows/row_3/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9ca527e500a7dbce6f6adcd383a408de8bd2aace Binary files /dev/null and b/wje/img_16.jpg_rows/row_3/col_1.png differ diff --git a/wje/img_16.jpg_rows/row_4/col_0.png b/wje/img_16.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..90d84556cd5c8c97246fea7ad735e7ae2c606940 Binary files /dev/null and b/wje/img_16.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_16.jpg_rows/row_5/col_0.png b/wje/img_16.jpg_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0a4acc3778c6bfe0320807ad11110b0fc4388cd2 Binary files /dev/null and b/wje/img_16.jpg_rows/row_5/col_0.png differ diff --git a/wje/img_17.jpg_rows/row_0/col_0.png b/wje/img_17.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..59185c8c6eaa964d669fe104752a3746c6c95cde Binary files /dev/null and b/wje/img_17.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_17.jpg_rows/row_0/col_1.png b/wje/img_17.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..7de0f485e84753f996afefecdf2ac518811ec0cd Binary files /dev/null and b/wje/img_17.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_17.jpg_rows/row_1/col_0.png b/wje/img_17.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..2d5890555351629532c2812d263cbb6ed661813b Binary files /dev/null and b/wje/img_17.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_17.jpg_rows/row_2/col_0.png b/wje/img_17.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..84ba6a923104f1cc50677b2808886f6738927db1 Binary files /dev/null and b/wje/img_17.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_17.jpg_rows/row_2/col_1.png b/wje/img_17.jpg_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0e8a1180045b6e0415fdbb5b92d92af4826e0c3e Binary files /dev/null and b/wje/img_17.jpg_rows/row_2/col_1.png differ diff --git a/wje/img_17.jpg_rows/row_3/col_0.png b/wje/img_17.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..dcc96ae0ddb397a69ab9d91ee264598a3b58f358 Binary files /dev/null and b/wje/img_17.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_17.jpg_rows/row_4/col_0.png b/wje/img_17.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..db30f63ca320da95f7ac276b79696d544cb09284 Binary files /dev/null and b/wje/img_17.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_17.jpg_rows/row_5/col_0.png b/wje/img_17.jpg_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1c546a467901a44a8f74b604fab5ced98b312723 Binary files /dev/null and b/wje/img_17.jpg_rows/row_5/col_0.png differ diff --git a/wje/img_18.jpg_rows/row_0/col_0.png b/wje/img_18.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/wje/img_18.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_18.jpg_rows/row_0/col_1.png b/wje/img_18.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..45ae4a2e3228dfc8fdc2114137951e2065a42084 Binary files /dev/null and b/wje/img_18.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_18.jpg_rows/row_1/col_0.png b/wje/img_18.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..dd31a8c27a82da3de67c83c61149a689513c58e1 Binary files /dev/null and b/wje/img_18.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_18.jpg_rows/row_1/col_1.png b/wje/img_18.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..3fab4c63fe85d2a5583f9031ade0c198d4a8c7d9 Binary files /dev/null and b/wje/img_18.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_19.jpg_rows/row_0/col_0.png b/wje/img_19.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..542e040e2ab5230775c46b07faaf56a875c2c71d Binary files /dev/null and b/wje/img_19.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_19.jpg_rows/row_0/col_1.png b/wje/img_19.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..bae682b04e93a5bf3289a55e64227d252b3f3da2 Binary files /dev/null and b/wje/img_19.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_19.jpg_rows/row_1/col_0.png b/wje/img_19.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..31fe1e1b68010cd2c40d910196866d8632d0c781 Binary files /dev/null and b/wje/img_19.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_19.jpg_rows/row_1/col_1.png b/wje/img_19.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..564af1f7ec0b600300da698270ca7cc5a3891598 Binary files /dev/null and b/wje/img_19.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_19.jpg_rows/row_2/col_0.png b/wje/img_19.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4a388a3a0663a27fffe8086e97dad12b23f2152e Binary files /dev/null and b/wje/img_19.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_19.jpg_rows/row_2/col_1.png b/wje/img_19.jpg_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1f77be6f3aa1f5c738808cd3031ab759c6d986ff Binary files /dev/null and b/wje/img_19.jpg_rows/row_2/col_1.png differ diff --git a/wje/img_2.jpg_rows/row_0/col_0.png b/wje/img_2.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..fbd51d91192cd83eabacf3a9edcee6ae180b3d00 Binary files /dev/null and b/wje/img_2.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_2.jpg_rows/row_0/col_1.png b/wje/img_2.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..255c4195f33dedf5335f836ab93e46579d04bd39 Binary files /dev/null and b/wje/img_2.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_2.jpg_rows/row_1/col_0.png b/wje/img_2.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..46d49431a250a290437d6851e4f4e612991f3c4e Binary files /dev/null and b/wje/img_2.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_2.jpg_rows/row_2/col_0.png b/wje/img_2.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..9da4a65d8ff31f95550ba07eebd178b9b14fcf2b Binary files /dev/null and b/wje/img_2.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_2.jpg_rows/row_3/col_0.png b/wje/img_2.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bb9bb009b135374b46b62e827a6b6f545724a3df Binary files /dev/null and b/wje/img_2.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_20.jpg_rows/row_0/col_0.png b/wje/img_20.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..80b1020e24d835450c76f556cfac8032f5688fa5 Binary files /dev/null and b/wje/img_20.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_20.jpg_rows/row_0/col_1.png b/wje/img_20.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..11d064d5cf5186d32d2e072418b8549a7c935129 Binary files /dev/null and b/wje/img_20.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_20.jpg_rows/row_1/col_0.png b/wje/img_20.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..503e017e826056707f174e42a105c2bc3492ad12 Binary files /dev/null and b/wje/img_20.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_20.jpg_rows/row_1/col_1.png b/wje/img_20.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9d51a48a5e944a79bce3f6f4114cc963b9f44904 Binary files /dev/null and b/wje/img_20.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_21.jpg_rows/row_0/col_0.png b/wje/img_21.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..9a2a7a39fdfffbead06604019a82e4c00f89d3fc Binary files /dev/null and b/wje/img_21.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_21.jpg_rows/row_0/col_1.png b/wje/img_21.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..d29815729d9c055d508ec5fae87ea203fc58fba2 Binary files /dev/null and b/wje/img_21.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_21.jpg_rows/row_1/col_0.png b/wje/img_21.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..46c67ec9ec13925c0dc8587830a0e81223e01f3a Binary files /dev/null and b/wje/img_21.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_21.jpg_rows/row_1/col_1.png b/wje/img_21.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9ca59f8e9962e17473aa785d8fd8aee834d53dcb Binary files /dev/null and b/wje/img_21.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_22.jpg_rows/row_0/col_0.png b/wje/img_22.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..25fc81dd2c0f04fa3c4aaa5026096df57fd42d9e Binary files /dev/null and b/wje/img_22.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_22.jpg_rows/row_0/col_1.png b/wje/img_22.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e10fda6ae7f5c12a7f10e035e8c323ba5f8ba24d Binary files /dev/null and b/wje/img_22.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_22.jpg_rows/row_1/col_0.png b/wje/img_22.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..fe767c8364896e8467ca47cc9bed22a5b85816ab Binary files /dev/null and b/wje/img_22.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_22.jpg_rows/row_1/col_1.png b/wje/img_22.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..6f0e6841a49a32f309bf98ef1002aca3858c5278 Binary files /dev/null and b/wje/img_22.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_22.jpg_rows/row_2/col_0.png b/wje/img_22.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..77c0a6073ae3b39dce0f18eed86c744f2ce32afc Binary files /dev/null and b/wje/img_22.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_22.jpg_rows/row_2/col_1.png b/wje/img_22.jpg_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..33b9590288cf74040ddcf34944b7f182af70fbda Binary files /dev/null and b/wje/img_22.jpg_rows/row_2/col_1.png differ diff --git a/wje/img_22.jpg_rows/row_3/col_0.png b/wje/img_22.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4bab81fc617a554e49a7a8e6d87ecca9d527b819 Binary files /dev/null and b/wje/img_22.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_23.jpg_rows/row_0/col_0.png b/wje/img_23.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..925b06a520ceea02b32f1dcb0dbcdadf83495b6a Binary files /dev/null and b/wje/img_23.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_23.jpg_rows/row_0/col_1.png b/wje/img_23.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..6b92bd4090fc43f249d13bf2f4b4e0a370706e78 Binary files /dev/null and b/wje/img_23.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_23.jpg_rows/row_1/col_0.png b/wje/img_23.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f21ea39e738ab6fd67457079d74a011ada749e75 Binary files /dev/null and b/wje/img_23.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_23.jpg_rows/row_1/col_1.png b/wje/img_23.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..2f0cc7394cbc111c67bd6f225f5ee70475ab75e5 Binary files /dev/null and b/wje/img_23.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_24.jpg_rows/row_0/col_0.png b/wje/img_24.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..5f189e59f1a57dce17a6dc7ef25de166a0666d87 Binary files /dev/null and b/wje/img_24.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_24.jpg_rows/row_0/col_1.png b/wje/img_24.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..22144130c32593a18ef7452ac5062f66fc413804 Binary files /dev/null and b/wje/img_24.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_24.jpg_rows/row_1/col_0.png b/wje/img_24.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..35e0c6dfcc1ec79b084b444292b7d2d5b7df556a Binary files /dev/null and b/wje/img_24.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_24.jpg_rows/row_1/col_1.png b/wje/img_24.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..fa72c88f927fb132fae1015d0e416ca9abae7b7e Binary files /dev/null and b/wje/img_24.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_24.jpg_rows/row_2/col_0.png b/wje/img_24.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..ce8c285da83830c4f22991c7260b6fd2ffd9a66f Binary files /dev/null and b/wje/img_24.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_25.jpg_rows/row_0/col_0.png b/wje/img_25.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..64c8a51e88652511f483c13578c25f2c4d7e6f73 Binary files /dev/null and b/wje/img_25.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_25.jpg_rows/row_1/col_0.png b/wje/img_25.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..68c86122e275e53ce0c3984255f015af455acc7d Binary files /dev/null and b/wje/img_25.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_25.jpg_rows/row_2/col_0.png b/wje/img_25.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7b44a69aba898c2ed707ea46681c859462647901 Binary files /dev/null and b/wje/img_25.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_25.jpg_rows/row_3/col_0.png b/wje/img_25.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..80372c6b3273ed5f6390e0b649813040ac9e65b3 Binary files /dev/null and b/wje/img_25.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_25.jpg_rows/row_4/col_0.png b/wje/img_25.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bbf7aec7eaacca196309a84080c967f836a348d4 Binary files /dev/null and b/wje/img_25.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_25.jpg_rows/row_5/col_0.png b/wje/img_25.jpg_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..bc4adfb4e07d51167ee134352ec4c2edfc559ce7 Binary files /dev/null and b/wje/img_25.jpg_rows/row_5/col_0.png differ diff --git a/wje/img_26.jpg_rows/row_0/col_0.png b/wje/img_26.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..61264796bf92d3cc3aacd3cf4160fa2870ba4f84 Binary files /dev/null and b/wje/img_26.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_26.jpg_rows/row_0/col_1.png b/wje/img_26.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1450a461086e0f97c24d07f88374732e5b453e7a Binary files /dev/null and b/wje/img_26.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_26.jpg_rows/row_1/col_0.png b/wje/img_26.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d7d31e4235e356d27c16c115bc97937b7fc62cc5 Binary files /dev/null and b/wje/img_26.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_26.jpg_rows/row_2/col_0.png b/wje/img_26.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..e4fb8c58639e05fee1d8f05002bdff2f255dd083 Binary files /dev/null and b/wje/img_26.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_27.jpg_rows/row_0/col_0.png b/wje/img_27.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..73c3c1b5088769d29c50f62cde765537b7839783 Binary files /dev/null and b/wje/img_27.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_27.jpg_rows/row_0/col_1.png b/wje/img_27.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..f97f9307307aa967c8c1091bd6cc315120cb7c82 Binary files /dev/null and b/wje/img_27.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_27.jpg_rows/row_1/col_0.png b/wje/img_27.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..2da55c27ef7422ab7c546556b4214ef1f3221b49 Binary files /dev/null and b/wje/img_27.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_27.jpg_rows/row_1/col_1.png b/wje/img_27.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..bed5a4371b697a33339199af4a9e528430d4225a Binary files /dev/null and b/wje/img_27.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_27.jpg_rows/row_2/col_0.png b/wje/img_27.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..9a4d3afed800402e52eb433dcfa320d2a3a4b8e0 Binary files /dev/null and b/wje/img_27.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_27.jpg_rows/row_3/col_0.png b/wje/img_27.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..aacbb4a2a1c36998508fe8e4b649b74b46f7720c Binary files /dev/null and b/wje/img_27.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_27.jpg_rows/row_4/col_0.png b/wje/img_27.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..510ea03941f0e669a2e746bd33ceac543c337c4d Binary files /dev/null and b/wje/img_27.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_27.jpg_rows/row_4/col_1.png b/wje/img_27.jpg_rows/row_4/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..a94a92bceb0561b088478bdfd1f105513614be93 Binary files /dev/null and b/wje/img_27.jpg_rows/row_4/col_1.png differ diff --git a/wje/img_28.jpg_rows/row_0/col_0.png b/wje/img_28.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8ca18179dbccd67cab231d7b20c5b4b969ebf297 Binary files /dev/null and b/wje/img_28.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_28.jpg_rows/row_1/col_0.png b/wje/img_28.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..c65df7b21377e5ba1e4a44c3c93fac4440b8a634 Binary files /dev/null and b/wje/img_28.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_28.jpg_rows/row_2/col_0.png b/wje/img_28.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..638d0025a088e250c1b4960fc9aaf5b5d3bc465c Binary files /dev/null and b/wje/img_28.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_28.jpg_rows/row_3/col_0.png b/wje/img_28.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1cf0dcf6ba4f67043c7cd6d58898fec1a76468cc Binary files /dev/null and b/wje/img_28.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_3.jpg_rows/row_0/col_0.png b/wje/img_3.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d34f77527c632ccb5ea0ed5660f479d764e49f97 Binary files /dev/null and b/wje/img_3.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_3.jpg_rows/row_0/col_1.png b/wje/img_3.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..57aba9dc40fd6f5476b035e2530695fa07c8c4ae Binary files /dev/null and b/wje/img_3.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_3.jpg_rows/row_1/col_0.png b/wje/img_3.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..58814d84ffefea09eb19cb36bb2f1dd71096497d Binary files /dev/null and b/wje/img_3.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_4.jpg_rows/row_1/col_0.png b/wje/img_4.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8ecda41d195525e1526f1a620fd1162cf1914ad2 Binary files /dev/null and b/wje/img_4.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_4.jpg_rows/row_1/col_1.png b/wje/img_4.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..56648484c180a4a12b511ed88c065c8184a9b8bd Binary files /dev/null and b/wje/img_4.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_5.jpg_rows/row_0/col_0.png b/wje/img_5.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..54e835fc7fda8b76658eb9732756c4f0a2e31fe1 Binary files /dev/null and b/wje/img_5.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_5.jpg_rows/row_0/col_1.png b/wje/img_5.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..a800de22d2e8583dd9f72a5de826d64cc6934548 Binary files /dev/null and b/wje/img_5.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_5.jpg_rows/row_1/col_0.png b/wje/img_5.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..257aac0e47a2b1b25ffe3f2827f158d48e39aa74 Binary files /dev/null and b/wje/img_5.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_5.jpg_rows/row_1/col_1.png b/wje/img_5.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..18ef074956b7d70759cca25788f51195c90e898d Binary files /dev/null and b/wje/img_5.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_5.jpg_rows/row_2/col_0.png b/wje/img_5.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..cade544edb83ae507d809e8285fc9b022825f119 Binary files /dev/null and b/wje/img_5.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_6.jpg_rows/row_0/col_0.png b/wje/img_6.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d2bcb6341c7a0161b77dcac99d606eeec14b5659 Binary files /dev/null and b/wje/img_6.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_6.jpg_rows/row_0/col_1.png b/wje/img_6.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..04eec05e20370257adb0d9707a19c357fc141eea Binary files /dev/null and b/wje/img_6.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_6.jpg_rows/row_1/col_0.png b/wje/img_6.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..4a03b25084a20ea7ad2c2843cec5aed01c3d5fa3 Binary files /dev/null and b/wje/img_6.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_6.jpg_rows/row_1/col_1.png b/wje/img_6.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..3aee171a64abcf92d743db31bb93b24e1d152da9 Binary files /dev/null and b/wje/img_6.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_7.jpg_rows/row_0/col_0.png b/wje/img_7.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..07e92d3d4896ce937f1e5782e9d06ab289bdbfaf Binary files /dev/null and b/wje/img_7.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_7.jpg_rows/row_0/col_1.png b/wje/img_7.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..5c0ed14732d76945b6d87027872c44d1294a55e5 Binary files /dev/null and b/wje/img_7.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_7.jpg_rows/row_1/col_0.png b/wje/img_7.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..16746135d02d76403d7ec85c31278cf4bcebb97b Binary files /dev/null and b/wje/img_7.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_7.jpg_rows/row_2/col_0.png b/wje/img_7.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..6e2ba1294e2f408ef63515082862e41e5da5df49 Binary files /dev/null and b/wje/img_7.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_7.jpg_rows/row_2/col_1.png b/wje/img_7.jpg_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9f0d9f538fac95baf72307da8cfb33717608b3b1 Binary files /dev/null and b/wje/img_7.jpg_rows/row_2/col_1.png differ diff --git a/wje/img_8.jpg_rows/row_0/col_0.png b/wje/img_8.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..5c1f835fc680133dfd618ebe2fec0ef420c698cf Binary files /dev/null and b/wje/img_8.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_8.jpg_rows/row_0/col_1.png b/wje/img_8.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0136828af8bd0f803fc1a91781b5888d2e3b9a24 Binary files /dev/null and b/wje/img_8.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_8.jpg_rows/row_0/col_2.png b/wje/img_8.jpg_rows/row_0/col_2.png new file mode 100644 index 0000000000000000000000000000000000000000..611fc15c42a338e6a9bb73953e97bf43d0553fde Binary files /dev/null and b/wje/img_8.jpg_rows/row_0/col_2.png differ diff --git a/wje/img_8.jpg_rows/row_1/col_0.png b/wje/img_8.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..e7257506e4e6b7ab40352b12cd3477ae8452aec4 Binary files /dev/null and b/wje/img_8.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_8.jpg_rows/row_1/col_1.png b/wje/img_8.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..f23ea460fb2789f513ef6f257e10dc8dc5d3a923 Binary files /dev/null and b/wje/img_8.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_8.jpg_rows/row_1/col_2.png b/wje/img_8.jpg_rows/row_1/col_2.png new file mode 100644 index 0000000000000000000000000000000000000000..9d761e2b90b2d9e1632a370c1ef7d0593da4b9a7 Binary files /dev/null and b/wje/img_8.jpg_rows/row_1/col_2.png differ diff --git a/wje/img_8.jpg_rows/row_2/col_0.png b/wje/img_8.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8190f8f5b9d13764960ca6f16a66c479fcec7953 Binary files /dev/null and b/wje/img_8.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_8.jpg_rows/row_2/col_1.png b/wje/img_8.jpg_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..fe84fed74cb805b3dee88ae7032400c28e8f5b6e Binary files /dev/null and b/wje/img_8.jpg_rows/row_2/col_1.png differ diff --git a/wje/img_8.jpg_rows/row_3/col_0.png b/wje/img_8.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..b9daf99a0617d8c7a3b671c6d8a5902ea88c0172 Binary files /dev/null and b/wje/img_8.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_8.jpg_rows/row_3/col_1.png b/wje/img_8.jpg_rows/row_3/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e96a186e6b1787040bbe924aaab20d4b9a0e8d6f Binary files /dev/null and b/wje/img_8.jpg_rows/row_3/col_1.png differ diff --git a/wje/img_8.jpg_rows/row_4/col_0.png b/wje/img_8.jpg_rows/row_4/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..125c91f2551c117f499b9ccacf3d654c23a2eebe Binary files /dev/null and b/wje/img_8.jpg_rows/row_4/col_0.png differ diff --git a/wje/img_8.jpg_rows/row_4/col_1.png b/wje/img_8.jpg_rows/row_4/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..d987462a61782d0e4f3bb4f43827ef2d8dd40e3e Binary files /dev/null and b/wje/img_8.jpg_rows/row_4/col_1.png differ diff --git a/wje/img_8.jpg_rows/row_5/col_0.png b/wje/img_8.jpg_rows/row_5/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..caea0b519da35482dede79485696e586ac481103 Binary files /dev/null and b/wje/img_8.jpg_rows/row_5/col_0.png differ diff --git a/wje/img_8.jpg_rows/row_5/col_1.png b/wje/img_8.jpg_rows/row_5/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..83bf3f65e91b95ff1632222978c17640d1339f52 Binary files /dev/null and b/wje/img_8.jpg_rows/row_5/col_1.png differ diff --git a/wje/img_9.jpg_rows/row_0/col_0.png b/wje/img_9.jpg_rows/row_0/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..75cf6af5bfc97f1e9cf7e78f77dc232929a2d822 Binary files /dev/null and b/wje/img_9.jpg_rows/row_0/col_0.png differ diff --git a/wje/img_9.jpg_rows/row_0/col_1.png b/wje/img_9.jpg_rows/row_0/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b6f32c9badda695415521c860d6d30465319772c Binary files /dev/null and b/wje/img_9.jpg_rows/row_0/col_1.png differ diff --git a/wje/img_9.jpg_rows/row_0/col_2.png b/wje/img_9.jpg_rows/row_0/col_2.png new file mode 100644 index 0000000000000000000000000000000000000000..47e497e30cb420488e259425f825e8a86a4be4a6 Binary files /dev/null and b/wje/img_9.jpg_rows/row_0/col_2.png differ diff --git a/wje/img_9.jpg_rows/row_1/col_0.png b/wje/img_9.jpg_rows/row_1/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..18badb90924b08fb59c67b0bf6f89001d9de1c5e Binary files /dev/null and b/wje/img_9.jpg_rows/row_1/col_0.png differ diff --git a/wje/img_9.jpg_rows/row_1/col_1.png b/wje/img_9.jpg_rows/row_1/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e7849c2219a018146ebe5fe38adc893260defea5 Binary files /dev/null and b/wje/img_9.jpg_rows/row_1/col_1.png differ diff --git a/wje/img_9.jpg_rows/row_2/col_0.png b/wje/img_9.jpg_rows/row_2/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..3d1ad2fcc29169b5c70e6298750c0b1b83670c43 Binary files /dev/null and b/wje/img_9.jpg_rows/row_2/col_0.png differ diff --git a/wje/img_9.jpg_rows/row_2/col_1.png b/wje/img_9.jpg_rows/row_2/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..bf0eae79a8b54916fab6e943ff26fb1e6e5db1e4 Binary files /dev/null and b/wje/img_9.jpg_rows/row_2/col_1.png differ diff --git a/wje/img_9.jpg_rows/row_3/col_0.png b/wje/img_9.jpg_rows/row_3/col_0.png new file mode 100644 index 0000000000000000000000000000000000000000..0732583e28d02f28f3bb7cee11d3dae71fc31b15 Binary files /dev/null and b/wje/img_9.jpg_rows/row_3/col_0.png differ diff --git a/wje/img_9.jpg_rows/row_3/col_1.png b/wje/img_9.jpg_rows/row_3/col_1.png new file mode 100644 index 0000000000000000000000000000000000000000..0366c7a6fe78f482f5adf418a2aea192c09a57f8 Binary files /dev/null and b/wje/img_9.jpg_rows/row_3/col_1.png differ