Arsenii11 commited on
Commit
8e381ca
·
1 Parent(s): 622aefd

add JSON schema buil

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. __pycache__/mineru_single.cpython-310.pyc +0 -0
  2. mineru_single.py +1 -1
  3. output1.pdf +3 -0
  4. pearson_json/final_subtopics.json +0 -1139
  5. topic_extr.py +13 -115
  6. topic_extract_arsenii.py +45 -68
  7. topic_extraction.log +108 -0
  8. topic_extraction_ars.log +286 -0
  9. we/final_subtopics.json +944 -87
  10. wje/final_output.json +0 -265
  11. wje/final_output_local.json +0 -265
  12. wje/img_1.jpg_rows/row_0/col_0.png +0 -0
  13. wje/img_1.jpg_rows/row_0/col_1.png +0 -0
  14. wje/img_1.jpg_rows/row_1/col_0.png +0 -0
  15. wje/img_1.jpg_rows/row_1/col_1.png +0 -0
  16. wje/img_10.jpg_rows/row_0/col_0.png +0 -0
  17. wje/img_10.jpg_rows/row_0/col_1.png +0 -0
  18. wje/img_10.jpg_rows/row_1/col_0.png +0 -0
  19. wje/img_10.jpg_rows/row_2/col_0.png +0 -0
  20. wje/img_10.jpg_rows/row_3/col_0.png +0 -0
  21. wje/img_11.jpg_rows/row_0/col_0.png +0 -0
  22. wje/img_11.jpg_rows/row_1/col_0.png +0 -0
  23. wje/img_11.jpg_rows/row_2/col_0.png +0 -0
  24. wje/img_11.jpg_rows/row_3/col_0.png +0 -0
  25. wje/img_11.jpg_rows/row_4/col_0.png +0 -0
  26. wje/img_11.jpg_rows/row_5/col_0.png +0 -0
  27. wje/img_12.jpg_rows/row_0/col_0.png +0 -0
  28. wje/img_12.jpg_rows/row_0/col_1.png +0 -0
  29. wje/img_12.jpg_rows/row_1/col_0.png +0 -0
  30. wje/img_12.jpg_rows/row_1/col_1.png +0 -0
  31. wje/img_12.jpg_rows/row_2/col_0.png +0 -0
  32. wje/img_12.jpg_rows/row_2/col_1.png +0 -0
  33. wje/img_13.jpg_rows/row_0/col_0.png +0 -0
  34. wje/img_13.jpg_rows/row_0/col_1.png +0 -0
  35. wje/img_13.jpg_rows/row_1/col_0.png +0 -0
  36. wje/img_13.jpg_rows/row_1/col_1.png +0 -0
  37. wje/img_13.jpg_rows/row_2/col_0.png +0 -0
  38. wje/img_13.jpg_rows/row_3/col_0.png +0 -0
  39. wje/img_14.jpg_rows/row_0/col_0.png +0 -0
  40. wje/img_14.jpg_rows/row_0/col_1.png +0 -0
  41. wje/img_14.jpg_rows/row_1/col_0.png +0 -0
  42. wje/img_14.jpg_rows/row_1/col_1.png +0 -0
  43. wje/img_14.jpg_rows/row_2/col_0.png +0 -0
  44. wje/img_14.jpg_rows/row_3/col_0.png +0 -0
  45. wje/img_14.jpg_rows/row_4/col_0.png +0 -0
  46. wje/img_14.jpg_rows/row_4/col_1.png +0 -0
  47. wje/img_14.jpg_rows/row_5/col_0.png +0 -0
  48. wje/img_15.jpg_rows/row_0/col_0.png +0 -0
  49. wje/img_15.jpg_rows/row_0/col_1.png +0 -0
  50. wje/img_15.jpg_rows/row_1/col_0.png +0 -0
__pycache__/mineru_single.cpython-310.pyc CHANGED
Binary files a/__pycache__/mineru_single.cpython-310.pyc and b/__pycache__/mineru_single.cpython-310.pyc differ
 
mineru_single.py CHANGED
@@ -331,6 +331,6 @@ if __name__ == "__main__":
331
 
332
 
333
  processor = Processor()
334
- file_path = "./p_ms.PDF"
335
  markdown_result = processor.process(file_path, key="1234323")
336
  print("Single file Markdown:\n", markdown_result)
 
331
 
332
 
333
  processor = Processor()
334
+ file_path = "./output1.pdf"
335
  markdown_result = processor.process(file_path, key="1234323")
336
  print("Single file Markdown:\n", markdown_result)
output1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b2f32c4f39c66673ac775c4061a57259a92b5fc69e81fec46374a9a0eb492b2
3
+ size 123145
pearson_json/final_subtopics.json DELETED
@@ -1,1139 +0,0 @@
1
- [
2
- {
3
- "title": "Topics",
4
- "contents": [
5
- {
6
- "type": "image",
7
- "key": "/topic-extraction/cells/img_1.jpg_r0_c0.png"
8
- }
9
- ],
10
- "children": [
11
- {
12
- "title": "1.1",
13
- "contents": [
14
- {
15
- "type": "image",
16
- "key": "/topic-extraction/cells/img_1.jpg_r1_c1.png"
17
- }
18
- ],
19
- "children": []
20
- }
21
- ]
22
- },
23
- {
24
- "title": "2 Algebra and functions",
25
- "contents": [
26
- {
27
- "type": "image",
28
- "key": "/topic-extraction/cells/img_2.jpg_r0_c0.png"
29
- }
30
- ],
31
- "children": [
32
- {
33
- "title": "2.1",
34
- "contents": [
35
- {
36
- "type": "image",
37
- "key": "/topic-extraction/cells/img_2.jpg_r0_c1.png"
38
- }
39
- ],
40
- "children": []
41
- },
42
- {
43
- "title": "2.2",
44
- "contents": [
45
- {
46
- "type": "image",
47
- "key": "/topic-extraction/cells/img_2.jpg_r1_c0.png"
48
- }
49
- ],
50
- "children": []
51
- },
52
- {
53
- "title": "2.3",
54
- "contents": [
55
- {
56
- "type": "image",
57
- "key": "/topic-extraction/cells/img_2.jpg_r2_c0.png"
58
- }
59
- ],
60
- "children": []
61
- },
62
- {
63
- "title": "2.4",
64
- "contents": [
65
- {
66
- "type": "image",
67
- "key": "/topic-extraction/cells/img_2.jpg_r3_c0.png"
68
- }
69
- ],
70
- "children": []
71
- }
72
- ]
73
- },
74
- {
75
- "title": "2 Algebra and functions continued",
76
- "contents": [
77
- {
78
- "type": "image",
79
- "key": "/topic-extraction/cells/img_3.jpg_r0_c0.png"
80
- }
81
- ],
82
- "children": [
83
- {
84
- "title": "2.5",
85
- "contents": [
86
- {
87
- "type": "image",
88
- "key": "/topic-extraction/cells/img_3.jpg_r0_c1.png"
89
- }
90
- ],
91
- "children": []
92
- },
93
- {
94
- "title": "2.6",
95
- "contents": [
96
- {
97
- "type": "image",
98
- "key": "/topic-extraction/cells/img_3.jpg_r1_c0.png"
99
- }
100
- ],
101
- "children": []
102
- }
103
- ]
104
- },
105
- {
106
- "title": "Topics",
107
- "contents": [
108
- {
109
- "type": "image",
110
- "key": "/topic-extraction/cells/img_4.jpg_r0_c0.png"
111
- }
112
- ],
113
- "children": [
114
- {
115
- "title": "2.7",
116
- "contents": [
117
- {
118
- "type": "image",
119
- "key": "/topic-extraction/cells/img_4.jpg_r1_c1.png"
120
- }
121
- ],
122
- "children": []
123
- }
124
- ]
125
- },
126
- {
127
- "title": "Topics",
128
- "contents": [
129
- {
130
- "type": "image",
131
- "key": "/topic-extraction/cells/img_5.jpg_r0_c0.png"
132
- }
133
- ],
134
- "children": [
135
- {
136
- "title": "2.8",
137
- "contents": [
138
- {
139
- "type": "image",
140
- "key": "/topic-extraction/cells/img_5.jpg_r1_c1.png"
141
- }
142
- ],
143
- "children": []
144
- },
145
- {
146
- "title": "2.9",
147
- "contents": [
148
- {
149
- "type": "image",
150
- "key": "/topic-extraction/cells/img_5.jpg_r2_c0.png"
151
- }
152
- ],
153
- "children": []
154
- }
155
- ]
156
- },
157
- {
158
- "title": "2 Algebra and functions continued",
159
- "contents": [
160
- {
161
- "type": "image",
162
- "key": "/topic-extraction/cells/img_6.jpg_r0_c0.png"
163
- }
164
- ],
165
- "children": [
166
- {
167
- "title": "2.11",
168
- "contents": [
169
- {
170
- "type": "image",
171
- "key": "/topic-extraction/cells/img_6.jpg_r0_c1.png"
172
- }
173
- ],
174
- "children": []
175
- },
176
- {
177
- "title": "3.1",
178
- "contents": [
179
- {
180
- "type": "image",
181
- "key": "/topic-extraction/cells/img_6.jpg_r1_c1.png"
182
- }
183
- ],
184
- "children": []
185
- }
186
- ]
187
- },
188
- {
189
- "title": "3 Coordinate geometry in the (x, y) plane continued",
190
- "contents": [
191
- {
192
- "type": "image",
193
- "key": "/topic-extraction/cells/img_7.jpg_r0_c0.png"
194
- }
195
- ],
196
- "children": [
197
- {
198
- "title": "3.3",
199
- "contents": [
200
- {
201
- "type": "image",
202
- "key": "/topic-extraction/cells/img_7.jpg_r0_c1.png"
203
- }
204
- ],
205
- "children": []
206
- },
207
- {
208
- "title": "3.4",
209
- "contents": [
210
- {
211
- "type": "image",
212
- "key": "/topic-extraction/cells/img_7.jpg_r1_c0.png"
213
- }
214
- ],
215
- "children": []
216
- },
217
- {
218
- "title": "4.1",
219
- "contents": [
220
- {
221
- "type": "image",
222
- "key": "/topic-extraction/cells/img_7.jpg_r2_c1.png"
223
- }
224
- ],
225
- "children": []
226
- }
227
- ]
228
- },
229
- {
230
- "title": "Topics",
231
- "contents": [
232
- {
233
- "type": "image",
234
- "key": "/topic-extraction/cells/img_8.jpg_r0_c0.png"
235
- }
236
- ],
237
- "children": [
238
- {
239
- "title": "4.2",
240
- "contents": [
241
- {
242
- "type": "image",
243
- "key": "/topic-extraction/cells/img_8.jpg_r1_c1.png"
244
- }
245
- ],
246
- "children": []
247
- },
248
- {
249
- "title": "4.3",
250
- "contents": [
251
- {
252
- "type": "image",
253
- "key": "/topic-extraction/cells/img_8.jpg_r2_c0.png"
254
- }
255
- ],
256
- "children": []
257
- },
258
- {
259
- "title": "4.4",
260
- "contents": [
261
- {
262
- "type": "image",
263
- "key": "/topic-extraction/cells/img_8.jpg_r3_c0.png"
264
- }
265
- ],
266
- "children": []
267
- },
268
- {
269
- "title": "4.5",
270
- "contents": [
271
- {
272
- "type": "image",
273
- "key": "/topic-extraction/cells/img_8.jpg_r4_c0.png"
274
- }
275
- ],
276
- "children": []
277
- },
278
- {
279
- "title": "4.6",
280
- "contents": [
281
- {
282
- "type": "image",
283
- "key": "/topic-extraction/cells/img_8.jpg_r5_c0.png"
284
- }
285
- ],
286
- "children": []
287
- }
288
- ]
289
- },
290
- {
291
- "title": "gonometry",
292
- "contents": [
293
- {
294
- "type": "image",
295
- "key": "/topic-extraction/cells/img_9.jpg_r0_c0.png"
296
- }
297
- ],
298
- "children": [
299
- {
300
- "title": "5.1",
301
- "contents": [
302
- {
303
- "type": "image",
304
- "key": "/topic-extraction/cells/img_9.jpg_r0_c1.png"
305
- }
306
- ],
307
- "children": []
308
- },
309
- {
310
- "title": "5.2",
311
- "contents": [
312
- {
313
- "type": "image",
314
- "key": "/topic-extraction/cells/img_9.jpg_r1_c0.png"
315
- }
316
- ],
317
- "children": []
318
- },
319
- {
320
- "title": "5.3",
321
- "contents": [
322
- {
323
- "type": "image",
324
- "key": "/topic-extraction/cells/img_9.jpg_r2_c0.png"
325
- }
326
- ],
327
- "children": []
328
- },
329
- {
330
- "title": "5.4",
331
- "contents": [
332
- {
333
- "type": "image",
334
- "key": "/topic-extraction/cells/img_9.jpg_r3_c0.png"
335
- }
336
- ],
337
- "children": []
338
- }
339
- ]
340
- },
341
- {
342
- "title": "5 Trigonometry continued",
343
- "contents": [
344
- {
345
- "type": "image",
346
- "key": "/topic-extraction/cells/img_10.jpg_r0_c0.png"
347
- }
348
- ],
349
- "children": [
350
- {
351
- "title": "5.5",
352
- "contents": [
353
- {
354
- "type": "image",
355
- "key": "/topic-extraction/cells/img_10.jpg_r0_c1.png"
356
- }
357
- ],
358
- "children": []
359
- },
360
- {
361
- "title": "5.6",
362
- "contents": [
363
- {
364
- "type": "image",
365
- "key": "/topic-extraction/cells/img_10.jpg_r1_c0.png"
366
- }
367
- ],
368
- "children": []
369
- },
370
- {
371
- "title": "5.7",
372
- "contents": [
373
- {
374
- "type": "image",
375
- "key": "/topic-extraction/cells/img_10.jpg_r2_c0.png"
376
- }
377
- ],
378
- "children": []
379
- },
380
- {
381
- "title": "5.8",
382
- "contents": [
383
- {
384
- "type": "image",
385
- "key": "/topic-extraction/cells/img_10.jpg_r3_c0.png"
386
- }
387
- ],
388
- "children": []
389
- }
390
- ]
391
- },
392
- {
393
- "title": "",
394
- "contents": [],
395
- "children": [
396
- {
397
- "title": "6.1",
398
- "contents": [
399
- {
400
- "type": "image",
401
- "key": "/topic-extraction/cells/img_11.jpg_r0_c0.png"
402
- }
403
- ],
404
- "children": []
405
- },
406
- {
407
- "title": "6.2",
408
- "contents": [
409
- {
410
- "type": "image",
411
- "key": "/topic-extraction/cells/img_11.jpg_r1_c0.png"
412
- }
413
- ],
414
- "children": []
415
- },
416
- {
417
- "title": "6.3",
418
- "contents": [
419
- {
420
- "type": "image",
421
- "key": "/topic-extraction/cells/img_11.jpg_r2_c0.png"
422
- }
423
- ],
424
- "children": []
425
- },
426
- {
427
- "title": "6.4",
428
- "contents": [
429
- {
430
- "type": "image",
431
- "key": "/topic-extraction/cells/img_11.jpg_r3_c0.png"
432
- }
433
- ],
434
- "children": []
435
- },
436
- {
437
- "title": "6.5",
438
- "contents": [
439
- {
440
- "type": "image",
441
- "key": "/topic-extraction/cells/img_11.jpg_r4_c0.png"
442
- }
443
- ],
444
- "children": []
445
- },
446
- {
447
- "title": "6.6",
448
- "contents": [
449
- {
450
- "type": "image",
451
- "key": "/topic-extraction/cells/img_11.jpg_r5_c0.png"
452
- }
453
- ],
454
- "children": []
455
- }
456
- ]
457
- },
458
- {
459
- "title": "Topics",
460
- "contents": [
461
- {
462
- "type": "image",
463
- "key": "/topic-extraction/cells/img_12.jpg_r0_c0.png"
464
- }
465
- ],
466
- "children": [
467
- {
468
- "title": "6.7",
469
- "contents": [
470
- {
471
- "type": "image",
472
- "key": "/topic-extraction/cells/img_12.jpg_r1_c1.png"
473
- }
474
- ],
475
- "children": []
476
- },
477
- {
478
- "title": "7.1",
479
- "contents": [
480
- {
481
- "type": "image",
482
- "key": "/topic-extraction/cells/img_12.jpg_r2_c1.png"
483
- }
484
- ],
485
- "children": []
486
- }
487
- ]
488
- },
489
- {
490
- "title": "Topics",
491
- "contents": [
492
- {
493
- "type": "image",
494
- "key": "/topic-extraction/cells/img_13.jpg_r0_c0.png"
495
- }
496
- ],
497
- "children": [
498
- {
499
- "title": "7.1",
500
- "contents": [
501
- {
502
- "type": "image",
503
- "key": "/topic-extraction/cells/img_13.jpg_r1_c1.png"
504
- }
505
- ],
506
- "children": []
507
- },
508
- {
509
- "title": "7.2",
510
- "contents": [
511
- {
512
- "type": "image",
513
- "key": "/topic-extraction/cells/img_13.jpg_r2_c0.png"
514
- }
515
- ],
516
- "children": []
517
- },
518
- {
519
- "title": "7.3",
520
- "contents": [
521
- {
522
- "type": "image",
523
- "key": "/topic-extraction/cells/img_13.jpg_r3_c0.png"
524
- }
525
- ],
526
- "children": []
527
- }
528
- ]
529
- },
530
- {
531
- "title": "Topics",
532
- "contents": [
533
- {
534
- "type": "image",
535
- "key": "/topic-extraction/cells/img_14.jpg_r0_c0.png"
536
- }
537
- ],
538
- "children": [
539
- {
540
- "title": "7.4",
541
- "contents": [
542
- {
543
- "type": "image",
544
- "key": "/topic-extraction/cells/img_14.jpg_r1_c1.png"
545
- }
546
- ],
547
- "children": []
548
- },
549
- {
550
- "title": "7.5",
551
- "contents": [
552
- {
553
- "type": "image",
554
- "key": "/topic-extraction/cells/img_14.jpg_r2_c0.png"
555
- }
556
- ],
557
- "children": []
558
- },
559
- {
560
- "title": "7.6",
561
- "contents": [
562
- {
563
- "type": "image",
564
- "key": "/topic-extraction/cells/img_14.jpg_r3_c0.png"
565
- }
566
- ],
567
- "children": []
568
- },
569
- {
570
- "title": "8.1",
571
- "contents": [
572
- {
573
- "type": "image",
574
- "key": "/topic-extraction/cells/img_14.jpg_r4_c1.png"
575
- }
576
- ],
577
- "children": []
578
- },
579
- {
580
- "title": "8.2",
581
- "contents": [
582
- {
583
- "type": "image",
584
- "key": "/topic-extraction/cells/img_14.jpg_r5_c0.png"
585
- }
586
- ],
587
- "children": []
588
- }
589
- ]
590
- },
591
- {
592
- "title": "Topics",
593
- "contents": [
594
- {
595
- "type": "image",
596
- "key": "/topic-extraction/cells/img_15.jpg_r0_c0.png"
597
- }
598
- ],
599
- "children": [
600
- {
601
- "title": "8.3",
602
- "contents": [
603
- {
604
- "type": "image",
605
- "key": "/topic-extraction/cells/img_15.jpg_r1_c1.png"
606
- }
607
- ],
608
- "children": []
609
- },
610
- {
611
- "title": "8.4",
612
- "contents": [
613
- {
614
- "type": "image",
615
- "key": "/topic-extraction/cells/img_15.jpg_r2_c0.png"
616
- }
617
- ],
618
- "children": []
619
- },
620
- {
621
- "title": "8.5",
622
- "contents": [
623
- {
624
- "type": "image",
625
- "key": "/topic-extraction/cells/img_15.jpg_r3_c0.png"
626
- }
627
- ],
628
- "children": []
629
- },
630
- {
631
- "title": "8.6",
632
- "contents": [
633
- {
634
- "type": "image",
635
- "key": "/topic-extraction/cells/img_15.jpg_r4_c0.png"
636
- }
637
- ],
638
- "children": []
639
- }
640
- ]
641
- },
642
- {
643
- "title": "Topics",
644
- "contents": [
645
- {
646
- "type": "image",
647
- "key": "/topic-extraction/cells/img_16.jpg_r0_c0.png"
648
- }
649
- ],
650
- "children": [
651
- {
652
- "title": "8.7",
653
- "contents": [
654
- {
655
- "type": "image",
656
- "key": "/topic-extraction/cells/img_16.jpg_r1_c1.png"
657
- }
658
- ],
659
- "children": []
660
- },
661
- {
662
- "title": "8.8",
663
- "contents": [
664
- {
665
- "type": "image",
666
- "key": "/topic-extraction/cells/img_16.jpg_r2_c0.png"
667
- }
668
- ],
669
- "children": []
670
- },
671
- {
672
- "title": "9.1",
673
- "contents": [
674
- {
675
- "type": "image",
676
- "key": "/topic-extraction/cells/img_16.jpg_r3_c1.png"
677
- }
678
- ],
679
- "children": []
680
- },
681
- {
682
- "title": "9.2",
683
- "contents": [
684
- {
685
- "type": "image",
686
- "key": "/topic-extraction/cells/img_16.jpg_r4_c0.png"
687
- }
688
- ],
689
- "children": []
690
- },
691
- {
692
- "title": "9.3",
693
- "contents": [
694
- {
695
- "type": "image",
696
- "key": "/topic-extraction/cells/img_16.jpg_r5_c0.png"
697
- }
698
- ],
699
- "children": []
700
- }
701
- ]
702
- },
703
- {
704
- "title": "9 Numerical methods",
705
- "contents": [
706
- {
707
- "type": "image",
708
- "key": "/topic-extraction/cells/img_17.jpg_r0_c0.png"
709
- }
710
- ],
711
- "children": [
712
- {
713
- "title": "9.4",
714
- "contents": [
715
- {
716
- "type": "image",
717
- "key": "/topic-extraction/cells/img_17.jpg_r0_c1.png"
718
- }
719
- ],
720
- "children": []
721
- },
722
- {
723
- "title": "9.5",
724
- "contents": [
725
- {
726
- "type": "image",
727
- "key": "/topic-extraction/cells/img_17.jpg_r1_c0.png"
728
- }
729
- ],
730
- "children": []
731
- },
732
- {
733
- "title": "10.1",
734
- "contents": [
735
- {
736
- "type": "image",
737
- "key": "/topic-extraction/cells/img_17.jpg_r2_c1.png"
738
- }
739
- ],
740
- "children": []
741
- },
742
- {
743
- "title": "10.2",
744
- "contents": [
745
- {
746
- "type": "image",
747
- "key": "/topic-extraction/cells/img_17.jpg_r3_c0.png"
748
- }
749
- ],
750
- "children": []
751
- },
752
- {
753
- "title": "10.3",
754
- "contents": [
755
- {
756
- "type": "image",
757
- "key": "/topic-extraction/cells/img_17.jpg_r4_c0.png"
758
- }
759
- ],
760
- "children": []
761
- },
762
- {
763
- "title": "10.4",
764
- "contents": [
765
- {
766
- "type": "image",
767
- "key": "/topic-extraction/cells/img_17.jpg_r5_c0.png"
768
- }
769
- ],
770
- "children": []
771
- }
772
- ]
773
- },
774
- {
775
- "title": "Topics",
776
- "contents": [
777
- {
778
- "type": "image",
779
- "key": "/topic-extraction/cells/img_18.jpg_r0_c0.png"
780
- }
781
- ],
782
- "children": [
783
- {
784
- "title": "10.5",
785
- "contents": [
786
- {
787
- "type": "image",
788
- "key": "/topic-extraction/cells/img_18.jpg_r1_c1.png"
789
- }
790
- ],
791
- "children": []
792
- }
793
- ]
794
- },
795
- {
796
- "title": "Topics",
797
- "contents": [
798
- {
799
- "type": "image",
800
- "key": "/topic-extraction/cells/img_19.jpg_r0_c0.png"
801
- }
802
- ],
803
- "children": [
804
- {
805
- "title": "1.1",
806
- "contents": [
807
- {
808
- "type": "image",
809
- "key": "/topic-extraction/cells/img_19.jpg_r1_c1.png"
810
- }
811
- ],
812
- "children": []
813
- },
814
- {
815
- "title": "2.1",
816
- "contents": [
817
- {
818
- "type": "image",
819
- "key": "/topic-extraction/cells/img_19.jpg_r2_c1.png"
820
- }
821
- ],
822
- "children": []
823
- }
824
- ]
825
- },
826
- {
827
- "title": "Topics",
828
- "contents": [
829
- {
830
- "type": "image",
831
- "key": "/topic-extraction/cells/img_20.jpg_r0_c0.png"
832
- }
833
- ],
834
- "children": [
835
- {
836
- "title": "2.2",
837
- "contents": [
838
- {
839
- "type": "image",
840
- "key": "/topic-extraction/cells/img_20.jpg_r1_c1.png"
841
- }
842
- ],
843
- "children": []
844
- }
845
- ]
846
- },
847
- {
848
- "title": "2 Data presentation and interpretation continued",
849
- "contents": [
850
- {
851
- "type": "image",
852
- "key": "/topic-extraction/cells/img_21.jpg_r0_c0.png"
853
- }
854
- ],
855
- "children": [
856
- {
857
- "title": "2.4",
858
- "contents": [
859
- {
860
- "type": "image",
861
- "key": "/topic-extraction/cells/img_21.jpg_r0_c1.png"
862
- }
863
- ],
864
- "children": []
865
- },
866
- {
867
- "title": "3.1",
868
- "contents": [
869
- {
870
- "type": "image",
871
- "key": "/topic-extraction/cells/img_21.jpg_r1_c1.png"
872
- }
873
- ],
874
- "children": []
875
- }
876
- ]
877
- },
878
- {
879
- "title": "Topics",
880
- "contents": [
881
- {
882
- "type": "image",
883
- "key": "/topic-extraction/cells/img_22.jpg_r0_c0.png"
884
- }
885
- ],
886
- "children": [
887
- {
888
- "title": "3.3",
889
- "contents": [
890
- {
891
- "type": "image",
892
- "key": "/topic-extraction/cells/img_22.jpg_r1_c1.png"
893
- }
894
- ],
895
- "children": []
896
- },
897
- {
898
- "title": "4.1",
899
- "contents": [
900
- {
901
- "type": "image",
902
- "key": "/topic-extraction/cells/img_22.jpg_r2_c1.png"
903
- }
904
- ],
905
- "children": []
906
- },
907
- {
908
- "title": "4.2",
909
- "contents": [
910
- {
911
- "type": "image",
912
- "key": "/topic-extraction/cells/img_22.jpg_r3_c0.png"
913
- }
914
- ],
915
- "children": []
916
- }
917
- ]
918
- },
919
- {
920
- "title": "4 Statistical distributions continued",
921
- "contents": [
922
- {
923
- "type": "image",
924
- "key": "/topic-extraction/cells/img_23.jpg_r0_c0.png"
925
- }
926
- ],
927
- "children": [
928
- {
929
- "title": "4.3",
930
- "contents": [
931
- {
932
- "type": "image",
933
- "key": "/topic-extraction/cells/img_23.jpg_r0_c1.png"
934
- }
935
- ],
936
- "children": []
937
- },
938
- {
939
- "title": "5.1",
940
- "contents": [
941
- {
942
- "type": "image",
943
- "key": "/topic-extraction/cells/img_23.jpg_r1_c1.png"
944
- }
945
- ],
946
- "children": []
947
- }
948
- ]
949
- },
950
- {
951
- "title": "Topics",
952
- "contents": [
953
- {
954
- "type": "image",
955
- "key": "/topic-extraction/cells/img_24.jpg_r0_c0.png"
956
- }
957
- ],
958
- "children": [
959
- {
960
- "title": "5.2",
961
- "contents": [
962
- {
963
- "type": "image",
964
- "key": "/topic-extraction/cells/img_24.jpg_r1_c1.png"
965
- }
966
- ],
967
- "children": []
968
- },
969
- {
970
- "title": "5.3",
971
- "contents": [
972
- {
973
- "type": "image",
974
- "key": "/topic-extraction/cells/img_24.jpg_r2_c0.png"
975
- }
976
- ],
977
- "children": []
978
- }
979
- ]
980
- },
981
- {
982
- "title": "",
983
- "contents": [],
984
- "children": [
985
- {
986
- "title": "7.1",
987
- "contents": [
988
- {
989
- "type": "image",
990
- "key": "/topic-extraction/cells/img_25.jpg_r1_c0.png"
991
- }
992
- ],
993
- "children": []
994
- },
995
- {
996
- "title": "7.2",
997
- "contents": [
998
- {
999
- "type": "image",
1000
- "key": "/topic-extraction/cells/img_25.jpg_r2_c0.png"
1001
- }
1002
- ],
1003
- "children": []
1004
- },
1005
- {
1006
- "title": "7.3",
1007
- "contents": [
1008
- {
1009
- "type": "image",
1010
- "key": "/topic-extraction/cells/img_25.jpg_r3_c0.png"
1011
- }
1012
- ],
1013
- "children": []
1014
- },
1015
- {
1016
- "title": "7.4",
1017
- "contents": [
1018
- {
1019
- "type": "image",
1020
- "key": "/topic-extraction/cells/img_25.jpg_r4_c0.png"
1021
- }
1022
- ],
1023
- "children": []
1024
- },
1025
- {
1026
- "title": "7.5",
1027
- "contents": [
1028
- {
1029
- "type": "image",
1030
- "key": "/topic-extraction/cells/img_25.jpg_r5_c0.png"
1031
- }
1032
- ],
1033
- "children": []
1034
- }
1035
- ]
1036
- },
1037
- {
1038
- "title": "8 Forces and Newton's laws",
1039
- "contents": [
1040
- {
1041
- "type": "image",
1042
- "key": "/topic-extraction/cells/img_26.jpg_r0_c0.png"
1043
- }
1044
- ],
1045
- "children": [
1046
- {
1047
- "title": "8.1",
1048
- "contents": [
1049
- {
1050
- "type": "image",
1051
- "key": "/topic-extraction/cells/img_26.jpg_r0_c1.png"
1052
- }
1053
- ],
1054
- "children": []
1055
- },
1056
- {
1057
- "title": "8.2",
1058
- "contents": [
1059
- {
1060
- "type": "image",
1061
- "key": "/topic-extraction/cells/img_26.jpg_r1_c0.png"
1062
- }
1063
- ],
1064
- "children": []
1065
- },
1066
- {
1067
- "title": "8.3",
1068
- "contents": [
1069
- {
1070
- "type": "image",
1071
- "key": "/topic-extraction/cells/img_26.jpg_r2_c0.png"
1072
- }
1073
- ],
1074
- "children": []
1075
- }
1076
- ]
1077
- },
1078
- {
1079
- "title": "Topics",
1080
- "contents": [
1081
- {
1082
- "type": "image",
1083
- "key": "/topic-extraction/cells/img_27.jpg_r0_c0.png"
1084
- }
1085
- ],
1086
- "children": [
1087
- {
1088
- "title": "8.4",
1089
- "contents": [
1090
- {
1091
- "type": "image",
1092
- "key": "/topic-extraction/cells/img_27.jpg_r1_c1.png"
1093
- }
1094
- ],
1095
- "children": []
1096
- },
1097
- {
1098
- "title": "8.5",
1099
- "contents": [
1100
- {
1101
- "type": "image",
1102
- "key": "/topic-extraction/cells/img_27.jpg_r2_c0.png"
1103
- }
1104
- ],
1105
- "children": []
1106
- },
1107
- {
1108
- "title": "8.6",
1109
- "contents": [
1110
- {
1111
- "type": "image",
1112
- "key": "/topic-extraction/cells/img_27.jpg_r3_c0.png"
1113
- }
1114
- ],
1115
- "children": []
1116
- },
1117
- {
1118
- "title": "9.1",
1119
- "contents": [
1120
- {
1121
- "type": "image",
1122
- "key": "/topic-extraction/cells/img_27.jpg_r4_c1.png"
1123
- }
1124
- ],
1125
- "children": []
1126
- }
1127
- ]
1128
- },
1129
- {
1130
- "title": "Reason, interpret and communicate mathematically",
1131
- "contents": [
1132
- {
1133
- "type": "image",
1134
- "key": "/topic-extraction/cells/img_28.jpg_r1_c0.png"
1135
- }
1136
- ],
1137
- "children": []
1138
- }
1139
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
topic_extr.py CHANGED
@@ -168,99 +168,6 @@ async def classify_image_async(image_data: bytes, api_key: str, max_retries: int
168
  preprocessed = preprocess_image(image_data)
169
  return await loop.run_in_executor(None, call_gemini_for_table_classification, preprocessed, api_key, max_retries)
170
 
171
-
172
- # def call_gemini_for_subtopic_identification_image(image_data: bytes, api_key: str, max_retries: int = 1) -> dict:
173
- # for attempt in range(max_retries + 1):
174
- # try:
175
- # prompt = """
176
- # You are given an image from an educational curriculum specification. The image may contain either:
177
- # 1) A main topic heading in the format: "<number> <Topic Name>", for example "2 Algebra and functions continued".
178
- # 2) A subtopic heading in the format "<number>.<number>", for example "2.5", "2.6", or "3.4".
179
- # 3) Possibly no relevant text at all.
180
-
181
- # Your task:
182
- # 1. If the cell shows a main topic, extract the topic name (e.g. "2 Algebra and functions") and place it in the JSON key "title".
183
- # 2. If the cell shows one or more subtopic numbers (e.g. "2.5", "2.6"), collect them in the JSON key "subtopics" as an array of strings.
184
- # 3. If neither a main topic nor subtopic is detected, return empty values.
185
-
186
- # Output only valid JSON in this exact structure, with no extra text or explanation:
187
-
188
- # {
189
- # "title": "...",
190
- # "subtopics": [...]
191
- # }
192
-
193
- # Where:
194
- # - "title" is the recognized main topic (if any). Otherwise, an empty string.
195
- # - "subtopics" is an array of recognized subtopic numbers (e.g. ["2.5", "2.6"]). Otherwise, an empty array.
196
-
197
- # Examples:
198
- # 1. If the image text is "2 Algebra and functions continued", return:
199
- # {
200
- # "title": "2 Algebra and functions continued",
201
- # "subtopics": []
202
- # }
203
-
204
- # 2. If the image text is "2.5 Solve linear and quadratic inequalities ...", return:
205
- # {
206
- # "title": "",
207
- # "subtopics": ["2.5"]
208
- # }
209
-
210
- # 3. If the image text is "2.6 Manipulate polynomials algebraically ...", return:
211
- # {
212
- # "title": "",
213
- # "subtopics": ["2.6"]
214
- # }
215
-
216
- # If you cannot recognize any text matching these patterns, or if nothing is found, return:
217
- # {
218
- # "title": "",
219
- # "subtopics": []
220
- # }
221
- # """
222
- # global _GEMINI_CLIENT
223
- # if _GEMINI_CLIENT is None:
224
- # _GEMINI_CLIENT = genai.Client(api_key=api_key)
225
- # client = _GEMINI_CLIENT
226
-
227
- # resp = client.models.generate_content(
228
- # model="gemini-2.0-flash",
229
- # contents=[
230
- # {
231
- # "parts": [
232
- # {"text": prompt},
233
- # {
234
- # "inline_data": {
235
- # "mime_type": "image/jpeg",
236
- # "data": base64.b64encode(image_data).decode("utf-8")
237
- # }
238
- # }
239
- # ]
240
- # }
241
- # ],
242
- # config=types.GenerateContentConfig(temperature=0.0)
243
- # )
244
- # if not resp or not resp.text:
245
- # return {"title": "", "subtopics": []}
246
-
247
- # raw = resp.text.strip()
248
-
249
- # data = json.loads(raw)
250
- # title = data.get("title", "")
251
- # subtopics = data.get("subtopics", [])
252
- # if not isinstance(subtopics, list):
253
- # subtopics = []
254
- # return {"title": title, "subtopics": subtopics}
255
-
256
- # except Exception as e:
257
- # if attempt < max_retries:
258
- # time.sleep(0.5)
259
- # else:
260
- # return {"title": "", "subtopics": []}
261
-
262
- # return {"title": "", "subtopics": []}
263
-
264
  def call_gemini_for_subtopic_identification_image(image_data: bytes, api_key: str, max_retries: int = 1) -> dict:
265
  for attempt in range(max_retries + 1):
266
  try:
@@ -360,8 +267,6 @@ If you cannot recognize any text matching these patterns, or if nothing is found
360
 
361
  return {"title": "", "subtopics": []}
362
 
363
-
364
-
365
  class S3ImageWriter(DataWriter):
366
  def __init__(self, s3_writer: s3Writer, base_path: str, gemini_api_key: str):
367
  self.s3_writer = s3_writer
@@ -370,7 +275,7 @@ class S3ImageWriter(DataWriter):
370
  self.descriptions = {}
371
  self._img_count = 0
372
  self.extracted_tables = {}
373
- # New attribute to store final subtopic JSON
374
  self.extracted_subtopics = {}
375
 
376
  def write(self, path: str, data: bytes) -> None:
@@ -399,7 +304,6 @@ class S3ImageWriter(DataWriter):
399
  else:
400
  self.descriptions[p]['table_classification'] = result
401
 
402
- # 2) Replace the original markdown references with alt text
403
  for p, info in self.descriptions.items():
404
  cls = info['table_classification']
405
  if cls == "TWO_COLUMN":
@@ -419,7 +323,6 @@ class S3ImageWriter(DataWriter):
419
  ]
420
  return "\n".join(final_lines)
421
 
422
-
423
  async def _process_table_images_in_markdown(self, key: str, md_content: str) -> str:
424
  pat = r"!\[HAS TO BE PROCESSED - (two|three) column table\]\(([^)]+)\)"
425
  matches = re.findall(pat, md_content, flags=re.IGNORECASE)
@@ -443,7 +346,6 @@ class S3ImageWriter(DataWriter):
443
  temp_path = temp_file.name
444
 
445
  try:
446
- # 1) Extract row bounding boxes.
447
  if col_type.lower() == 'two':
448
  extractor = TableExtractor(
449
  skip_header=True,
@@ -464,10 +366,10 @@ class S3ImageWriter(DataWriter):
464
  # for i, row in enumerate(row_boxes):
465
  # logger.info(f"Row {i} has {len(row)} cells")
466
 
467
- # out_folder = temp_path + "_rows"
468
- # os.makedirs(out_folder, exist_ok=True)
469
- out_folder = os.path.join(os.path.dirname(temp_path), os.path.basename(temp_path) + "_rows")
470
  os.makedirs(out_folder, exist_ok=True)
 
 
471
 
472
  extractor.save_extracted_cells(temp_path, row_boxes, out_folder)
473
  #just to print structure how cells are saved and named for each table image
@@ -479,7 +381,7 @@ class S3ImageWriter(DataWriter):
479
  main_topic_image_key = None
480
  recognized_subtopics = []
481
 
482
- # 2) Loop over each cell image.
483
  for i, row in enumerate(row_boxes):
484
  row_dir = os.path.join(out_folder, f"row_{i}")
485
  for j, _ in enumerate(row):
@@ -499,8 +401,6 @@ class S3ImageWriter(DataWriter):
499
  cell_key = f"{self.base_path}cells/{os.path.basename(s3_key)}_r{i}_c{j}.png"
500
  self.s3_writer.write(cell_key, cell_image_data)
501
 
502
- # Log before calling Gemini.
503
- logger.debug(f"About to call Gemini for cell image: {cell_path}")
504
  info = call_gemini_for_subtopic_identification_image(cell_image_data, self.gemini_api_key)
505
  # logger.info(f"Gemini subtopic extraction result for cell {cell_path}: {info}")
506
 
@@ -542,12 +442,9 @@ class S3ImageWriter(DataWriter):
542
 
543
  return md_content
544
 
545
-
546
-
547
  def post_process(self, key: str, md_content: str) -> str:
548
  return asyncio.run(self.post_process_async(key, md_content))
549
 
550
-
551
  class LocalImageWriter(DataWriter):
552
  def __init__(self, output_folder: str, gemini_api_key: str):
553
  self.output_folder = output_folder
@@ -555,7 +452,6 @@ class LocalImageWriter(DataWriter):
555
  self.descriptions = {}
556
  self._img_count = 0
557
  self.gemini_api_key = gemini_api_key
558
-
559
  self.extracted_tables = {}
560
 
561
  def write(self, path: str, data: bytes) -> None:
@@ -567,7 +463,6 @@ class LocalImageWriter(DataWriter):
567
  "table_classification": "NO_TABLE",
568
  "final_alt": ""
569
  }
570
- # Also save the original image locally for testing.
571
  image_path = os.path.join(self.output_folder, unique_id)
572
  with open(image_path, "wb") as f:
573
  f.write(data)
@@ -859,7 +754,6 @@ Now, extract topics from this text:
859
  logger.error(f"Could not open PDF: {e}")
860
  return "\n".join(text_parts)
861
 
862
-
863
  class MineruNoTextProcessor:
864
  def __init__(self, output_folder: str, gemini_api_key: str):
865
  self.output_folder = output_folder
@@ -868,7 +762,7 @@ class MineruNoTextProcessor:
868
  self.formula_enable = True
869
  self.table_enable = False
870
  self.language = "en"
871
-
872
  self.subtopic_extractor = GeminiTopicExtractor(api_key=gemini_api_key, num_pages=20)
873
  self.gemini_api_key = gemini_api_key or os.getenv("GEMINI_API_KEY", "")
874
 
@@ -891,7 +785,7 @@ class MineruNoTextProcessor:
891
  def process(self, pdf_path: str) -> Dict[str, Any]:
892
  logger.info(f"Processing PDF: {pdf_path}")
893
  try:
894
- # 1) Possibly call subtopic_extractor on first pages to find subtopics in the PDF as a whole
895
  subtopics = self.subtopic_extractor.extract_subtopics(pdf_path)
896
  logger.info(f"Gemini returned subtopics: {subtopics}")
897
 
@@ -911,7 +805,7 @@ class MineruNoTextProcessor:
911
  total_pages = doc.page_count
912
  doc.close()
913
 
914
- # 3) Decide which pages to process
915
  final_pages = set()
916
  if not subtopics:
917
  # fallback
@@ -963,7 +857,12 @@ class MineruNoTextProcessor:
963
  formula_enable=self.formula_enable,
964
  table_enable=self.table_enable
965
  )
 
966
  writer = S3ImageWriter(self.s3_writer, "/topic-extraction", self.gemini_api_key)
 
 
 
 
967
  md_prefix = "/topic-extraction/"
968
  pipe_result = inference.pipe_ocr_mode(writer, lang=self.language)
969
  md_content = pipe_result.get_markdown(md_prefix)
@@ -991,6 +890,5 @@ if __name__ == "__main__":
991
  processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=gemini_key)
992
  result = processor.process(input_pdf)
993
  logger.info("Processing completed successfully.")
994
- # The result includes final_markdown and subtopics_extracted
995
  except Exception as e:
996
  logger.error(f"Processing failed: {e}")
 
168
  preprocessed = preprocess_image(image_data)
169
  return await loop.run_in_executor(None, call_gemini_for_table_classification, preprocessed, api_key, max_retries)
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  def call_gemini_for_subtopic_identification_image(image_data: bytes, api_key: str, max_retries: int = 1) -> dict:
172
  for attempt in range(max_retries + 1):
173
  try:
 
267
 
268
  return {"title": "", "subtopics": []}
269
 
 
 
270
  class S3ImageWriter(DataWriter):
271
  def __init__(self, s3_writer: s3Writer, base_path: str, gemini_api_key: str):
272
  self.s3_writer = s3_writer
 
275
  self.descriptions = {}
276
  self._img_count = 0
277
  self.extracted_tables = {}
278
+
279
  self.extracted_subtopics = {}
280
 
281
  def write(self, path: str, data: bytes) -> None:
 
304
  else:
305
  self.descriptions[p]['table_classification'] = result
306
 
 
307
  for p, info in self.descriptions.items():
308
  cls = info['table_classification']
309
  if cls == "TWO_COLUMN":
 
323
  ]
324
  return "\n".join(final_lines)
325
 
 
326
  async def _process_table_images_in_markdown(self, key: str, md_content: str) -> str:
327
  pat = r"!\[HAS TO BE PROCESSED - (two|three) column table\]\(([^)]+)\)"
328
  matches = re.findall(pat, md_content, flags=re.IGNORECASE)
 
346
  temp_path = temp_file.name
347
 
348
  try:
 
349
  if col_type.lower() == 'two':
350
  extractor = TableExtractor(
351
  skip_header=True,
 
366
  # for i, row in enumerate(row_boxes):
367
  # logger.info(f"Row {i} has {len(row)} cells")
368
 
369
+ out_folder = temp_path + "_rows"
 
 
370
  os.makedirs(out_folder, exist_ok=True)
371
+ # out_folder = os.path.join(os.path.dirname(temp_path), os.path.basename(temp_path) + "_rows")
372
+ # os.makedirs(out_folder, exist_ok=True)
373
 
374
  extractor.save_extracted_cells(temp_path, row_boxes, out_folder)
375
  #just to print structure how cells are saved and named for each table image
 
381
  main_topic_image_key = None
382
  recognized_subtopics = []
383
 
384
+ # Loop over each cell image.
385
  for i, row in enumerate(row_boxes):
386
  row_dir = os.path.join(out_folder, f"row_{i}")
387
  for j, _ in enumerate(row):
 
401
  cell_key = f"{self.base_path}cells/{os.path.basename(s3_key)}_r{i}_c{j}.png"
402
  self.s3_writer.write(cell_key, cell_image_data)
403
 
 
 
404
  info = call_gemini_for_subtopic_identification_image(cell_image_data, self.gemini_api_key)
405
  # logger.info(f"Gemini subtopic extraction result for cell {cell_path}: {info}")
406
 
 
442
 
443
  return md_content
444
 
 
 
445
  def post_process(self, key: str, md_content: str) -> str:
446
  return asyncio.run(self.post_process_async(key, md_content))
447
 
 
448
  class LocalImageWriter(DataWriter):
449
  def __init__(self, output_folder: str, gemini_api_key: str):
450
  self.output_folder = output_folder
 
452
  self.descriptions = {}
453
  self._img_count = 0
454
  self.gemini_api_key = gemini_api_key
 
455
  self.extracted_tables = {}
456
 
457
  def write(self, path: str, data: bytes) -> None:
 
463
  "table_classification": "NO_TABLE",
464
  "final_alt": ""
465
  }
 
466
  image_path = os.path.join(self.output_folder, unique_id)
467
  with open(image_path, "wb") as f:
468
  f.write(data)
 
754
  logger.error(f"Could not open PDF: {e}")
755
  return "\n".join(text_parts)
756
 
 
757
  class MineruNoTextProcessor:
758
  def __init__(self, output_folder: str, gemini_api_key: str):
759
  self.output_folder = output_folder
 
762
  self.formula_enable = True
763
  self.table_enable = False
764
  self.language = "en"
765
+
766
  self.subtopic_extractor = GeminiTopicExtractor(api_key=gemini_api_key, num_pages=20)
767
  self.gemini_api_key = gemini_api_key or os.getenv("GEMINI_API_KEY", "")
768
 
 
785
  def process(self, pdf_path: str) -> Dict[str, Any]:
786
  logger.info(f"Processing PDF: {pdf_path}")
787
  try:
788
+ # Possibly call subtopic_extractor on first pages to find subtopics in the PDF as a whole
789
  subtopics = self.subtopic_extractor.extract_subtopics(pdf_path)
790
  logger.info(f"Gemini returned subtopics: {subtopics}")
791
 
 
805
  total_pages = doc.page_count
806
  doc.close()
807
 
808
+ # Decide which pages to process
809
  final_pages = set()
810
  if not subtopics:
811
  # fallback
 
857
  formula_enable=self.formula_enable,
858
  table_enable=self.table_enable
859
  )
860
+ #S3
861
  writer = S3ImageWriter(self.s3_writer, "/topic-extraction", self.gemini_api_key)
862
+
863
+ #local
864
+ # writer = LocalImageWriter(self.output_folder, self.gemini_api_key)
865
+
866
  md_prefix = "/topic-extraction/"
867
  pipe_result = inference.pipe_ocr_mode(writer, lang=self.language)
868
  md_content = pipe_result.get_markdown(md_prefix)
 
890
  processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=gemini_key)
891
  result = processor.process(input_pdf)
892
  logger.info("Processing completed successfully.")
 
893
  except Exception as e:
894
  logger.error(f"Processing failed: {e}")
topic_extract_arsenii.py CHANGED
@@ -180,73 +180,50 @@ def call_gemini_for_subtopic_identification_image(image_data: bytes, api_key: st
180
  "subtopics": ["2.5", "2.6", ...]
181
  }
182
  """
183
- for attempt in range(max_retries + 1):
184
- try:
185
- # Prompt specifically instructs Gemini to read the image’s text and extract
186
- # either a main topic or subtopic heading if present:
187
- prompt = """
188
- You are given an image of a table cell from an educational curriculum specification.
189
- The text in this cell may contain:
190
- 1) A main topic heading in the format "<number> <Topic Name>", for example: "2 Algebra and functions"
191
- 2) A subtopic heading in the format "<number>.<number>", for example: "2.5" or "3.4"
192
- Identify if the cell contains exactly one main topic or subtopic.
193
- Return a valid JSON object with the keys "title" and "subtopics" of the form:
194
- {{
195
- "title": "2 Algebra and functions",
196
- "subtopics": ["2.5", "2.6"]
197
- }}
198
- If you find a main topic (like '2 Algebra and functions'), put it in "title".
199
- If you find subtopic numbers (like '2.5', '3.4'), put them in the "subtopics" array.
200
- """
201
-
202
- # Re-use or initialize your global Gemini client:
203
- global _GEMINI_CLIENT
204
- if _GEMINI_CLIENT is None:
205
- _GEMINI_CLIENT = genai.Client(api_key=api_key)
206
- client = _GEMINI_CLIENT
207
-
208
- # Send the prompt + image to Gemini:
209
- resp = client.models.generate_content(
210
- model="gemini-2.0-flash",
211
- contents=[
212
  {
213
- "parts": [
214
- {"text": prompt},
215
- {
216
- "inline_data": {
217
- "mime_type": "image/jpeg",
218
- "data": base64.b64encode(image_data).decode("utf-8")
219
- }
220
- }
221
- ]
222
  }
223
- ],
224
- config=types.GenerateContentConfig(temperature=0.0)
225
- )
226
- # if not resp or not resp.text:
227
- # return {"title": "", "subtopics": []}
228
-
229
- raw = resp.text.strip().replace("```json", "").replace("```", "")
230
- logger.info(f"== RAW == {raw}")
231
-
232
- # Attempt to parse JSON from Gemini’s response:
233
- data = json.loads(raw)
234
- title = data.get("title", "")
235
- subtopics = data.get("subtopics", [])
236
- if not isinstance(subtopics, list):
237
- subtopics = []
238
- return {"title": title, "subtopics": subtopics}
239
-
240
- except Exception as e:
241
- # Retry logic if you like:
242
- if attempt < max_retries:
243
- time.sleep(0.5)
244
- else:
245
- return {"title": "", "subtopics": []}
246
- # fallback:
247
- return {"title": "", "subtopics": []}
248
-
249
 
 
 
 
 
250
 
251
 
252
  class S3ImageWriter(DataWriter):
@@ -357,8 +334,8 @@ class S3ImageWriter(DataWriter):
357
  row_dir = os.path.join(out_folder, f"row_{i}")
358
  for j, _ in enumerate(row):
359
  cell_path = os.path.join(row_dir, f"col_{j}.jpg")
360
- if not os.path.isfile(cell_path):
361
- continue
362
 
363
  with open(cell_path, "rb") as cf:
364
  cell_image_data = cf.read()
@@ -369,7 +346,7 @@ class S3ImageWriter(DataWriter):
369
 
370
  # Call Gemini with the cell image
371
  info = call_gemini_for_subtopic_identification_image(cell_image_data, self.gemini_api_key)
372
- logger.info(f"== INFO == {info}")
373
  # e.g. info = {"title": "2 Algebra and functions", "subtopics": ["2.5"]}
374
 
375
  # 3d) Merge the recognized topic/subtopics
@@ -400,6 +377,7 @@ class S3ImageWriter(DataWriter):
400
  ],
401
  "children": []
402
  }
 
403
  for st in recognized_subtopics:
404
  final_json["children"].append({
405
  "title": st,
@@ -410,7 +388,6 @@ class S3ImageWriter(DataWriter):
410
 
411
  self.extracted_subtopics[s3_key] = final_json
412
 
413
-
414
  # Replace the original table image line in the markdown with the snippet
415
  new_snip = "\n".join(snippet)
416
  old_line = f"![HAS TO BE PROCESSED - {col_type} column table]({s3_key})"
 
180
  "subtopics": ["2.5", "2.6", ...]
181
  }
182
  """
183
+ # Prompt specifically instructs Gemini to read the image’s text and extract
184
+ # either a main topic or subtopic heading if present:
185
+ prompt = """
186
+ You are given an image of a table cell from an educational curriculum specification.
187
+ The text in this cell may contain:
188
+ 1) A main topic heading in the format "<number> <Topic Name>", for example: "2 Algebra and functions"
189
+ 2) A subtopic heading in the format "<number>.<number>", for example: "2.5" or "3.4"
190
+ Identify if the cell contains exactly one main topic or subtopic.
191
+ Return a valid JSON object with the keys "title" and "subtopics" of the form:
192
+ {{
193
+ "title": "2 Algebra and functions",
194
+ "subtopics": ["2.5", "2.6"]
195
+ }}
196
+ If you find a main topic (like '2 Algebra and functions'), put it in "title".
197
+ If you find subtopic numbers (like '2.5', '3.4'), put them in the "subtopics" array.
198
+ """
199
+ # Re-use or initialize your global Gemini client:
200
+ client = genai.Client(api_key=api_key)
201
+ # Send the prompt + image to Gemini:
202
+ resp = client.models.generate_content(
203
+ model="gemini-2.0-flash",
204
+ contents=[
205
+ {
206
+ "parts": [
207
+ {"text": prompt},
 
 
 
 
208
  {
209
+ "inline_data": {
210
+ "mime_type": "image/jpeg",
211
+ "data": base64.b64encode(image_data).decode("utf-8")
212
+ }
 
 
 
 
 
213
  }
214
+ ]
215
+ }
216
+ ],
217
+ config=types.GenerateContentConfig(temperature=0.0)
218
+ )
219
+ raw = resp.text.strip().replace("```json", "").replace("```", "")
220
+ logger.info(f"== RAW == {raw}")
221
+ print(f"== RAW == {raw}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
+ data = json.loads(raw)
224
+ title = data["title"]
225
+ subtopics = data["subtopics"]
226
+ return {"title": title, "subtopics": subtopics}
227
 
228
 
229
  class S3ImageWriter(DataWriter):
 
334
  row_dir = os.path.join(out_folder, f"row_{i}")
335
  for j, _ in enumerate(row):
336
  cell_path = os.path.join(row_dir, f"col_{j}.jpg")
337
+ # if not os.path.isfile(cell_path):
338
+ # continue
339
 
340
  with open(cell_path, "rb") as cf:
341
  cell_image_data = cf.read()
 
346
 
347
  # Call Gemini with the cell image
348
  info = call_gemini_for_subtopic_identification_image(cell_image_data, self.gemini_api_key)
349
+ logger.debug(f"== INFO == {info}")
350
  # e.g. info = {"title": "2 Algebra and functions", "subtopics": ["2.5"]}
351
 
352
  # 3d) Merge the recognized topic/subtopics
 
377
  ],
378
  "children": []
379
  }
380
+
381
  for st in recognized_subtopics:
382
  final_json["children"].append({
383
  "title": st,
 
388
 
389
  self.extracted_subtopics[s3_key] = final_json
390
 
 
391
  # Replace the original table image line in the markdown with the snippet
392
  new_snip = "\n".join(snippet)
393
  old_line = f"![HAS TO BE PROCESSED - {col_type} column table]({s3_key})"
topic_extraction.log CHANGED
@@ -5208,3 +5208,111 @@ and series'. Using page 7.
5208
  2025-03-03 17:13:39,435 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c0.png
5209
  2025-03-03 17:13:41,175 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c1.png
5210
  2025-03-03 17:13:42,923 [INFO] __main__ - GPU memory cleaned up.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5208
  2025-03-03 17:13:39,435 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c0.png
5209
  2025-03-03 17:13:41,175 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/img_1.jpg_r1_c1.png
5210
  2025-03-03 17:13:42,923 [INFO] __main__ - GPU memory cleaned up.
5211
+ 2025-03-03 17:18:11,461 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf
5212
+ 2025-03-03 17:18:12,239 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]}
5213
+ 2025-03-03 17:18:12,240 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf'
5214
+ 2025-03-03 17:18:12,539 [INFO] __main__ - Computed global offset: 4
5215
+ 2025-03-03 17:18:12,540 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]
5216
+ 2025-03-03 17:19:17,416 [INFO] __main__ - Classifying images to detect tables.
5217
+ 2025-03-03 17:19:44,737 [INFO] __main__ - Processing table image => img_1.jpg, columns=three
5218
+ 2025-03-03 17:19:47,862 [INFO] __main__ - Processing table image => img_2.jpg, columns=three
5219
+ 2025-03-03 17:19:50,752 [INFO] __main__ - Processing table image => img_3.jpg, columns=three
5220
+ 2025-03-03 17:19:53,861 [INFO] __main__ - Processing table image => img_4.jpg, columns=three
5221
+ 2025-03-03 17:19:56,969 [INFO] __main__ - Processing table image => img_5.jpg, columns=three
5222
+ 2025-03-03 17:20:00,413 [INFO] __main__ - Processing table image => img_6.jpg, columns=three
5223
+ 2025-03-03 17:20:04,193 [INFO] __main__ - Processing table image => img_7.jpg, columns=three
5224
+ 2025-03-03 17:20:07,968 [INFO] __main__ - Processing table image => img_8.jpg, columns=three
5225
+ 2025-03-03 17:20:11,413 [INFO] __main__ - Processing table image => img_9.jpg, columns=three
5226
+ 2025-03-03 17:20:14,895 [INFO] __main__ - Processing table image => img_10.jpg, columns=three
5227
+ 2025-03-03 17:20:18,856 [INFO] __main__ - Processing table image => img_11.jpg, columns=three
5228
+ 2025-03-03 17:20:22,697 [INFO] __main__ - Processing table image => img_12.jpg, columns=three
5229
+ 2025-03-03 17:20:25,904 [INFO] __main__ - Processing table image => img_13.jpg, columns=three
5230
+ 2025-03-03 17:20:29,171 [INFO] __main__ - Processing table image => img_14.jpg, columns=two
5231
+ 2025-03-03 17:20:34,203 [INFO] __main__ - Processing table image => img_15.jpg, columns=three
5232
+ 2025-03-03 17:20:39,122 [INFO] __main__ - Processing table image => img_16.jpg, columns=three
5233
+ 2025-03-03 17:20:43,452 [INFO] __main__ - Processing table image => img_17.jpg, columns=three
5234
+ 2025-03-03 17:20:47,324 [INFO] __main__ - Processing table image => img_18.jpg, columns=three
5235
+ 2025-03-03 17:20:48,254 [INFO] __main__ - Processing table image => img_19.jpg, columns=three
5236
+ 2025-03-03 17:20:50,879 [INFO] __main__ - Processing table image => img_20.jpg, columns=two
5237
+ 2025-03-03 17:20:54,283 [INFO] __main__ - Processing table image => img_21.jpg, columns=three
5238
+ 2025-03-03 17:20:57,784 [INFO] __main__ - Processing table image => img_22.jpg, columns=three
5239
+ 2025-03-03 17:21:01,431 [INFO] __main__ - Processing table image => img_23.jpg, columns=three
5240
+ 2025-03-03 17:21:04,472 [INFO] __main__ - Processing table image => img_24.jpg, columns=three
5241
+ 2025-03-03 17:21:07,947 [INFO] __main__ - Processing table image => img_25.jpg, columns=three
5242
+ 2025-03-03 17:21:11,586 [INFO] __main__ - Processing table image => img_26.jpg, columns=three
5243
+ 2025-03-03 17:21:14,419 [INFO] __main__ - Processing table image => img_27.jpg, columns=three
5244
+ 2025-03-03 17:21:17,542 [INFO] __main__ - Processing table image => img_28.jpg, columns=two
5245
+ 2025-03-03 17:21:21,034 [INFO] __main__ - GPU memory cleaned up.
5246
+ 2025-03-03 17:21:21,035 [ERROR] __main__ - Processing failed: 'LocalImageWriter' object has no attribute 'extracted_subtopics'
5247
+ 2025-03-03 17:23:42,864 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf
5248
+ 2025-03-03 17:23:43,614 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]}
5249
+ 2025-03-03 17:23:43,615 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf'
5250
+ 2025-03-03 17:23:43,926 [INFO] __main__ - Computed global offset: 4
5251
+ 2025-03-03 17:23:43,927 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]
5252
+ 2025-03-03 17:24:49,282 [INFO] __main__ - Classifying images to detect tables.
5253
+ 2025-03-03 17:25:17,902 [INFO] __main__ - Processing table image => img_1.jpg, columns=three
5254
+ 2025-03-03 17:25:20,696 [INFO] __main__ - Processing table image => img_2.jpg, columns=three
5255
+ 2025-03-03 17:25:23,663 [INFO] __main__ - Processing table image => img_3.jpg, columns=three
5256
+ 2025-03-03 17:25:26,866 [INFO] __main__ - Processing table image => img_4.jpg, columns=three
5257
+ 2025-03-03 17:25:30,504 [INFO] __main__ - Processing table image => img_5.jpg, columns=three
5258
+ 2025-03-03 17:25:35,390 [INFO] __main__ - Processing table image => img_6.jpg, columns=three
5259
+ 2025-03-03 17:25:39,596 [INFO] __main__ - Processing table image => img_7.jpg, columns=three
5260
+ 2025-03-03 17:25:43,260 [INFO] __main__ - Processing table image => img_8.jpg, columns=three
5261
+ 2025-03-03 17:25:46,325 [INFO] __main__ - Processing table image => img_9.jpg, columns=three
5262
+ 2025-03-03 17:25:50,400 [INFO] __main__ - Processing table image => img_10.jpg, columns=three
5263
+ 2025-03-03 17:25:53,575 [INFO] __main__ - Processing table image => img_11.jpg, columns=three
5264
+ 2025-03-03 17:25:57,251 [INFO] __main__ - Processing table image => img_12.jpg, columns=three
5265
+ 2025-03-03 17:26:00,714 [INFO] __main__ - Processing table image => img_13.jpg, columns=three
5266
+ 2025-03-03 17:26:03,441 [INFO] __main__ - Processing table image => img_14.jpg, columns=two
5267
+ 2025-03-03 17:26:06,715 [INFO] __main__ - Processing table image => img_15.jpg, columns=three
5268
+ 2025-03-03 17:26:09,908 [INFO] __main__ - Processing table image => img_16.jpg, columns=three
5269
+ 2025-03-03 17:26:13,326 [INFO] __main__ - Processing table image => img_17.jpg, columns=three
5270
+ 2025-03-03 17:26:16,837 [INFO] __main__ - Processing table image => img_18.jpg, columns=three
5271
+ 2025-03-03 17:26:17,845 [INFO] __main__ - Processing table image => img_19.jpg, columns=three
5272
+ 2025-03-03 17:26:20,525 [INFO] __main__ - Processing table image => img_20.jpg, columns=two
5273
+ 2025-03-03 17:26:25,219 [INFO] __main__ - Processing table image => img_21.jpg, columns=three
5274
+ 2025-03-03 17:26:29,725 [INFO] __main__ - Processing table image => img_22.jpg, columns=three
5275
+ 2025-03-03 17:26:33,240 [INFO] __main__ - Processing table image => img_23.jpg, columns=three
5276
+ 2025-03-03 17:26:36,140 [INFO] __main__ - Processing table image => img_24.jpg, columns=three
5277
+ 2025-03-03 17:26:39,453 [INFO] __main__ - Processing table image => img_25.jpg, columns=three
5278
+ 2025-03-03 17:26:42,611 [INFO] __main__ - Processing table image => img_26.jpg, columns=three
5279
+ 2025-03-03 17:26:45,575 [INFO] __main__ - Processing table image => img_27.jpg, columns=three
5280
+ 2025-03-03 17:26:48,347 [INFO] __main__ - Processing table image => img_28.jpg, columns=two
5281
+ 2025-03-03 17:26:52,250 [INFO] __main__ - GPU memory cleaned up.
5282
+ 2025-03-03 17:26:52,250 [ERROR] __main__ - Processing failed: 'LocalImageWriter' object has no attribute 'extracted_subtopics'
5283
+ 2025-03-03 17:29:15,373 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf
5284
+ 2025-03-03 17:29:16,145 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]}
5285
+ 2025-03-03 17:29:16,146 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf'
5286
+ 2025-03-03 17:29:16,646 [INFO] __main__ - Computed global offset: 4
5287
+ 2025-03-03 17:29:16,647 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]
5288
+ 2025-03-03 17:30:37,147 [INFO] __main__ - Classifying images to detect tables.
5289
+ 2025-03-03 17:31:06,356 [INFO] __main__ - Processing table image => img_1.jpg, columns=three
5290
+ 2025-03-03 17:31:10,484 [INFO] __main__ - Processing table image => img_2.jpg, columns=three
5291
+ 2025-03-03 17:31:14,433 [INFO] __main__ - Processing table image => img_3.jpg, columns=three
5292
+ 2025-03-03 17:31:18,298 [INFO] __main__ - Processing table image => img_4.jpg, columns=three
5293
+ 2025-03-03 17:31:22,342 [INFO] __main__ - Processing table image => img_5.jpg, columns=three
5294
+ 2025-03-03 17:31:25,875 [INFO] __main__ - Processing table image => img_6.jpg, columns=three
5295
+ 2025-03-03 17:31:30,066 [INFO] __main__ - Processing table image => img_7.jpg, columns=three
5296
+ 2025-03-03 17:31:34,442 [INFO] __main__ - Processing table image => img_8.jpg, columns=three
5297
+ 2025-03-03 17:31:38,680 [INFO] __main__ - Processing table image => img_9.jpg, columns=three
5298
+ 2025-03-03 17:31:42,870 [INFO] __main__ - Processing table image => img_10.jpg, columns=three
5299
+ 2025-03-03 17:31:46,601 [INFO] __main__ - Processing table image => img_11.jpg, columns=three
5300
+ 2025-03-03 17:31:50,338 [INFO] __main__ - Processing table image => img_12.jpg, columns=three
5301
+ 2025-03-03 17:31:54,258 [INFO] __main__ - Processing table image => img_13.jpg, columns=three
5302
+ 2025-03-03 17:31:57,952 [INFO] __main__ - Processing table image => img_14.jpg, columns=two
5303
+ 2025-03-03 17:32:02,306 [INFO] __main__ - Processing table image => img_15.jpg, columns=three
5304
+ 2025-03-03 17:32:06,554 [INFO] __main__ - Processing table image => img_16.jpg, columns=three
5305
+ 2025-03-03 17:32:11,743 [INFO] __main__ - Processing table image => img_17.jpg, columns=three
5306
+ 2025-03-03 17:32:16,224 [INFO] __main__ - Processing table image => img_18.jpg, columns=three
5307
+ 2025-03-03 17:32:17,405 [INFO] __main__ - Processing table image => img_19.jpg, columns=three
5308
+ 2025-03-03 17:32:20,461 [INFO] __main__ - Processing table image => img_20.jpg, columns=two
5309
+ 2025-03-03 17:32:24,355 [INFO] __main__ - Processing table image => img_21.jpg, columns=three
5310
+ 2025-03-03 17:32:28,605 [INFO] __main__ - Processing table image => img_22.jpg, columns=three
5311
+ 2025-03-03 17:32:33,370 [INFO] __main__ - Processing table image => img_23.jpg, columns=three
5312
+ 2025-03-03 17:32:36,960 [INFO] __main__ - Processing table image => img_24.jpg, columns=three
5313
+ 2025-03-03 17:32:41,190 [INFO] __main__ - Processing table image => img_25.jpg, columns=three
5314
+ 2025-03-03 17:32:45,789 [INFO] __main__ - Processing table image => img_26.jpg, columns=three
5315
+ 2025-03-03 17:32:49,145 [INFO] __main__ - Processing table image => img_27.jpg, columns=three
5316
+ 2025-03-03 17:32:51,859 [INFO] __main__ - Processing table image => img_28.jpg, columns=two
5317
+ 2025-03-03 17:32:55,099 [INFO] __main__ - GPU memory cleaned up.
5318
+ 2025-03-03 17:32:55,099 [ERROR] __main__ - Processing failed: 'LocalImageWriter' object has no attribute 'extracted_subtopics'
topic_extraction_ars.log CHANGED
@@ -458,3 +458,289 @@
458
  2025-03-03 15:56:58,131 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/we_ars/final_subtopics.json
459
  2025-03-03 15:56:58,438 [INFO] __main__ - GPU memory cleaned up.
460
  2025-03-03 15:56:58,445 [INFO] __main__ - Processing completed successfully.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  2025-03-03 15:56:58,131 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/we_ars/final_subtopics.json
459
  2025-03-03 15:56:58,438 [INFO] __main__ - GPU memory cleaned up.
460
  2025-03-03 15:56:58,445 [INFO] __main__ - Processing completed successfully.
461
+ 2025-03-03 17:28:40,888 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf
462
+ 2025-03-03 17:28:41,627 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]}
463
+ 2025-03-03 17:28:41,628 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf'
464
+ 2025-03-03 17:28:41,960 [INFO] __main__ - Computed global offset: 4
465
+ 2025-03-03 17:28:41,961 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]
466
+ 2025-03-03 17:29:47,681 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg
467
+ 2025-03-03 17:29:50,244 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg
468
+ 2025-03-03 17:29:50,897 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg
469
+ 2025-03-03 17:29:51,556 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg
470
+ 2025-03-03 17:29:52,183 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg
471
+ 2025-03-03 17:29:52,887 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg
472
+ 2025-03-03 17:29:53,485 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg
473
+ 2025-03-03 17:29:54,194 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg
474
+ 2025-03-03 17:29:54,820 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg
475
+ 2025-03-03 17:29:55,457 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg
476
+ 2025-03-03 17:29:56,019 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg
477
+ 2025-03-03 17:29:56,666 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg
478
+ 2025-03-03 17:29:57,238 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg
479
+ 2025-03-03 17:29:57,934 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg
480
+ 2025-03-03 17:29:58,524 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg
481
+ 2025-03-03 17:29:59,210 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg
482
+ 2025-03-03 17:29:59,902 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg
483
+ 2025-03-03 17:30:00,309 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg
484
+ 2025-03-03 17:30:01,021 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg
485
+ 2025-03-03 17:30:01,692 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg
486
+ 2025-03-03 17:30:02,389 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg
487
+ 2025-03-03 17:30:03,066 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg
488
+ 2025-03-03 17:30:03,630 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg
489
+ 2025-03-03 17:30:04,225 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg
490
+ 2025-03-03 17:30:04,890 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg
491
+ 2025-03-03 17:30:05,488 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg
492
+ 2025-03-03 17:30:06,047 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg
493
+ 2025-03-03 17:30:06,794 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg
494
+ 2025-03-03 17:30:07,237 [INFO] __main__ - Classifying images to detect tables.
495
+ 2025-03-03 17:30:11,295 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three
496
+ 2025-03-03 17:30:15,135 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c0.jpg
497
+ 2025-03-03 17:30:15,423 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r0_c1.jpg
498
+ 2025-03-03 17:30:15,662 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c0.jpg
499
+ 2025-03-03 17:30:15,897 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_1.jpg_r1_c1.jpg
500
+ 2025-03-03 17:30:15,898 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three
501
+ 2025-03-03 17:30:20,773 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c0.jpg
502
+ 2025-03-03 17:30:21,085 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r0_c1.jpg
503
+ 2025-03-03 17:30:21,321 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c0.jpg
504
+ 2025-03-03 17:30:21,556 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r1_c1.jpg
505
+ 2025-03-03 17:30:21,799 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r2_c0.jpg
506
+ 2025-03-03 17:30:22,035 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_2.jpg_r3_c0.jpg
507
+ 2025-03-03 17:30:22,036 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three
508
+ 2025-03-03 17:30:27,289 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c0.jpg
509
+ 2025-03-03 17:30:27,603 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_3.jpg_r0_c1.jpg
510
+ 2025-03-03 17:30:27,603 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three
511
+ 2025-03-03 17:30:33,266 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c0.jpg
512
+ 2025-03-03 17:30:33,573 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r0_c1.jpg
513
+ 2025-03-03 17:30:33,831 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c0.jpg
514
+ 2025-03-03 17:30:34,027 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_4.jpg_r1_c1.jpg
515
+ 2025-03-03 17:30:34,028 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three
516
+ 2025-03-03 17:30:39,478 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c0.jpg
517
+ 2025-03-03 17:30:39,772 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r0_c1.jpg
518
+ 2025-03-03 17:30:39,984 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c0.jpg
519
+ 2025-03-03 17:30:40,240 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r1_c1.jpg
520
+ 2025-03-03 17:30:40,466 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_5.jpg_r2_c0.jpg
521
+ 2025-03-03 17:30:40,467 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three
522
+ 2025-03-03 17:30:44,908 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c0.jpg
523
+ 2025-03-03 17:30:45,224 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r0_c1.jpg
524
+ 2025-03-03 17:30:45,474 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c0.jpg
525
+ 2025-03-03 17:30:45,669 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r1_c1.jpg
526
+ 2025-03-03 17:30:45,909 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_6.jpg_r2_c0.jpg
527
+ 2025-03-03 17:30:45,910 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three
528
+ 2025-03-03 17:30:50,049 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c0.jpg
529
+ 2025-03-03 17:30:50,338 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r0_c1.jpg
530
+ 2025-03-03 17:30:50,577 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c0.jpg
531
+ 2025-03-03 17:30:50,772 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r1_c1.jpg
532
+ 2025-03-03 17:30:51,001 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_7.jpg_r2_c0.jpg
533
+ 2025-03-03 17:30:51,001 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three
534
+ 2025-03-03 17:30:54,784 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c0.jpg
535
+ 2025-03-03 17:30:55,093 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r0_c1.jpg
536
+ 2025-03-03 17:30:55,328 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c0.jpg
537
+ 2025-03-03 17:30:55,552 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r1_c1.jpg
538
+ 2025-03-03 17:30:55,777 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r2_c0.jpg
539
+ 2025-03-03 17:30:56,026 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r3_c0.jpg
540
+ 2025-03-03 17:30:56,240 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_8.jpg_r4_c0.jpg
541
+ 2025-03-03 17:30:56,240 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three
542
+ 2025-03-03 17:31:00,457 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r0_c0.jpg
543
+ 2025-03-03 17:31:00,759 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_9.jpg_r1_c0.jpg
544
+ 2025-03-03 17:31:00,760 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three
545
+ 2025-03-03 17:31:04,717 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c0.jpg
546
+ 2025-03-03 17:31:04,985 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r0_c1.jpg
547
+ 2025-03-03 17:31:05,239 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r1_c0.jpg
548
+ 2025-03-03 17:31:05,455 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r2_c0.jpg
549
+ 2025-03-03 17:31:05,683 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_10.jpg_r3_c0.jpg
550
+ 2025-03-03 17:31:05,684 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=three
551
+ 2025-03-03 17:31:10,692 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r0_c0.jpg
552
+ 2025-03-03 17:31:11,003 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r1_c0.jpg
553
+ 2025-03-03 17:31:11,245 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r2_c0.jpg
554
+ 2025-03-03 17:31:11,435 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r3_c0.jpg
555
+ 2025-03-03 17:31:11,655 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_11.jpg_r4_c0.jpg
556
+ 2025-03-03 17:31:11,655 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three
557
+ 2025-03-03 17:31:15,894 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c0.jpg
558
+ 2025-03-03 17:31:16,213 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r0_c1.jpg
559
+ 2025-03-03 17:31:16,433 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c0.jpg
560
+ 2025-03-03 17:31:16,670 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r1_c1.jpg
561
+ 2025-03-03 17:31:16,928 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c0.jpg
562
+ 2025-03-03 17:31:17,120 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_12.jpg_r2_c1.jpg
563
+ 2025-03-03 17:31:17,120 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three
564
+ 2025-03-03 17:31:20,856 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c0.jpg
565
+ 2025-03-03 17:31:21,154 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r0_c1.jpg
566
+ 2025-03-03 17:31:21,398 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c0.jpg
567
+ 2025-03-03 17:31:21,637 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r1_c1.jpg
568
+ 2025-03-03 17:31:21,856 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r2_c0.jpg
569
+ 2025-03-03 17:31:22,094 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_13.jpg_r3_c0.jpg
570
+ 2025-03-03 17:31:22,095 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=two
571
+ 2025-03-03 17:31:27,406 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r0_c0.jpg
572
+ 2025-03-03 17:31:27,685 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_14.jpg_r1_c0.jpg
573
+ 2025-03-03 17:31:27,686 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three
574
+ 2025-03-03 17:31:32,916 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c0.jpg
575
+ 2025-03-03 17:31:33,211 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r0_c1.jpg
576
+ 2025-03-03 17:31:33,422 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r1_c0.jpg
577
+ 2025-03-03 17:31:33,672 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r2_c0.jpg
578
+ 2025-03-03 17:31:33,904 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_15.jpg_r3_c0.jpg
579
+ 2025-03-03 17:31:33,904 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three
580
+ 2025-03-03 17:31:39,209 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c0.jpg
581
+ 2025-03-03 17:31:39,525 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r0_c1.jpg
582
+ 2025-03-03 17:31:39,778 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r1_c0.jpg
583
+ 2025-03-03 17:31:40,003 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c0.jpg
584
+ 2025-03-03 17:31:40,232 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r2_c1.jpg
585
+ 2025-03-03 17:31:40,479 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r3_c0.jpg
586
+ 2025-03-03 17:31:40,707 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_16.jpg_r4_c0.jpg
587
+ 2025-03-03 17:31:40,708 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three
588
+ 2025-03-03 17:31:45,922 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c0.jpg
589
+ 2025-03-03 17:31:46,235 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r0_c1.jpg
590
+ 2025-03-03 17:31:46,463 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r1_c0.jpg
591
+ 2025-03-03 17:31:46,691 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c0.jpg
592
+ 2025-03-03 17:31:46,878 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r2_c1.jpg
593
+ 2025-03-03 17:31:47,130 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r3_c0.jpg
594
+ 2025-03-03 17:31:47,375 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_17.jpg_r4_c0.jpg
595
+ 2025-03-03 17:31:47,376 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three
596
+ 2025-03-03 17:31:49,248 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c0.jpg
597
+ 2025-03-03 17:31:49,508 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r0_c1.jpg
598
+ 2025-03-03 17:31:49,702 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c0.jpg
599
+ 2025-03-03 17:31:49,890 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_18.jpg_r1_c1.jpg
600
+ 2025-03-03 17:31:49,891 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three
601
+ 2025-03-03 17:31:53,834 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c0.jpg
602
+ 2025-03-03 17:31:54,137 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r0_c1.jpg
603
+ 2025-03-03 17:31:54,379 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c0.jpg
604
+ 2025-03-03 17:31:54,577 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r1_c1.jpg
605
+ 2025-03-03 17:31:54,793 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c0.jpg
606
+ 2025-03-03 17:31:55,019 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_19.jpg_r2_c1.jpg
607
+ 2025-03-03 17:31:55,019 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=two
608
+ 2025-03-03 17:32:00,652 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_20.jpg_r0_c0.jpg
609
+ 2025-03-03 17:32:00,653 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three
610
+ 2025-03-03 17:32:05,661 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c0.jpg
611
+ 2025-03-03 17:32:05,960 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r0_c1.jpg
612
+ 2025-03-03 17:32:06,196 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c0.jpg
613
+ 2025-03-03 17:32:06,457 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r1_c1.jpg
614
+ 2025-03-03 17:32:06,707 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r2_c0.jpg
615
+ 2025-03-03 17:32:06,940 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_21.jpg_r2_c1.jpg
616
+ 2025-03-03 17:32:06,941 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three
617
+ 2025-03-03 17:32:12,376 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c0.jpg
618
+ 2025-03-03 17:32:12,703 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r0_c1.jpg
619
+ 2025-03-03 17:32:12,940 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_22.jpg_r1_c0.jpg
620
+ 2025-03-03 17:32:12,941 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three
621
+ 2025-03-03 17:32:17,156 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c0.jpg
622
+ 2025-03-03 17:32:17,423 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r0_c1.jpg
623
+ 2025-03-03 17:32:17,698 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c0.jpg
624
+ 2025-03-03 17:32:17,937 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_23.jpg_r1_c1.jpg
625
+ 2025-03-03 17:32:17,938 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three
626
+ 2025-03-03 17:32:23,150 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c0.jpg
627
+ 2025-03-03 17:32:23,450 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r0_c1.jpg
628
+ 2025-03-03 17:32:23,675 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c0.jpg
629
+ 2025-03-03 17:32:23,918 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r1_c1.jpg
630
+ 2025-03-03 17:32:24,135 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_24.jpg_r2_c0.jpg
631
+ 2025-03-03 17:32:24,136 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=three
632
+ 2025-03-03 17:32:29,269 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c0.jpg
633
+ 2025-03-03 17:32:29,548 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r0_c1.jpg
634
+ 2025-03-03 17:32:29,771 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c0.jpg
635
+ 2025-03-03 17:32:30,016 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_25.jpg_r1_c1.jpg
636
+ 2025-03-03 17:32:30,016 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three
637
+ 2025-03-03 17:32:34,291 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c0.jpg
638
+ 2025-03-03 17:32:34,576 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r0_c1.jpg
639
+ 2025-03-03 17:32:34,811 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c0.jpg
640
+ 2025-03-03 17:32:35,083 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r1_c1.jpg
641
+ 2025-03-03 17:32:35,298 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_26.jpg_r2_c0.jpg
642
+ 2025-03-03 17:32:35,299 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three
643
+ 2025-03-03 17:32:39,414 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c0.jpg
644
+ 2025-03-03 17:32:39,710 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r0_c1.jpg
645
+ 2025-03-03 17:32:39,965 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c0.jpg
646
+ 2025-03-03 17:32:40,181 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r1_c1.jpg
647
+ 2025-03-03 17:32:40,393 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r2_c0.jpg
648
+ 2025-03-03 17:32:40,629 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r3_c0.jpg
649
+ 2025-03-03 17:32:40,852 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c0.jpg
650
+ 2025-03-03 17:32:41,080 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_27.jpg_r4_c1.jpg
651
+ 2025-03-03 17:32:41,080 [INFO] __main__ - Processing table image: /topic-extraction/img_28.jpg, columns=two
652
+ 2025-03-03 17:32:45,688 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r0_c0.jpg
653
+ 2025-03-03 17:32:45,999 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r1_c0.jpg
654
+ 2025-03-03 17:32:46,226 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c0.jpg
655
+ 2025-03-03 17:32:46,462 [INFO] __main__ - Uploaded to S3: /topic-extraction/cells/table_s3_img_28.jpg_r2_c1.jpg
656
+ 2025-03-03 17:32:46,468 [INFO] __main__ - Final subtopics JSON saved locally at /home/user/app/we/we_ars/final_subtopics.json
657
+ 2025-03-03 17:32:46,930 [INFO] __main__ - GPU memory cleaned up.
658
+ 2025-03-03 17:32:46,940 [INFO] __main__ - Processing completed successfully.
659
+ 2025-03-03 17:42:37,923 [INFO] __main__ - Processing PDF: /home/user/app/input_output/a-level-pearson-mathematics-specification.pdf
660
+ 2025-03-03 17:42:38,720 [INFO] __main__ - Gemini returned subtopics: {'Paper 1 and Paper 2: Pure Mathematics': [11, 29], 'Paper 3: Statistics and Mechanics': [30, 40]}
661
+ 2025-03-03 17:42:38,721 [INFO] __main__ - Loaded 1135473 bytes from local file '/home/user/app/input_output/a-level-pearson-mathematics-specification.pdf'
662
+ 2025-03-03 17:42:39,089 [INFO] __main__ - Computed global offset: 4
663
+ 2025-03-03 17:42:39,090 [INFO] __main__ - Processing pages (0-based): [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]
664
+ 2025-03-03 17:43:33,813 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_1.jpg
665
+ 2025-03-03 17:43:35,535 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_2.jpg
666
+ 2025-03-03 17:43:36,124 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_3.jpg
667
+ 2025-03-03 17:43:36,748 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_4.jpg
668
+ 2025-03-03 17:43:37,282 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_5.jpg
669
+ 2025-03-03 17:43:37,857 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_6.jpg
670
+ 2025-03-03 17:43:38,322 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_7.jpg
671
+ 2025-03-03 17:43:38,786 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_8.jpg
672
+ 2025-03-03 17:43:39,279 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_9.jpg
673
+ 2025-03-03 17:43:39,847 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_10.jpg
674
+ 2025-03-03 17:43:40,400 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_11.jpg
675
+ 2025-03-03 17:43:40,940 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_12.jpg
676
+ 2025-03-03 17:43:41,381 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_13.jpg
677
+ 2025-03-03 17:43:41,964 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_14.jpg
678
+ 2025-03-03 17:43:42,436 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_15.jpg
679
+ 2025-03-03 17:43:42,967 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_16.jpg
680
+ 2025-03-03 17:43:43,518 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_17.jpg
681
+ 2025-03-03 17:43:43,822 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_18.jpg
682
+ 2025-03-03 17:43:44,428 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_19.jpg
683
+ 2025-03-03 17:43:44,963 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_20.jpg
684
+ 2025-03-03 17:43:45,639 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_21.jpg
685
+ 2025-03-03 17:43:46,199 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_22.jpg
686
+ 2025-03-03 17:43:46,786 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_23.jpg
687
+ 2025-03-03 17:43:47,259 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_24.jpg
688
+ 2025-03-03 17:43:47,796 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_25.jpg
689
+ 2025-03-03 17:43:48,235 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_26.jpg
690
+ 2025-03-03 17:43:48,656 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_27.jpg
691
+ 2025-03-03 17:43:49,290 [INFO] __main__ - Uploaded to S3: /topic-extraction/img_28.jpg
692
+ 2025-03-03 17:43:49,683 [INFO] __main__ - Classifying images to detect tables.
693
+ 2025-03-03 17:43:53,784 [INFO] __main__ - Processing table image: /topic-extraction/img_1.jpg, columns=three
694
+ 2025-03-03 17:43:56,550 [ERROR] __main__ - Error processing table image /topic-extraction/img_1.jpg: [Errno 2] No such file or directory: '/tmp/tmp63t8um4x.jpg_rows/row_0/col_0.jpg'
695
+ 2025-03-03 17:43:56,550 [INFO] __main__ - Processing table image: /topic-extraction/img_2.jpg, columns=three
696
+ 2025-03-03 17:43:59,443 [ERROR] __main__ - Error processing table image /topic-extraction/img_2.jpg: [Errno 2] No such file or directory: '/tmp/tmps0rsmzl6.jpg_rows/row_0/col_0.jpg'
697
+ 2025-03-03 17:43:59,443 [INFO] __main__ - Processing table image: /topic-extraction/img_3.jpg, columns=three
698
+ 2025-03-03 17:44:02,428 [ERROR] __main__ - Error processing table image /topic-extraction/img_3.jpg: [Errno 2] No such file or directory: '/tmp/tmpj4fx8a9s.jpg_rows/row_0/col_0.jpg'
699
+ 2025-03-03 17:44:02,429 [INFO] __main__ - Processing table image: /topic-extraction/img_4.jpg, columns=three
700
+ 2025-03-03 17:44:05,216 [ERROR] __main__ - Error processing table image /topic-extraction/img_4.jpg: [Errno 2] No such file or directory: '/tmp/tmpmumoju32.jpg_rows/row_0/col_0.jpg'
701
+ 2025-03-03 17:44:05,216 [INFO] __main__ - Processing table image: /topic-extraction/img_5.jpg, columns=three
702
+ 2025-03-03 17:44:08,445 [ERROR] __main__ - Error processing table image /topic-extraction/img_5.jpg: [Errno 2] No such file or directory: '/tmp/tmptekcelbx.jpg_rows/row_0/col_0.jpg'
703
+ 2025-03-03 17:44:08,445 [INFO] __main__ - Processing table image: /topic-extraction/img_6.jpg, columns=three
704
+ 2025-03-03 17:44:11,635 [ERROR] __main__ - Error processing table image /topic-extraction/img_6.jpg: [Errno 2] No such file or directory: '/tmp/tmpi4bsuwn6.jpg_rows/row_0/col_0.jpg'
705
+ 2025-03-03 17:44:11,635 [INFO] __main__ - Processing table image: /topic-extraction/img_7.jpg, columns=three
706
+ 2025-03-03 17:44:14,589 [ERROR] __main__ - Error processing table image /topic-extraction/img_7.jpg: [Errno 2] No such file or directory: '/tmp/tmpj_8l15kk.jpg_rows/row_0/col_0.jpg'
707
+ 2025-03-03 17:44:14,589 [INFO] __main__ - Processing table image: /topic-extraction/img_8.jpg, columns=three
708
+ 2025-03-03 17:44:17,836 [ERROR] __main__ - Error processing table image /topic-extraction/img_8.jpg: [Errno 2] No such file or directory: '/tmp/tmp3_kflaqs.jpg_rows/row_0/col_0.jpg'
709
+ 2025-03-03 17:44:17,837 [INFO] __main__ - Processing table image: /topic-extraction/img_9.jpg, columns=three
710
+ 2025-03-03 17:44:21,255 [ERROR] __main__ - Error processing table image /topic-extraction/img_9.jpg: [Errno 2] No such file or directory: '/tmp/tmpwuir45y0.jpg_rows/row_0/col_0.jpg'
711
+ 2025-03-03 17:44:21,255 [INFO] __main__ - Processing table image: /topic-extraction/img_10.jpg, columns=three
712
+ 2025-03-03 17:44:24,155 [ERROR] __main__ - Error processing table image /topic-extraction/img_10.jpg: [Errno 2] No such file or directory: '/tmp/tmpu2qia4ih.jpg_rows/row_0/col_0.jpg'
713
+ 2025-03-03 17:44:24,155 [INFO] __main__ - Processing table image: /topic-extraction/img_11.jpg, columns=three
714
+ 2025-03-03 17:44:27,346 [ERROR] __main__ - Error processing table image /topic-extraction/img_11.jpg: [Errno 2] No such file or directory: '/tmp/tmp5ucu_tbp.jpg_rows/row_0/col_0.jpg'
715
+ 2025-03-03 17:44:27,346 [INFO] __main__ - Processing table image: /topic-extraction/img_12.jpg, columns=three
716
+ 2025-03-03 17:44:30,489 [ERROR] __main__ - Error processing table image /topic-extraction/img_12.jpg: [Errno 2] No such file or directory: '/tmp/tmp_ciyju4y.jpg_rows/row_0/col_0.jpg'
717
+ 2025-03-03 17:44:30,489 [INFO] __main__ - Processing table image: /topic-extraction/img_13.jpg, columns=three
718
+ 2025-03-03 17:44:33,140 [ERROR] __main__ - Error processing table image /topic-extraction/img_13.jpg: [Errno 2] No such file or directory: '/tmp/tmp1_mz16x9.jpg_rows/row_0/col_0.jpg'
719
+ 2025-03-03 17:44:33,141 [INFO] __main__ - Processing table image: /topic-extraction/img_14.jpg, columns=two
720
+ 2025-03-03 17:44:36,423 [ERROR] __main__ - Error processing table image /topic-extraction/img_14.jpg: [Errno 2] No such file or directory: '/tmp/tmp_44dh1m3.jpg_rows/row_0/col_0.jpg'
721
+ 2025-03-03 17:44:36,423 [INFO] __main__ - Processing table image: /topic-extraction/img_15.jpg, columns=three
722
+ 2025-03-03 17:44:39,622 [ERROR] __main__ - Error processing table image /topic-extraction/img_15.jpg: [Errno 2] No such file or directory: '/tmp/tmp4e3y3440.jpg_rows/row_0/col_0.jpg'
723
+ 2025-03-03 17:44:39,623 [INFO] __main__ - Processing table image: /topic-extraction/img_16.jpg, columns=three
724
+ 2025-03-03 17:44:42,896 [ERROR] __main__ - Error processing table image /topic-extraction/img_16.jpg: [Errno 2] No such file or directory: '/tmp/tmp2njdfsc6.jpg_rows/row_0/col_0.jpg'
725
+ 2025-03-03 17:44:42,896 [INFO] __main__ - Processing table image: /topic-extraction/img_17.jpg, columns=three
726
+ 2025-03-03 17:44:46,043 [ERROR] __main__ - Error processing table image /topic-extraction/img_17.jpg: [Errno 2] No such file or directory: '/tmp/tmpwq0nk28o.jpg_rows/row_0/col_0.jpg'
727
+ 2025-03-03 17:44:46,044 [INFO] __main__ - Processing table image: /topic-extraction/img_18.jpg, columns=three
728
+ 2025-03-03 17:44:47,088 [ERROR] __main__ - Error processing table image /topic-extraction/img_18.jpg: [Errno 2] No such file or directory: '/tmp/tmpdx8gcoqg.jpg_rows/row_0/col_0.jpg'
729
+ 2025-03-03 17:44:47,089 [INFO] __main__ - Processing table image: /topic-extraction/img_19.jpg, columns=three
730
+ 2025-03-03 17:44:49,477 [ERROR] __main__ - Error processing table image /topic-extraction/img_19.jpg: [Errno 2] No such file or directory: '/tmp/tmp72627l8g.jpg_rows/row_0/col_0.jpg'
731
+ 2025-03-03 17:44:49,478 [INFO] __main__ - Processing table image: /topic-extraction/img_20.jpg, columns=two
732
+ 2025-03-03 17:44:53,018 [ERROR] __main__ - Error processing table image /topic-extraction/img_20.jpg: [Errno 2] No such file or directory: '/tmp/tmpdnic1_0w.jpg_rows/row_0/col_0.jpg'
733
+ 2025-03-03 17:44:53,019 [INFO] __main__ - Processing table image: /topic-extraction/img_21.jpg, columns=three
734
+ 2025-03-03 17:44:56,093 [ERROR] __main__ - Error processing table image /topic-extraction/img_21.jpg: [Errno 2] No such file or directory: '/tmp/tmpmhoh8yuy.jpg_rows/row_0/col_0.jpg'
735
+ 2025-03-03 17:44:56,093 [INFO] __main__ - Processing table image: /topic-extraction/img_22.jpg, columns=three
736
+ 2025-03-03 17:44:59,613 [ERROR] __main__ - Error processing table image /topic-extraction/img_22.jpg: [Errno 2] No such file or directory: '/tmp/tmp7ted27c7.jpg_rows/row_0/col_0.jpg'
737
+ 2025-03-03 17:44:59,613 [INFO] __main__ - Processing table image: /topic-extraction/img_23.jpg, columns=three
738
+ 2025-03-03 17:45:02,646 [ERROR] __main__ - Error processing table image /topic-extraction/img_23.jpg: [Errno 2] No such file or directory: '/tmp/tmpbr3_k9_v.jpg_rows/row_0/col_0.jpg'
739
+ 2025-03-03 17:45:02,646 [INFO] __main__ - Processing table image: /topic-extraction/img_24.jpg, columns=three
740
+ 2025-03-03 17:45:06,144 [ERROR] __main__ - Error processing table image /topic-extraction/img_24.jpg: [Errno 2] No such file or directory: '/tmp/tmpg6iw11r9.jpg_rows/row_0/col_0.jpg'
741
+ 2025-03-03 17:45:06,145 [INFO] __main__ - Processing table image: /topic-extraction/img_25.jpg, columns=three
742
+ 2025-03-03 17:45:09,409 [ERROR] __main__ - Error processing table image /topic-extraction/img_25.jpg: [Errno 2] No such file or directory: '/tmp/tmp_ntakmkl.jpg_rows/row_0/col_0.jpg'
743
+ 2025-03-03 17:45:09,410 [INFO] __main__ - Processing table image: /topic-extraction/img_26.jpg, columns=three
744
+ 2025-03-03 17:45:12,057 [ERROR] __main__ - Error processing table image /topic-extraction/img_26.jpg: [Errno 2] No such file or directory: '/tmp/tmp0k8i_n4p.jpg_rows/row_0/col_0.jpg'
745
+ 2025-03-03 17:45:12,057 [INFO] __main__ - Processing table image: /topic-extraction/img_27.jpg, columns=three
746
+ 2025-03-03 17:45:14,839 [INFO] __main__ - GPU memory cleaned up.
we/final_subtopics.json CHANGED
@@ -1,280 +1,1137 @@
1
  [
2
  {
3
- "title": "",
4
  "contents": [
5
  {
6
  "type": "image",
7
- "key": "/topic-extraction/img_1.jpg"
8
  }
9
  ],
10
- "children": []
 
 
 
 
 
 
 
 
 
 
 
11
  },
12
  {
13
- "title": "",
14
  "contents": [
15
  {
16
  "type": "image",
17
- "key": "/topic-extraction/img_2.jpg"
18
  }
19
  ],
20
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
- "title": "",
24
  "contents": [
25
  {
26
  "type": "image",
27
- "key": "/topic-extraction/img_3.jpg"
28
  }
29
  ],
30
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  },
32
  {
33
- "title": "",
34
  "contents": [
35
  {
36
  "type": "image",
37
- "key": "/topic-extraction/img_4.jpg"
38
  }
39
  ],
40
- "children": []
 
 
 
 
 
 
 
 
 
 
 
41
  },
42
  {
43
- "title": "",
44
  "contents": [
45
  {
46
  "type": "image",
47
- "key": "/topic-extraction/img_5.jpg"
48
  }
49
  ],
50
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  },
52
  {
53
- "title": "",
54
  "contents": [
55
  {
56
  "type": "image",
57
- "key": "/topic-extraction/img_6.jpg"
58
  }
59
  ],
60
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  },
62
  {
63
- "title": "",
64
  "contents": [
65
  {
66
  "type": "image",
67
- "key": "/topic-extraction/img_7.jpg"
68
  }
69
  ],
70
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  },
72
  {
73
- "title": "",
74
  "contents": [
75
  {
76
  "type": "image",
77
- "key": "/topic-extraction/img_8.jpg"
78
  }
79
  ],
80
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  },
82
  {
83
- "title": "",
84
  "contents": [
85
  {
86
  "type": "image",
87
- "key": "/topic-extraction/img_9.jpg"
88
  }
89
  ],
90
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  },
92
  {
93
- "title": "",
94
  "contents": [
95
  {
96
  "type": "image",
97
- "key": "/topic-extraction/img_10.jpg"
98
  }
99
  ],
100
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  },
102
  {
103
  "title": "",
104
- "contents": [
 
105
  {
106
- "type": "image",
107
- "key": "/topic-extraction/img_11.jpg"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  }
109
- ],
110
- "children": []
111
  },
112
  {
113
- "title": "",
114
  "contents": [
115
  {
116
  "type": "image",
117
- "key": "/topic-extraction/img_12.jpg"
118
  }
119
  ],
120
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  },
122
  {
123
- "title": "",
124
  "contents": [
125
  {
126
  "type": "image",
127
- "key": "/topic-extraction/img_13.jpg"
128
  }
129
  ],
130
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  },
132
  {
133
- "title": "",
134
  "contents": [
135
  {
136
  "type": "image",
137
- "key": "/topic-extraction/img_14.jpg"
138
  }
139
  ],
140
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  },
142
  {
143
- "title": "",
144
  "contents": [
145
  {
146
  "type": "image",
147
- "key": "/topic-extraction/img_15.jpg"
148
  }
149
  ],
150
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  },
152
  {
153
- "title": "",
154
  "contents": [
155
  {
156
  "type": "image",
157
- "key": "/topic-extraction/img_16.jpg"
158
  }
159
  ],
160
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  },
162
  {
163
- "title": "",
164
  "contents": [
165
  {
166
  "type": "image",
167
- "key": "/topic-extraction/img_17.jpg"
168
  }
169
  ],
170
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  },
172
  {
173
- "title": "",
174
  "contents": [
175
  {
176
  "type": "image",
177
- "key": "/topic-extraction/img_18.jpg"
178
  }
179
  ],
180
- "children": []
 
 
 
 
 
 
 
 
 
 
 
181
  },
182
  {
183
- "title": "",
184
  "contents": [
185
  {
186
  "type": "image",
187
- "key": "/topic-extraction/img_19.jpg"
188
  }
189
  ],
190
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  },
192
  {
193
- "title": "",
194
  "contents": [
195
  {
196
  "type": "image",
197
- "key": "/topic-extraction/img_20.jpg"
198
  }
199
  ],
200
- "children": []
 
 
 
 
 
 
 
 
 
 
 
201
  },
202
  {
203
- "title": "",
204
  "contents": [
205
  {
206
  "type": "image",
207
- "key": "/topic-extraction/img_21.jpg"
208
  }
209
  ],
210
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  },
212
  {
213
- "title": "",
214
  "contents": [
215
  {
216
  "type": "image",
217
- "key": "/topic-extraction/img_22.jpg"
218
  }
219
  ],
220
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  },
222
  {
223
- "title": "",
224
  "contents": [
225
  {
226
  "type": "image",
227
- "key": "/topic-extraction/img_23.jpg"
228
  }
229
  ],
230
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  },
232
  {
233
- "title": "",
234
  "contents": [
235
  {
236
  "type": "image",
237
- "key": "/topic-extraction/img_24.jpg"
238
  }
239
  ],
240
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  },
242
  {
243
  "title": "",
244
- "contents": [
 
245
  {
246
- "type": "image",
247
- "key": "/topic-extraction/img_25.jpg"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  }
249
- ],
250
- "children": []
251
  },
252
  {
253
- "title": "",
254
  "contents": [
255
  {
256
  "type": "image",
257
- "key": "/topic-extraction/img_26.jpg"
258
  }
259
  ],
260
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  },
262
  {
263
- "title": "",
264
  "contents": [
265
  {
266
  "type": "image",
267
- "key": "/topic-extraction/img_27.jpg"
268
  }
269
  ],
270
- "children": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  },
272
  {
273
- "title": "",
274
  "contents": [
275
  {
276
  "type": "image",
277
- "key": "/topic-extraction/img_28.jpg"
278
  }
279
  ],
280
  "children": []
 
1
  [
2
  {
3
+ "title": "Topics",
4
  "contents": [
5
  {
6
  "type": "image",
7
+ "key": "/topic-extraction/cells/img_1.jpg_r0_c0.png"
8
  }
9
  ],
10
+ "children": [
11
+ {
12
+ "title": "1.1",
13
+ "contents": [
14
+ {
15
+ "type": "image",
16
+ "key": "/topic-extraction/cells/img_1.jpg_r1_c1.png"
17
+ }
18
+ ],
19
+ "children": []
20
+ }
21
+ ]
22
  },
23
  {
24
+ "title": "2 Algebra and functions",
25
  "contents": [
26
  {
27
  "type": "image",
28
+ "key": "/topic-extraction/cells/img_2.jpg_r0_c0.png"
29
  }
30
  ],
31
+ "children": [
32
+ {
33
+ "title": "2.1",
34
+ "contents": [
35
+ {
36
+ "type": "image",
37
+ "key": "/topic-extraction/cells/img_2.jpg_r0_c1.png"
38
+ }
39
+ ],
40
+ "children": []
41
+ },
42
+ {
43
+ "title": "2.2",
44
+ "contents": [
45
+ {
46
+ "type": "image",
47
+ "key": "/topic-extraction/cells/img_2.jpg_r1_c0.png"
48
+ }
49
+ ],
50
+ "children": []
51
+ },
52
+ {
53
+ "title": "2.3",
54
+ "contents": [
55
+ {
56
+ "type": "image",
57
+ "key": "/topic-extraction/cells/img_2.jpg_r2_c0.png"
58
+ }
59
+ ],
60
+ "children": []
61
+ },
62
+ {
63
+ "title": "2.4",
64
+ "contents": [
65
+ {
66
+ "type": "image",
67
+ "key": "/topic-extraction/cells/img_2.jpg_r3_c0.png"
68
+ }
69
+ ],
70
+ "children": []
71
+ }
72
+ ]
73
  },
74
  {
75
+ "title": "2 Algebra and functions continued",
76
  "contents": [
77
  {
78
  "type": "image",
79
+ "key": "/topic-extraction/cells/img_3.jpg_r0_c0.png"
80
  }
81
  ],
82
+ "children": [
83
+ {
84
+ "title": "2.5",
85
+ "contents": [
86
+ {
87
+ "type": "image",
88
+ "key": "/topic-extraction/cells/img_3.jpg_r0_c1.png"
89
+ }
90
+ ],
91
+ "children": []
92
+ },
93
+ {
94
+ "title": "2.6",
95
+ "contents": [
96
+ {
97
+ "type": "image",
98
+ "key": "/topic-extraction/cells/img_3.jpg_r1_c0.png"
99
+ }
100
+ ],
101
+ "children": []
102
+ }
103
+ ]
104
  },
105
  {
106
+ "title": "Topics",
107
  "contents": [
108
  {
109
  "type": "image",
110
+ "key": "/topic-extraction/cells/img_4.jpg_r0_c0.png"
111
  }
112
  ],
113
+ "children": [
114
+ {
115
+ "title": "2.7",
116
+ "contents": [
117
+ {
118
+ "type": "image",
119
+ "key": "/topic-extraction/cells/img_4.jpg_r1_c1.png"
120
+ }
121
+ ],
122
+ "children": []
123
+ }
124
+ ]
125
  },
126
  {
127
+ "title": "Topics",
128
  "contents": [
129
  {
130
  "type": "image",
131
+ "key": "/topic-extraction/cells/img_5.jpg_r0_c0.png"
132
  }
133
  ],
134
+ "children": [
135
+ {
136
+ "title": "2.8",
137
+ "contents": [
138
+ {
139
+ "type": "image",
140
+ "key": "/topic-extraction/cells/img_5.jpg_r1_c1.png"
141
+ }
142
+ ],
143
+ "children": []
144
+ },
145
+ {
146
+ "title": "2.9",
147
+ "contents": [
148
+ {
149
+ "type": "image",
150
+ "key": "/topic-extraction/cells/img_5.jpg_r2_c0.png"
151
+ }
152
+ ],
153
+ "children": []
154
+ }
155
+ ]
156
  },
157
  {
158
+ "title": "2 Algebra and functions continued",
159
  "contents": [
160
  {
161
  "type": "image",
162
+ "key": "/topic-extraction/cells/img_6.jpg_r0_c0.png"
163
  }
164
  ],
165
+ "children": [
166
+ {
167
+ "title": "2.11",
168
+ "contents": [
169
+ {
170
+ "type": "image",
171
+ "key": "/topic-extraction/cells/img_6.jpg_r0_c1.png"
172
+ }
173
+ ],
174
+ "children": []
175
+ },
176
+ {
177
+ "title": "3.1",
178
+ "contents": [
179
+ {
180
+ "type": "image",
181
+ "key": "/topic-extraction/cells/img_6.jpg_r1_c1.png"
182
+ }
183
+ ],
184
+ "children": []
185
+ }
186
+ ]
187
  },
188
  {
189
+ "title": "3 Coordinate geometry in the (x, y) plane continued",
190
  "contents": [
191
  {
192
  "type": "image",
193
+ "key": "/topic-extraction/cells/img_7.jpg_r0_c0.png"
194
  }
195
  ],
196
+ "children": [
197
+ {
198
+ "title": "3.3",
199
+ "contents": [
200
+ {
201
+ "type": "image",
202
+ "key": "/topic-extraction/cells/img_7.jpg_r0_c1.png"
203
+ }
204
+ ],
205
+ "children": []
206
+ },
207
+ {
208
+ "title": "3.4",
209
+ "contents": [
210
+ {
211
+ "type": "image",
212
+ "key": "/topic-extraction/cells/img_7.jpg_r1_c0.png"
213
+ }
214
+ ],
215
+ "children": []
216
+ },
217
+ {
218
+ "title": "4.1",
219
+ "contents": [
220
+ {
221
+ "type": "image",
222
+ "key": "/topic-extraction/cells/img_7.jpg_r2_c1.png"
223
+ }
224
+ ],
225
+ "children": []
226
+ }
227
+ ]
228
  },
229
  {
230
+ "title": "Topics",
231
  "contents": [
232
  {
233
  "type": "image",
234
+ "key": "/topic-extraction/cells/img_8.jpg_r0_c0.png"
235
  }
236
  ],
237
+ "children": [
238
+ {
239
+ "title": "4.2",
240
+ "contents": [
241
+ {
242
+ "type": "image",
243
+ "key": "/topic-extraction/cells/img_8.jpg_r1_c1.png"
244
+ }
245
+ ],
246
+ "children": []
247
+ },
248
+ {
249
+ "title": "4.3",
250
+ "contents": [
251
+ {
252
+ "type": "image",
253
+ "key": "/topic-extraction/cells/img_8.jpg_r2_c0.png"
254
+ }
255
+ ],
256
+ "children": []
257
+ },
258
+ {
259
+ "title": "4.4",
260
+ "contents": [
261
+ {
262
+ "type": "image",
263
+ "key": "/topic-extraction/cells/img_8.jpg_r3_c0.png"
264
+ }
265
+ ],
266
+ "children": []
267
+ },
268
+ {
269
+ "title": "4.5",
270
+ "contents": [
271
+ {
272
+ "type": "image",
273
+ "key": "/topic-extraction/cells/img_8.jpg_r4_c0.png"
274
+ }
275
+ ],
276
+ "children": []
277
+ },
278
+ {
279
+ "title": "4.6",
280
+ "contents": [
281
+ {
282
+ "type": "image",
283
+ "key": "/topic-extraction/cells/img_8.jpg_r5_c0.png"
284
+ }
285
+ ],
286
+ "children": []
287
+ }
288
+ ]
289
  },
290
  {
291
+ "title": "gonometry",
292
  "contents": [
293
  {
294
  "type": "image",
295
+ "key": "/topic-extraction/cells/img_9.jpg_r0_c0.png"
296
  }
297
  ],
298
+ "children": [
299
+ {
300
+ "title": "5.1",
301
+ "contents": [
302
+ {
303
+ "type": "image",
304
+ "key": "/topic-extraction/cells/img_9.jpg_r0_c1.png"
305
+ }
306
+ ],
307
+ "children": []
308
+ },
309
+ {
310
+ "title": "5.2",
311
+ "contents": [
312
+ {
313
+ "type": "image",
314
+ "key": "/topic-extraction/cells/img_9.jpg_r1_c0.png"
315
+ }
316
+ ],
317
+ "children": []
318
+ },
319
+ {
320
+ "title": "5.3",
321
+ "contents": [
322
+ {
323
+ "type": "image",
324
+ "key": "/topic-extraction/cells/img_9.jpg_r2_c0.png"
325
+ }
326
+ ],
327
+ "children": []
328
+ },
329
+ {
330
+ "title": "5.4",
331
+ "contents": [
332
+ {
333
+ "type": "image",
334
+ "key": "/topic-extraction/cells/img_9.jpg_r3_c0.png"
335
+ }
336
+ ],
337
+ "children": []
338
+ }
339
+ ]
340
  },
341
  {
342
+ "title": "5 Trigonometry continued",
343
  "contents": [
344
  {
345
  "type": "image",
346
+ "key": "/topic-extraction/cells/img_10.jpg_r0_c0.png"
347
  }
348
  ],
349
+ "children": [
350
+ {
351
+ "title": "5.5",
352
+ "contents": [
353
+ {
354
+ "type": "image",
355
+ "key": "/topic-extraction/cells/img_10.jpg_r0_c1.png"
356
+ }
357
+ ],
358
+ "children": []
359
+ },
360
+ {
361
+ "title": "5.6",
362
+ "contents": [
363
+ {
364
+ "type": "image",
365
+ "key": "/topic-extraction/cells/img_10.jpg_r1_c0.png"
366
+ }
367
+ ],
368
+ "children": []
369
+ },
370
+ {
371
+ "title": "5.7",
372
+ "contents": [
373
+ {
374
+ "type": "image",
375
+ "key": "/topic-extraction/cells/img_10.jpg_r2_c0.png"
376
+ }
377
+ ],
378
+ "children": []
379
+ },
380
+ {
381
+ "title": "5.8",
382
+ "contents": [
383
+ {
384
+ "type": "image",
385
+ "key": "/topic-extraction/cells/img_10.jpg_r3_c0.png"
386
+ }
387
+ ],
388
+ "children": []
389
+ }
390
+ ]
391
  },
392
  {
393
  "title": "",
394
+ "contents": [],
395
+ "children": [
396
  {
397
+ "title": "6.1",
398
+ "contents": [
399
+ {
400
+ "type": "image",
401
+ "key": "/topic-extraction/cells/img_11.jpg_r0_c0.png"
402
+ }
403
+ ],
404
+ "children": []
405
+ },
406
+ {
407
+ "title": "6.2",
408
+ "contents": [
409
+ {
410
+ "type": "image",
411
+ "key": "/topic-extraction/cells/img_11.jpg_r1_c0.png"
412
+ }
413
+ ],
414
+ "children": []
415
+ },
416
+ {
417
+ "title": "6.3",
418
+ "contents": [
419
+ {
420
+ "type": "image",
421
+ "key": "/topic-extraction/cells/img_11.jpg_r2_c0.png"
422
+ }
423
+ ],
424
+ "children": []
425
+ },
426
+ {
427
+ "title": "6.4",
428
+ "contents": [
429
+ {
430
+ "type": "image",
431
+ "key": "/topic-extraction/cells/img_11.jpg_r3_c0.png"
432
+ }
433
+ ],
434
+ "children": []
435
+ },
436
+ {
437
+ "title": "6.5",
438
+ "contents": [
439
+ {
440
+ "type": "image",
441
+ "key": "/topic-extraction/cells/img_11.jpg_r4_c0.png"
442
+ }
443
+ ],
444
+ "children": []
445
+ },
446
+ {
447
+ "title": "6.6",
448
+ "contents": [
449
+ {
450
+ "type": "image",
451
+ "key": "/topic-extraction/cells/img_11.jpg_r5_c0.png"
452
+ }
453
+ ],
454
+ "children": []
455
  }
456
+ ]
 
457
  },
458
  {
459
+ "title": "Topics",
460
  "contents": [
461
  {
462
  "type": "image",
463
+ "key": "/topic-extraction/cells/img_12.jpg_r0_c0.png"
464
  }
465
  ],
466
+ "children": [
467
+ {
468
+ "title": "6.7",
469
+ "contents": [
470
+ {
471
+ "type": "image",
472
+ "key": "/topic-extraction/cells/img_12.jpg_r1_c1.png"
473
+ }
474
+ ],
475
+ "children": []
476
+ },
477
+ {
478
+ "title": "7.1",
479
+ "contents": [
480
+ {
481
+ "type": "image",
482
+ "key": "/topic-extraction/cells/img_12.jpg_r2_c1.png"
483
+ }
484
+ ],
485
+ "children": []
486
+ }
487
+ ]
488
  },
489
  {
490
+ "title": "Topics",
491
  "contents": [
492
  {
493
  "type": "image",
494
+ "key": "/topic-extraction/cells/img_13.jpg_r0_c0.png"
495
  }
496
  ],
497
+ "children": [
498
+ {
499
+ "title": "7.1",
500
+ "contents": [
501
+ {
502
+ "type": "image",
503
+ "key": "/topic-extraction/cells/img_13.jpg_r1_c1.png"
504
+ }
505
+ ],
506
+ "children": []
507
+ },
508
+ {
509
+ "title": "7.2",
510
+ "contents": [
511
+ {
512
+ "type": "image",
513
+ "key": "/topic-extraction/cells/img_13.jpg_r2_c0.png"
514
+ }
515
+ ],
516
+ "children": []
517
+ },
518
+ {
519
+ "title": "7.3",
520
+ "contents": [
521
+ {
522
+ "type": "image",
523
+ "key": "/topic-extraction/cells/img_13.jpg_r3_c0.png"
524
+ }
525
+ ],
526
+ "children": []
527
+ }
528
+ ]
529
  },
530
  {
531
+ "title": "Topics",
532
  "contents": [
533
  {
534
  "type": "image",
535
+ "key": "/topic-extraction/cells/img_14.jpg_r0_c0.png"
536
  }
537
  ],
538
+ "children": [
539
+ {
540
+ "title": "7.4",
541
+ "contents": [
542
+ {
543
+ "type": "image",
544
+ "key": "/topic-extraction/cells/img_14.jpg_r1_c1.png"
545
+ }
546
+ ],
547
+ "children": []
548
+ },
549
+ {
550
+ "title": "7.5",
551
+ "contents": [
552
+ {
553
+ "type": "image",
554
+ "key": "/topic-extraction/cells/img_14.jpg_r2_c0.png"
555
+ }
556
+ ],
557
+ "children": []
558
+ },
559
+ {
560
+ "title": "7.6",
561
+ "contents": [
562
+ {
563
+ "type": "image",
564
+ "key": "/topic-extraction/cells/img_14.jpg_r3_c0.png"
565
+ }
566
+ ],
567
+ "children": []
568
+ },
569
+ {
570
+ "title": "8.1",
571
+ "contents": [
572
+ {
573
+ "type": "image",
574
+ "key": "/topic-extraction/cells/img_14.jpg_r4_c1.png"
575
+ }
576
+ ],
577
+ "children": []
578
+ },
579
+ {
580
+ "title": "8.2",
581
+ "contents": [
582
+ {
583
+ "type": "image",
584
+ "key": "/topic-extraction/cells/img_14.jpg_r5_c0.png"
585
+ }
586
+ ],
587
+ "children": []
588
+ }
589
+ ]
590
  },
591
  {
592
+ "title": "Topics",
593
  "contents": [
594
  {
595
  "type": "image",
596
+ "key": "/topic-extraction/cells/img_15.jpg_r0_c0.png"
597
  }
598
  ],
599
+ "children": [
600
+ {
601
+ "title": "8.3",
602
+ "contents": [
603
+ {
604
+ "type": "image",
605
+ "key": "/topic-extraction/cells/img_15.jpg_r1_c1.png"
606
+ }
607
+ ],
608
+ "children": []
609
+ },
610
+ {
611
+ "title": "8.4",
612
+ "contents": [
613
+ {
614
+ "type": "image",
615
+ "key": "/topic-extraction/cells/img_15.jpg_r2_c0.png"
616
+ }
617
+ ],
618
+ "children": []
619
+ },
620
+ {
621
+ "title": "8.5",
622
+ "contents": [
623
+ {
624
+ "type": "image",
625
+ "key": "/topic-extraction/cells/img_15.jpg_r3_c0.png"
626
+ }
627
+ ],
628
+ "children": []
629
+ },
630
+ {
631
+ "title": "8.6",
632
+ "contents": [
633
+ {
634
+ "type": "image",
635
+ "key": "/topic-extraction/cells/img_15.jpg_r4_c0.png"
636
+ }
637
+ ],
638
+ "children": []
639
+ }
640
+ ]
641
  },
642
  {
643
+ "title": "Topics",
644
  "contents": [
645
  {
646
  "type": "image",
647
+ "key": "/topic-extraction/cells/img_16.jpg_r0_c0.png"
648
  }
649
  ],
650
+ "children": [
651
+ {
652
+ "title": "8.7",
653
+ "contents": [
654
+ {
655
+ "type": "image",
656
+ "key": "/topic-extraction/cells/img_16.jpg_r1_c1.png"
657
+ }
658
+ ],
659
+ "children": []
660
+ },
661
+ {
662
+ "title": "8.8",
663
+ "contents": [
664
+ {
665
+ "type": "image",
666
+ "key": "/topic-extraction/cells/img_16.jpg_r2_c0.png"
667
+ }
668
+ ],
669
+ "children": []
670
+ },
671
+ {
672
+ "title": "9.1",
673
+ "contents": [
674
+ {
675
+ "type": "image",
676
+ "key": "/topic-extraction/cells/img_16.jpg_r3_c1.png"
677
+ }
678
+ ],
679
+ "children": []
680
+ },
681
+ {
682
+ "title": "9.2",
683
+ "contents": [
684
+ {
685
+ "type": "image",
686
+ "key": "/topic-extraction/cells/img_16.jpg_r4_c0.png"
687
+ }
688
+ ],
689
+ "children": []
690
+ },
691
+ {
692
+ "title": "9.3",
693
+ "contents": [
694
+ {
695
+ "type": "image",
696
+ "key": "/topic-extraction/cells/img_16.jpg_r5_c0.png"
697
+ }
698
+ ],
699
+ "children": []
700
+ }
701
+ ]
702
  },
703
  {
704
+ "title": "9 Numerical methods",
705
  "contents": [
706
  {
707
  "type": "image",
708
+ "key": "/topic-extraction/cells/img_17.jpg_r0_c0.png"
709
  }
710
  ],
711
+ "children": [
712
+ {
713
+ "title": "9.4",
714
+ "contents": [
715
+ {
716
+ "type": "image",
717
+ "key": "/topic-extraction/cells/img_17.jpg_r0_c1.png"
718
+ }
719
+ ],
720
+ "children": []
721
+ },
722
+ {
723
+ "title": "9.5",
724
+ "contents": [
725
+ {
726
+ "type": "image",
727
+ "key": "/topic-extraction/cells/img_17.jpg_r1_c0.png"
728
+ }
729
+ ],
730
+ "children": []
731
+ },
732
+ {
733
+ "title": "10.1",
734
+ "contents": [
735
+ {
736
+ "type": "image",
737
+ "key": "/topic-extraction/cells/img_17.jpg_r2_c1.png"
738
+ }
739
+ ],
740
+ "children": []
741
+ },
742
+ {
743
+ "title": "10.2",
744
+ "contents": [
745
+ {
746
+ "type": "image",
747
+ "key": "/topic-extraction/cells/img_17.jpg_r3_c0.png"
748
+ }
749
+ ],
750
+ "children": []
751
+ },
752
+ {
753
+ "title": "10.3",
754
+ "contents": [
755
+ {
756
+ "type": "image",
757
+ "key": "/topic-extraction/cells/img_17.jpg_r4_c0.png"
758
+ }
759
+ ],
760
+ "children": []
761
+ },
762
+ {
763
+ "title": "10.4",
764
+ "contents": [
765
+ {
766
+ "type": "image",
767
+ "key": "/topic-extraction/cells/img_17.jpg_r5_c0.png"
768
+ }
769
+ ],
770
+ "children": []
771
+ }
772
+ ]
773
  },
774
  {
775
+ "title": "Topics",
776
  "contents": [
777
  {
778
  "type": "image",
779
+ "key": "/topic-extraction/cells/img_18.jpg_r0_c0.png"
780
  }
781
  ],
782
+ "children": [
783
+ {
784
+ "title": "10.5",
785
+ "contents": [
786
+ {
787
+ "type": "image",
788
+ "key": "/topic-extraction/cells/img_18.jpg_r1_c1.png"
789
+ }
790
+ ],
791
+ "children": []
792
+ }
793
+ ]
794
  },
795
  {
796
+ "title": "Topics",
797
  "contents": [
798
  {
799
  "type": "image",
800
+ "key": "/topic-extraction/cells/img_19.jpg_r0_c0.png"
801
  }
802
  ],
803
+ "children": [
804
+ {
805
+ "title": "1.1",
806
+ "contents": [
807
+ {
808
+ "type": "image",
809
+ "key": "/topic-extraction/cells/img_19.jpg_r1_c1.png"
810
+ }
811
+ ],
812
+ "children": []
813
+ },
814
+ {
815
+ "title": "2.1",
816
+ "contents": [
817
+ {
818
+ "type": "image",
819
+ "key": "/topic-extraction/cells/img_19.jpg_r2_c1.png"
820
+ }
821
+ ],
822
+ "children": []
823
+ }
824
+ ]
825
  },
826
  {
827
+ "title": "Topics",
828
  "contents": [
829
  {
830
  "type": "image",
831
+ "key": "/topic-extraction/cells/img_20.jpg_r0_c0.png"
832
  }
833
  ],
834
+ "children": [
835
+ {
836
+ "title": "2.2",
837
+ "contents": [
838
+ {
839
+ "type": "image",
840
+ "key": "/topic-extraction/cells/img_20.jpg_r1_c1.png"
841
+ }
842
+ ],
843
+ "children": []
844
+ }
845
+ ]
846
  },
847
  {
848
+ "title": "2 Data presentation and interpretation continued",
849
  "contents": [
850
  {
851
  "type": "image",
852
+ "key": "/topic-extraction/cells/img_21.jpg_r0_c0.png"
853
  }
854
  ],
855
+ "children": [
856
+ {
857
+ "title": "2.4",
858
+ "contents": [
859
+ {
860
+ "type": "image",
861
+ "key": "/topic-extraction/cells/img_21.jpg_r0_c1.png"
862
+ }
863
+ ],
864
+ "children": []
865
+ },
866
+ {
867
+ "title": "3.1",
868
+ "contents": [
869
+ {
870
+ "type": "image",
871
+ "key": "/topic-extraction/cells/img_21.jpg_r1_c1.png"
872
+ }
873
+ ],
874
+ "children": []
875
+ }
876
+ ]
877
  },
878
  {
879
+ "title": "Topics",
880
  "contents": [
881
  {
882
  "type": "image",
883
+ "key": "/topic-extraction/cells/img_22.jpg_r0_c0.png"
884
  }
885
  ],
886
+ "children": [
887
+ {
888
+ "title": "3.3",
889
+ "contents": [
890
+ {
891
+ "type": "image",
892
+ "key": "/topic-extraction/cells/img_22.jpg_r1_c1.png"
893
+ }
894
+ ],
895
+ "children": []
896
+ },
897
+ {
898
+ "title": "4.1",
899
+ "contents": [
900
+ {
901
+ "type": "image",
902
+ "key": "/topic-extraction/cells/img_22.jpg_r2_c1.png"
903
+ }
904
+ ],
905
+ "children": []
906
+ },
907
+ {
908
+ "title": "4.2",
909
+ "contents": [
910
+ {
911
+ "type": "image",
912
+ "key": "/topic-extraction/cells/img_22.jpg_r3_c0.png"
913
+ }
914
+ ],
915
+ "children": []
916
+ }
917
+ ]
918
  },
919
  {
920
+ "title": "4 Statistical distributions continued",
921
  "contents": [
922
  {
923
  "type": "image",
924
+ "key": "/topic-extraction/cells/img_23.jpg_r0_c0.png"
925
  }
926
  ],
927
+ "children": [
928
+ {
929
+ "title": "4.3",
930
+ "contents": [
931
+ {
932
+ "type": "image",
933
+ "key": "/topic-extraction/cells/img_23.jpg_r0_c1.png"
934
+ }
935
+ ],
936
+ "children": []
937
+ },
938
+ {
939
+ "title": "5.1",
940
+ "contents": [
941
+ {
942
+ "type": "image",
943
+ "key": "/topic-extraction/cells/img_23.jpg_r1_c1.png"
944
+ }
945
+ ],
946
+ "children": []
947
+ }
948
+ ]
949
  },
950
  {
951
+ "title": "Topics",
952
  "contents": [
953
  {
954
  "type": "image",
955
+ "key": "/topic-extraction/cells/img_24.jpg_r0_c0.png"
956
  }
957
  ],
958
+ "children": [
959
+ {
960
+ "title": "5.2",
961
+ "contents": [
962
+ {
963
+ "type": "image",
964
+ "key": "/topic-extraction/cells/img_24.jpg_r1_c1.png"
965
+ }
966
+ ],
967
+ "children": []
968
+ },
969
+ {
970
+ "title": "5.3",
971
+ "contents": [
972
+ {
973
+ "type": "image",
974
+ "key": "/topic-extraction/cells/img_24.jpg_r2_c0.png"
975
+ }
976
+ ],
977
+ "children": []
978
+ }
979
+ ]
980
  },
981
  {
982
  "title": "",
983
+ "contents": [],
984
+ "children": [
985
  {
986
+ "title": "7.1",
987
+ "contents": [
988
+ {
989
+ "type": "image",
990
+ "key": "/topic-extraction/cells/img_25.jpg_r1_c0.png"
991
+ }
992
+ ],
993
+ "children": []
994
+ },
995
+ {
996
+ "title": "7.2",
997
+ "contents": [
998
+ {
999
+ "type": "image",
1000
+ "key": "/topic-extraction/cells/img_25.jpg_r2_c0.png"
1001
+ }
1002
+ ],
1003
+ "children": []
1004
+ },
1005
+ {
1006
+ "title": "7.3",
1007
+ "contents": [
1008
+ {
1009
+ "type": "image",
1010
+ "key": "/topic-extraction/cells/img_25.jpg_r3_c0.png"
1011
+ }
1012
+ ],
1013
+ "children": []
1014
+ },
1015
+ {
1016
+ "title": "7.4",
1017
+ "contents": [
1018
+ {
1019
+ "type": "image",
1020
+ "key": "/topic-extraction/cells/img_25.jpg_r4_c0.png"
1021
+ }
1022
+ ],
1023
+ "children": []
1024
+ },
1025
+ {
1026
+ "title": "7.5",
1027
+ "contents": [
1028
+ {
1029
+ "type": "image",
1030
+ "key": "/topic-extraction/cells/img_25.jpg_r5_c0.png"
1031
+ }
1032
+ ],
1033
+ "children": []
1034
  }
1035
+ ]
 
1036
  },
1037
  {
1038
+ "title": "8 Forces and Newton's laws",
1039
  "contents": [
1040
  {
1041
  "type": "image",
1042
+ "key": "/topic-extraction/cells/img_26.jpg_r0_c0.png"
1043
  }
1044
  ],
1045
+ "children": [
1046
+ {
1047
+ "title": "8.1",
1048
+ "contents": [
1049
+ {
1050
+ "type": "image",
1051
+ "key": "/topic-extraction/cells/img_26.jpg_r0_c1.png"
1052
+ }
1053
+ ],
1054
+ "children": []
1055
+ },
1056
+ {
1057
+ "title": "8.2",
1058
+ "contents": [
1059
+ {
1060
+ "type": "image",
1061
+ "key": "/topic-extraction/cells/img_26.jpg_r1_c0.png"
1062
+ }
1063
+ ],
1064
+ "children": []
1065
+ },
1066
+ {
1067
+ "title": "8.3",
1068
+ "contents": [
1069
+ {
1070
+ "type": "image",
1071
+ "key": "/topic-extraction/cells/img_26.jpg_r2_c0.png"
1072
+ }
1073
+ ],
1074
+ "children": []
1075
+ }
1076
+ ]
1077
  },
1078
  {
1079
+ "title": "Topics",
1080
  "contents": [
1081
  {
1082
  "type": "image",
1083
+ "key": "/topic-extraction/cells/img_27.jpg_r0_c0.png"
1084
  }
1085
  ],
1086
+ "children": [
1087
+ {
1088
+ "title": "8.4",
1089
+ "contents": [
1090
+ {
1091
+ "type": "image",
1092
+ "key": "/topic-extraction/cells/img_27.jpg_r1_c1.png"
1093
+ }
1094
+ ],
1095
+ "children": []
1096
+ },
1097
+ {
1098
+ "title": "8.5",
1099
+ "contents": [
1100
+ {
1101
+ "type": "image",
1102
+ "key": "/topic-extraction/cells/img_27.jpg_r2_c0.png"
1103
+ }
1104
+ ],
1105
+ "children": []
1106
+ },
1107
+ {
1108
+ "title": "8.6",
1109
+ "contents": [
1110
+ {
1111
+ "type": "image",
1112
+ "key": "/topic-extraction/cells/img_27.jpg_r3_c0.png"
1113
+ }
1114
+ ],
1115
+ "children": []
1116
+ },
1117
+ {
1118
+ "title": "9.1",
1119
+ "contents": [
1120
+ {
1121
+ "type": "image",
1122
+ "key": "/topic-extraction/cells/img_27.jpg_r4_c1.png"
1123
+ }
1124
+ ],
1125
+ "children": []
1126
+ }
1127
+ ]
1128
  },
1129
  {
1130
+ "title": "Reason, interpret and communicate mathematically",
1131
  "contents": [
1132
  {
1133
  "type": "image",
1134
+ "key": "/topic-extraction/cells/img_28.jpg_r1_c0.png"
1135
  }
1136
  ],
1137
  "children": []
wje/final_output.json DELETED
@@ -1,265 +0,0 @@
1
- {
2
- "subtopics": {
3
- "Paper 1 and Paper 2: Pure Mathematics": [
4
- 11,
5
- 29
6
- ],
7
- "Paper 3: Statistics and Mechanics": [
8
- 30,
9
- 40
10
- ]
11
- },
12
- "local_images": {
13
- "e7e5c8f3c0a6316c2b50698c45ebe05b49bfd8bbe47a07b7b1929dd3cfd3e609.jpg": "img_1.jpg",
14
- "b243ef738ec2465b1cc00f4dd8dd0e5f5e10a91debf7762903ac6c023dd238c4.jpg": "img_2.jpg",
15
- "5e22a8a8c5bc23ee4d16bda9cce4a6ab4bb53854074fd4d691531d5adb9f3ebe.jpg": "img_3.jpg",
16
- "0e70645e72eadab75c88846b7947fc1216cf31d325febf02fbdf4898b430465d.jpg": "img_4.jpg",
17
- "52484f429af5d74ef75e96bf132b15fdc4acd2ed46accb981d670592dcc57ff2.jpg": "img_5.jpg",
18
- "5a153708e7a128d8f6477cb294d2f902d3a9bc57af709c81ccc3937b96580137.jpg": "img_6.jpg",
19
- "fd3a52607bef204e6998e09db82d195de76d929399c2cb1a63e26f87054eec6f.jpg": "img_7.jpg",
20
- "93885318f77c148b9fb1cd162cb9938d6f6cd795d000d5b997f2297198462fcf.jpg": "img_8.jpg",
21
- "133a659582f49fb71dc5fcae918278e6659a257026e35741ba8e6b94fcdb9de6.jpg": "img_9.jpg",
22
- "860d10a56a7e892c674f74fd030592339e629fb80d6e6dbfc343f95ec65a7c16.jpg": "img_10.jpg",
23
- "ae5ee4479ae736ff433ca9b2a1c3f753bbc8cc11a384e27cb710b426757c31e9.jpg": "img_11.jpg",
24
- "39ac9ccc8cd681e552fb1ae08341b4a2dcb33ea8fe6c787daf99fb993d29e57f.jpg": "img_12.jpg",
25
- "6d67beb5c0bf2168a87ad6b7c179ff9c7de8bbd7e720f77f7bf206080cceb589.jpg": "img_13.jpg",
26
- "b89d31200bc06fda181bd2538b5f3274de3e52b0adc7dd023ca676e168e6d487.jpg": "img_14.jpg",
27
- "78907967ba7a56221a0987e6e696e361c82fcf057f41659e4aa77943a62b6763.jpg": "img_15.jpg",
28
- "bd3eb31469dd7b72e9773564915dc768e2e152878d887dcab34e83875e0625bb.jpg": "img_16.jpg",
29
- "f1f1acb21df3d785fa3120fbae5fc74f7064769d9b38524bb991cfaa110177f6.jpg": "img_17.jpg",
30
- "b8b803d008ec9053c40f4a9c2c265a8a0b15742059331dc7997336c94ab74dc4.jpg": "img_18.jpg",
31
- "9cbb4e3b89d75d1d5da2fe8c6ccc4c1d3f612779abaccf3322f8b78b2db8a161.jpg": "img_19.jpg",
32
- "c6c4dfd8d7d1b83ef05d0ad30d4d09e75fe1d1152099b976eef7aededb872873.jpg": "img_20.jpg",
33
- "7eaeb5261341b3dbe0554989b2681f87c4b7a418e21445f3e88aa873e16db0df.jpg": "img_21.jpg",
34
- "22cbebb54b25ccf620ab043fc977fcc709fd5692d1e74b02267b8f689284225d.jpg": "img_22.jpg",
35
- "7a3f07a668cfc19e26c35fb1421908638d5a233723942301eda2764a1e81374d.jpg": "img_23.jpg",
36
- "42b9e068a3fddcc2adaa6736e0ccee448c0302349547c8eaed8a07c870d29b17.jpg": "img_24.jpg",
37
- "2efcd74e6c9447686d3e08d2dca6998ffd44f5cf0323d7d93b4213a2337b32ab.jpg": "img_25.jpg",
38
- "6ba16781c7909a8a47a6a51e520e739320c22791147ad6bbd482473cf5c96717.jpg": "img_26.jpg",
39
- "3d3cdfbca59671749e9d93714510a36441a10769f6b43720f9f3e733d893ea3a.jpg": "img_27.jpg",
40
- "35394d307566e17440ab0322a3c915a4537db1db85628b38f2fe7827d19d719d.jpg": "img_28.jpg"
41
- },
42
- "tables_extracted": {
43
- "img_1.jpg": [
44
- "img_1.jpg_rows/row_0/col_0.png",
45
- "img_1.jpg_rows/row_0/col_1.png",
46
- "img_1.jpg_rows/row_1/col_0.png",
47
- "img_1.jpg_rows/row_1/col_1.png"
48
- ],
49
- "img_2.jpg": [
50
- "img_2.jpg_rows/row_0/col_0.png",
51
- "img_2.jpg_rows/row_0/col_1.png",
52
- "img_2.jpg_rows/row_1/col_0.png",
53
- "img_2.jpg_rows/row_2/col_0.png",
54
- "img_2.jpg_rows/row_3/col_0.png"
55
- ],
56
- "img_3.jpg": [
57
- "img_3.jpg_rows/row_0/col_0.png",
58
- "img_3.jpg_rows/row_0/col_1.png",
59
- "img_3.jpg_rows/row_1/col_0.png"
60
- ],
61
- "img_4.jpg": [
62
- "img_4.jpg_rows/row_0/col_0.png",
63
- "img_4.jpg_rows/row_0/col_1.png",
64
- "img_4.jpg_rows/row_1/col_0.png",
65
- "img_4.jpg_rows/row_1/col_1.png"
66
- ],
67
- "img_5.jpg": [
68
- "img_5.jpg_rows/row_0/col_0.png",
69
- "img_5.jpg_rows/row_0/col_1.png",
70
- "img_5.jpg_rows/row_1/col_0.png",
71
- "img_5.jpg_rows/row_1/col_1.png",
72
- "img_5.jpg_rows/row_2/col_0.png"
73
- ],
74
- "img_6.jpg": [
75
- "img_6.jpg_rows/row_0/col_0.png",
76
- "img_6.jpg_rows/row_0/col_1.png",
77
- "img_6.jpg_rows/row_1/col_0.png",
78
- "img_6.jpg_rows/row_1/col_1.png"
79
- ],
80
- "img_7.jpg": [
81
- "img_7.jpg_rows/row_0/col_0.png",
82
- "img_7.jpg_rows/row_0/col_1.png",
83
- "img_7.jpg_rows/row_1/col_0.png",
84
- "img_7.jpg_rows/row_2/col_0.png",
85
- "img_7.jpg_rows/row_2/col_1.png"
86
- ],
87
- "img_8.jpg": [
88
- "img_8.jpg_rows/row_0/col_0.png",
89
- "img_8.jpg_rows/row_0/col_1.png",
90
- "img_8.jpg_rows/row_0/col_2.png",
91
- "img_8.jpg_rows/row_1/col_0.png",
92
- "img_8.jpg_rows/row_1/col_1.png",
93
- "img_8.jpg_rows/row_1/col_2.png",
94
- "img_8.jpg_rows/row_2/col_0.png",
95
- "img_8.jpg_rows/row_2/col_1.png",
96
- "img_8.jpg_rows/row_3/col_0.png",
97
- "img_8.jpg_rows/row_3/col_1.png",
98
- "img_8.jpg_rows/row_4/col_0.png",
99
- "img_8.jpg_rows/row_4/col_1.png",
100
- "img_8.jpg_rows/row_5/col_0.png",
101
- "img_8.jpg_rows/row_5/col_1.png"
102
- ],
103
- "img_9.jpg": [
104
- "img_9.jpg_rows/row_0/col_0.png",
105
- "img_9.jpg_rows/row_0/col_1.png",
106
- "img_9.jpg_rows/row_0/col_2.png",
107
- "img_9.jpg_rows/row_1/col_0.png",
108
- "img_9.jpg_rows/row_1/col_1.png",
109
- "img_9.jpg_rows/row_2/col_0.png",
110
- "img_9.jpg_rows/row_2/col_1.png",
111
- "img_9.jpg_rows/row_3/col_0.png",
112
- "img_9.jpg_rows/row_3/col_1.png"
113
- ],
114
- "img_10.jpg": [
115
- "img_10.jpg_rows/row_0/col_0.png",
116
- "img_10.jpg_rows/row_0/col_1.png",
117
- "img_10.jpg_rows/row_1/col_0.png",
118
- "img_10.jpg_rows/row_2/col_0.png",
119
- "img_10.jpg_rows/row_3/col_0.png"
120
- ],
121
- "img_11.jpg": [
122
- "img_11.jpg_rows/row_0/col_0.png",
123
- "img_11.jpg_rows/row_1/col_0.png",
124
- "img_11.jpg_rows/row_2/col_0.png",
125
- "img_11.jpg_rows/row_3/col_0.png",
126
- "img_11.jpg_rows/row_4/col_0.png",
127
- "img_11.jpg_rows/row_5/col_0.png"
128
- ],
129
- "img_12.jpg": [
130
- "img_12.jpg_rows/row_0/col_0.png",
131
- "img_12.jpg_rows/row_0/col_1.png",
132
- "img_12.jpg_rows/row_1/col_0.png",
133
- "img_12.jpg_rows/row_1/col_1.png",
134
- "img_12.jpg_rows/row_2/col_0.png",
135
- "img_12.jpg_rows/row_2/col_1.png"
136
- ],
137
- "img_13.jpg": [
138
- "img_13.jpg_rows/row_0/col_0.png",
139
- "img_13.jpg_rows/row_0/col_1.png",
140
- "img_13.jpg_rows/row_1/col_0.png",
141
- "img_13.jpg_rows/row_1/col_1.png",
142
- "img_13.jpg_rows/row_2/col_0.png",
143
- "img_13.jpg_rows/row_3/col_0.png"
144
- ],
145
- "img_14.jpg": [
146
- "img_14.jpg_rows/row_0/col_0.png",
147
- "img_14.jpg_rows/row_0/col_1.png",
148
- "img_14.jpg_rows/row_1/col_0.png",
149
- "img_14.jpg_rows/row_1/col_1.png",
150
- "img_14.jpg_rows/row_2/col_0.png",
151
- "img_14.jpg_rows/row_3/col_0.png",
152
- "img_14.jpg_rows/row_4/col_0.png",
153
- "img_14.jpg_rows/row_4/col_1.png",
154
- "img_14.jpg_rows/row_5/col_0.png"
155
- ],
156
- "img_15.jpg": [
157
- "img_15.jpg_rows/row_0/col_0.png",
158
- "img_15.jpg_rows/row_0/col_1.png",
159
- "img_15.jpg_rows/row_1/col_0.png",
160
- "img_15.jpg_rows/row_1/col_1.png",
161
- "img_15.jpg_rows/row_2/col_0.png",
162
- "img_15.jpg_rows/row_3/col_0.png",
163
- "img_15.jpg_rows/row_4/col_0.png"
164
- ],
165
- "img_16.jpg": [
166
- "img_16.jpg_rows/row_0/col_0.png",
167
- "img_16.jpg_rows/row_0/col_1.png",
168
- "img_16.jpg_rows/row_1/col_0.png",
169
- "img_16.jpg_rows/row_1/col_1.png",
170
- "img_16.jpg_rows/row_2/col_0.png",
171
- "img_16.jpg_rows/row_3/col_0.png",
172
- "img_16.jpg_rows/row_3/col_1.png",
173
- "img_16.jpg_rows/row_4/col_0.png",
174
- "img_16.jpg_rows/row_5/col_0.png"
175
- ],
176
- "img_17.jpg": [
177
- "img_17.jpg_rows/row_0/col_0.png",
178
- "img_17.jpg_rows/row_0/col_1.png",
179
- "img_17.jpg_rows/row_1/col_0.png",
180
- "img_17.jpg_rows/row_2/col_0.png",
181
- "img_17.jpg_rows/row_2/col_1.png",
182
- "img_17.jpg_rows/row_3/col_0.png",
183
- "img_17.jpg_rows/row_4/col_0.png",
184
- "img_17.jpg_rows/row_5/col_0.png"
185
- ],
186
- "img_18.jpg": [
187
- "img_18.jpg_rows/row_0/col_0.png",
188
- "img_18.jpg_rows/row_0/col_1.png",
189
- "img_18.jpg_rows/row_1/col_0.png",
190
- "img_18.jpg_rows/row_1/col_1.png"
191
- ],
192
- "img_19.jpg": [
193
- "img_19.jpg_rows/row_0/col_0.png",
194
- "img_19.jpg_rows/row_0/col_1.png",
195
- "img_19.jpg_rows/row_1/col_0.png",
196
- "img_19.jpg_rows/row_1/col_1.png",
197
- "img_19.jpg_rows/row_2/col_0.png",
198
- "img_19.jpg_rows/row_2/col_1.png"
199
- ],
200
- "img_20.jpg": [
201
- "img_20.jpg_rows/row_0/col_0.png",
202
- "img_20.jpg_rows/row_0/col_1.png",
203
- "img_20.jpg_rows/row_1/col_0.png",
204
- "img_20.jpg_rows/row_1/col_1.png"
205
- ],
206
- "img_21.jpg": [
207
- "img_21.jpg_rows/row_0/col_0.png",
208
- "img_21.jpg_rows/row_0/col_1.png",
209
- "img_21.jpg_rows/row_1/col_0.png",
210
- "img_21.jpg_rows/row_1/col_1.png"
211
- ],
212
- "img_22.jpg": [
213
- "img_22.jpg_rows/row_0/col_0.png",
214
- "img_22.jpg_rows/row_0/col_1.png",
215
- "img_22.jpg_rows/row_1/col_0.png",
216
- "img_22.jpg_rows/row_1/col_1.png",
217
- "img_22.jpg_rows/row_2/col_0.png",
218
- "img_22.jpg_rows/row_2/col_1.png",
219
- "img_22.jpg_rows/row_3/col_0.png"
220
- ],
221
- "img_23.jpg": [
222
- "img_23.jpg_rows/row_0/col_0.png",
223
- "img_23.jpg_rows/row_0/col_1.png",
224
- "img_23.jpg_rows/row_1/col_0.png",
225
- "img_23.jpg_rows/row_1/col_1.png"
226
- ],
227
- "img_24.jpg": [
228
- "img_24.jpg_rows/row_0/col_0.png",
229
- "img_24.jpg_rows/row_0/col_1.png",
230
- "img_24.jpg_rows/row_1/col_0.png",
231
- "img_24.jpg_rows/row_1/col_1.png",
232
- "img_24.jpg_rows/row_2/col_0.png"
233
- ],
234
- "img_25.jpg": [
235
- "img_25.jpg_rows/row_0/col_0.png",
236
- "img_25.jpg_rows/row_1/col_0.png",
237
- "img_25.jpg_rows/row_2/col_0.png",
238
- "img_25.jpg_rows/row_3/col_0.png",
239
- "img_25.jpg_rows/row_4/col_0.png",
240
- "img_25.jpg_rows/row_5/col_0.png"
241
- ],
242
- "img_26.jpg": [
243
- "img_26.jpg_rows/row_0/col_0.png",
244
- "img_26.jpg_rows/row_0/col_1.png",
245
- "img_26.jpg_rows/row_1/col_0.png",
246
- "img_26.jpg_rows/row_2/col_0.png"
247
- ],
248
- "img_27.jpg": [
249
- "img_27.jpg_rows/row_0/col_0.png",
250
- "img_27.jpg_rows/row_0/col_1.png",
251
- "img_27.jpg_rows/row_1/col_0.png",
252
- "img_27.jpg_rows/row_1/col_1.png",
253
- "img_27.jpg_rows/row_2/col_0.png",
254
- "img_27.jpg_rows/row_3/col_0.png",
255
- "img_27.jpg_rows/row_4/col_0.png",
256
- "img_27.jpg_rows/row_4/col_1.png"
257
- ],
258
- "img_28.jpg": [
259
- "img_28.jpg_rows/row_0/col_0.png",
260
- "img_28.jpg_rows/row_1/col_0.png",
261
- "img_28.jpg_rows/row_2/col_0.png",
262
- "img_28.jpg_rows/row_3/col_0.png"
263
- ]
264
- }
265
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wje/final_output_local.json DELETED
@@ -1,265 +0,0 @@
1
- {
2
- "subtopics": {
3
- "Paper 1 and Paper 2: Pure Mathematics": [
4
- 11,
5
- 29
6
- ],
7
- "Paper 3: Statistics and Mechanics": [
8
- 30,
9
- 40
10
- ]
11
- },
12
- "local_images": {
13
- "e7e5c8f3c0a6316c2b50698c45ebe05b49bfd8bbe47a07b7b1929dd3cfd3e609.jpg": "img_1.jpg",
14
- "b243ef738ec2465b1cc00f4dd8dd0e5f5e10a91debf7762903ac6c023dd238c4.jpg": "img_2.jpg",
15
- "5e22a8a8c5bc23ee4d16bda9cce4a6ab4bb53854074fd4d691531d5adb9f3ebe.jpg": "img_3.jpg",
16
- "0e70645e72eadab75c88846b7947fc1216cf31d325febf02fbdf4898b430465d.jpg": "img_4.jpg",
17
- "52484f429af5d74ef75e96bf132b15fdc4acd2ed46accb981d670592dcc57ff2.jpg": "img_5.jpg",
18
- "5a153708e7a128d8f6477cb294d2f902d3a9bc57af709c81ccc3937b96580137.jpg": "img_6.jpg",
19
- "fd3a52607bef204e6998e09db82d195de76d929399c2cb1a63e26f87054eec6f.jpg": "img_7.jpg",
20
- "93885318f77c148b9fb1cd162cb9938d6f6cd795d000d5b997f2297198462fcf.jpg": "img_8.jpg",
21
- "133a659582f49fb71dc5fcae918278e6659a257026e35741ba8e6b94fcdb9de6.jpg": "img_9.jpg",
22
- "860d10a56a7e892c674f74fd030592339e629fb80d6e6dbfc343f95ec65a7c16.jpg": "img_10.jpg",
23
- "ae5ee4479ae736ff433ca9b2a1c3f753bbc8cc11a384e27cb710b426757c31e9.jpg": "img_11.jpg",
24
- "39ac9ccc8cd681e552fb1ae08341b4a2dcb33ea8fe6c787daf99fb993d29e57f.jpg": "img_12.jpg",
25
- "6d67beb5c0bf2168a87ad6b7c179ff9c7de8bbd7e720f77f7bf206080cceb589.jpg": "img_13.jpg",
26
- "b89d31200bc06fda181bd2538b5f3274de3e52b0adc7dd023ca676e168e6d487.jpg": "img_14.jpg",
27
- "78907967ba7a56221a0987e6e696e361c82fcf057f41659e4aa77943a62b6763.jpg": "img_15.jpg",
28
- "bd3eb31469dd7b72e9773564915dc768e2e152878d887dcab34e83875e0625bb.jpg": "img_16.jpg",
29
- "f1f1acb21df3d785fa3120fbae5fc74f7064769d9b38524bb991cfaa110177f6.jpg": "img_17.jpg",
30
- "b8b803d008ec9053c40f4a9c2c265a8a0b15742059331dc7997336c94ab74dc4.jpg": "img_18.jpg",
31
- "9cbb4e3b89d75d1d5da2fe8c6ccc4c1d3f612779abaccf3322f8b78b2db8a161.jpg": "img_19.jpg",
32
- "c6c4dfd8d7d1b83ef05d0ad30d4d09e75fe1d1152099b976eef7aededb872873.jpg": "img_20.jpg",
33
- "7eaeb5261341b3dbe0554989b2681f87c4b7a418e21445f3e88aa873e16db0df.jpg": "img_21.jpg",
34
- "22cbebb54b25ccf620ab043fc977fcc709fd5692d1e74b02267b8f689284225d.jpg": "img_22.jpg",
35
- "7a3f07a668cfc19e26c35fb1421908638d5a233723942301eda2764a1e81374d.jpg": "img_23.jpg",
36
- "42b9e068a3fddcc2adaa6736e0ccee448c0302349547c8eaed8a07c870d29b17.jpg": "img_24.jpg",
37
- "2efcd74e6c9447686d3e08d2dca6998ffd44f5cf0323d7d93b4213a2337b32ab.jpg": "img_25.jpg",
38
- "6ba16781c7909a8a47a6a51e520e739320c22791147ad6bbd482473cf5c96717.jpg": "img_26.jpg",
39
- "3d3cdfbca59671749e9d93714510a36441a10769f6b43720f9f3e733d893ea3a.jpg": "img_27.jpg",
40
- "35394d307566e17440ab0322a3c915a4537db1db85628b38f2fe7827d19d719d.jpg": "img_28.jpg"
41
- },
42
- "tables_extracted": {
43
- "img_1.jpg": [
44
- "img_1.jpg_rows/row_0/col_0.png",
45
- "img_1.jpg_rows/row_0/col_1.png",
46
- "img_1.jpg_rows/row_1/col_0.png",
47
- "img_1.jpg_rows/row_1/col_1.png"
48
- ],
49
- "img_2.jpg": [
50
- "img_2.jpg_rows/row_0/col_0.png",
51
- "img_2.jpg_rows/row_0/col_1.png",
52
- "img_2.jpg_rows/row_1/col_0.png",
53
- "img_2.jpg_rows/row_2/col_0.png",
54
- "img_2.jpg_rows/row_3/col_0.png"
55
- ],
56
- "img_3.jpg": [
57
- "img_3.jpg_rows/row_0/col_0.png",
58
- "img_3.jpg_rows/row_0/col_1.png",
59
- "img_3.jpg_rows/row_1/col_0.png"
60
- ],
61
- "img_4.jpg": [
62
- "img_4.jpg_rows/row_0/col_0.png",
63
- "img_4.jpg_rows/row_0/col_1.png",
64
- "img_4.jpg_rows/row_1/col_0.png",
65
- "img_4.jpg_rows/row_1/col_1.png"
66
- ],
67
- "img_5.jpg": [
68
- "img_5.jpg_rows/row_0/col_0.png",
69
- "img_5.jpg_rows/row_0/col_1.png",
70
- "img_5.jpg_rows/row_1/col_0.png",
71
- "img_5.jpg_rows/row_1/col_1.png",
72
- "img_5.jpg_rows/row_2/col_0.png"
73
- ],
74
- "img_6.jpg": [
75
- "img_6.jpg_rows/row_0/col_0.png",
76
- "img_6.jpg_rows/row_0/col_1.png",
77
- "img_6.jpg_rows/row_1/col_0.png",
78
- "img_6.jpg_rows/row_1/col_1.png"
79
- ],
80
- "img_7.jpg": [
81
- "img_7.jpg_rows/row_0/col_0.png",
82
- "img_7.jpg_rows/row_0/col_1.png",
83
- "img_7.jpg_rows/row_1/col_0.png",
84
- "img_7.jpg_rows/row_2/col_0.png",
85
- "img_7.jpg_rows/row_2/col_1.png"
86
- ],
87
- "img_8.jpg": [
88
- "img_8.jpg_rows/row_0/col_0.png",
89
- "img_8.jpg_rows/row_0/col_1.png",
90
- "img_8.jpg_rows/row_0/col_2.png",
91
- "img_8.jpg_rows/row_1/col_0.png",
92
- "img_8.jpg_rows/row_1/col_1.png",
93
- "img_8.jpg_rows/row_1/col_2.png",
94
- "img_8.jpg_rows/row_2/col_0.png",
95
- "img_8.jpg_rows/row_2/col_1.png",
96
- "img_8.jpg_rows/row_3/col_0.png",
97
- "img_8.jpg_rows/row_3/col_1.png",
98
- "img_8.jpg_rows/row_4/col_0.png",
99
- "img_8.jpg_rows/row_4/col_1.png",
100
- "img_8.jpg_rows/row_5/col_0.png",
101
- "img_8.jpg_rows/row_5/col_1.png"
102
- ],
103
- "img_9.jpg": [
104
- "img_9.jpg_rows/row_0/col_0.png",
105
- "img_9.jpg_rows/row_0/col_1.png",
106
- "img_9.jpg_rows/row_0/col_2.png",
107
- "img_9.jpg_rows/row_1/col_0.png",
108
- "img_9.jpg_rows/row_1/col_1.png",
109
- "img_9.jpg_rows/row_2/col_0.png",
110
- "img_9.jpg_rows/row_2/col_1.png",
111
- "img_9.jpg_rows/row_3/col_0.png",
112
- "img_9.jpg_rows/row_3/col_1.png"
113
- ],
114
- "img_10.jpg": [
115
- "img_10.jpg_rows/row_0/col_0.png",
116
- "img_10.jpg_rows/row_0/col_1.png",
117
- "img_10.jpg_rows/row_1/col_0.png",
118
- "img_10.jpg_rows/row_2/col_0.png",
119
- "img_10.jpg_rows/row_3/col_0.png"
120
- ],
121
- "img_11.jpg": [
122
- "img_11.jpg_rows/row_0/col_0.png",
123
- "img_11.jpg_rows/row_1/col_0.png",
124
- "img_11.jpg_rows/row_2/col_0.png",
125
- "img_11.jpg_rows/row_3/col_0.png",
126
- "img_11.jpg_rows/row_4/col_0.png",
127
- "img_11.jpg_rows/row_5/col_0.png"
128
- ],
129
- "img_12.jpg": [
130
- "img_12.jpg_rows/row_0/col_0.png",
131
- "img_12.jpg_rows/row_0/col_1.png",
132
- "img_12.jpg_rows/row_1/col_0.png",
133
- "img_12.jpg_rows/row_1/col_1.png",
134
- "img_12.jpg_rows/row_2/col_0.png",
135
- "img_12.jpg_rows/row_2/col_1.png"
136
- ],
137
- "img_13.jpg": [
138
- "img_13.jpg_rows/row_0/col_0.png",
139
- "img_13.jpg_rows/row_0/col_1.png",
140
- "img_13.jpg_rows/row_1/col_0.png",
141
- "img_13.jpg_rows/row_1/col_1.png",
142
- "img_13.jpg_rows/row_2/col_0.png",
143
- "img_13.jpg_rows/row_3/col_0.png"
144
- ],
145
- "img_14.jpg": [
146
- "img_14.jpg_rows/row_0/col_0.png",
147
- "img_14.jpg_rows/row_0/col_1.png",
148
- "img_14.jpg_rows/row_1/col_0.png",
149
- "img_14.jpg_rows/row_1/col_1.png",
150
- "img_14.jpg_rows/row_2/col_0.png",
151
- "img_14.jpg_rows/row_3/col_0.png",
152
- "img_14.jpg_rows/row_4/col_0.png",
153
- "img_14.jpg_rows/row_4/col_1.png",
154
- "img_14.jpg_rows/row_5/col_0.png"
155
- ],
156
- "img_15.jpg": [
157
- "img_15.jpg_rows/row_0/col_0.png",
158
- "img_15.jpg_rows/row_0/col_1.png",
159
- "img_15.jpg_rows/row_1/col_0.png",
160
- "img_15.jpg_rows/row_1/col_1.png",
161
- "img_15.jpg_rows/row_2/col_0.png",
162
- "img_15.jpg_rows/row_3/col_0.png",
163
- "img_15.jpg_rows/row_4/col_0.png"
164
- ],
165
- "img_16.jpg": [
166
- "img_16.jpg_rows/row_0/col_0.png",
167
- "img_16.jpg_rows/row_0/col_1.png",
168
- "img_16.jpg_rows/row_1/col_0.png",
169
- "img_16.jpg_rows/row_1/col_1.png",
170
- "img_16.jpg_rows/row_2/col_0.png",
171
- "img_16.jpg_rows/row_3/col_0.png",
172
- "img_16.jpg_rows/row_3/col_1.png",
173
- "img_16.jpg_rows/row_4/col_0.png",
174
- "img_16.jpg_rows/row_5/col_0.png"
175
- ],
176
- "img_17.jpg": [
177
- "img_17.jpg_rows/row_0/col_0.png",
178
- "img_17.jpg_rows/row_0/col_1.png",
179
- "img_17.jpg_rows/row_1/col_0.png",
180
- "img_17.jpg_rows/row_2/col_0.png",
181
- "img_17.jpg_rows/row_2/col_1.png",
182
- "img_17.jpg_rows/row_3/col_0.png",
183
- "img_17.jpg_rows/row_4/col_0.png",
184
- "img_17.jpg_rows/row_5/col_0.png"
185
- ],
186
- "img_18.jpg": [
187
- "img_18.jpg_rows/row_0/col_0.png",
188
- "img_18.jpg_rows/row_0/col_1.png",
189
- "img_18.jpg_rows/row_1/col_0.png",
190
- "img_18.jpg_rows/row_1/col_1.png"
191
- ],
192
- "img_19.jpg": [
193
- "img_19.jpg_rows/row_0/col_0.png",
194
- "img_19.jpg_rows/row_0/col_1.png",
195
- "img_19.jpg_rows/row_1/col_0.png",
196
- "img_19.jpg_rows/row_1/col_1.png",
197
- "img_19.jpg_rows/row_2/col_0.png",
198
- "img_19.jpg_rows/row_2/col_1.png"
199
- ],
200
- "img_20.jpg": [
201
- "img_20.jpg_rows/row_0/col_0.png",
202
- "img_20.jpg_rows/row_0/col_1.png",
203
- "img_20.jpg_rows/row_1/col_0.png",
204
- "img_20.jpg_rows/row_1/col_1.png"
205
- ],
206
- "img_21.jpg": [
207
- "img_21.jpg_rows/row_0/col_0.png",
208
- "img_21.jpg_rows/row_0/col_1.png",
209
- "img_21.jpg_rows/row_1/col_0.png",
210
- "img_21.jpg_rows/row_1/col_1.png"
211
- ],
212
- "img_22.jpg": [
213
- "img_22.jpg_rows/row_0/col_0.png",
214
- "img_22.jpg_rows/row_0/col_1.png",
215
- "img_22.jpg_rows/row_1/col_0.png",
216
- "img_22.jpg_rows/row_1/col_1.png",
217
- "img_22.jpg_rows/row_2/col_0.png",
218
- "img_22.jpg_rows/row_2/col_1.png",
219
- "img_22.jpg_rows/row_3/col_0.png"
220
- ],
221
- "img_23.jpg": [
222
- "img_23.jpg_rows/row_0/col_0.png",
223
- "img_23.jpg_rows/row_0/col_1.png",
224
- "img_23.jpg_rows/row_1/col_0.png",
225
- "img_23.jpg_rows/row_1/col_1.png"
226
- ],
227
- "img_24.jpg": [
228
- "img_24.jpg_rows/row_0/col_0.png",
229
- "img_24.jpg_rows/row_0/col_1.png",
230
- "img_24.jpg_rows/row_1/col_0.png",
231
- "img_24.jpg_rows/row_1/col_1.png",
232
- "img_24.jpg_rows/row_2/col_0.png"
233
- ],
234
- "img_25.jpg": [
235
- "img_25.jpg_rows/row_0/col_0.png",
236
- "img_25.jpg_rows/row_1/col_0.png",
237
- "img_25.jpg_rows/row_2/col_0.png",
238
- "img_25.jpg_rows/row_3/col_0.png",
239
- "img_25.jpg_rows/row_4/col_0.png",
240
- "img_25.jpg_rows/row_5/col_0.png"
241
- ],
242
- "img_26.jpg": [
243
- "img_26.jpg_rows/row_0/col_0.png",
244
- "img_26.jpg_rows/row_0/col_1.png",
245
- "img_26.jpg_rows/row_1/col_0.png",
246
- "img_26.jpg_rows/row_2/col_0.png"
247
- ],
248
- "img_27.jpg": [
249
- "img_27.jpg_rows/row_0/col_0.png",
250
- "img_27.jpg_rows/row_0/col_1.png",
251
- "img_27.jpg_rows/row_1/col_0.png",
252
- "img_27.jpg_rows/row_1/col_1.png",
253
- "img_27.jpg_rows/row_2/col_0.png",
254
- "img_27.jpg_rows/row_3/col_0.png",
255
- "img_27.jpg_rows/row_4/col_0.png",
256
- "img_27.jpg_rows/row_4/col_1.png"
257
- ],
258
- "img_28.jpg": [
259
- "img_28.jpg_rows/row_0/col_0.png",
260
- "img_28.jpg_rows/row_1/col_0.png",
261
- "img_28.jpg_rows/row_2/col_0.png",
262
- "img_28.jpg_rows/row_3/col_0.png"
263
- ]
264
- }
265
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wje/img_1.jpg_rows/row_0/col_0.png DELETED
Binary file (4.73 kB)
 
wje/img_1.jpg_rows/row_0/col_1.png DELETED
Binary file (14 kB)
 
wje/img_1.jpg_rows/row_1/col_0.png DELETED
Binary file (16.5 kB)
 
wje/img_1.jpg_rows/row_1/col_1.png DELETED
Binary file (672 kB)
 
wje/img_10.jpg_rows/row_0/col_0.png DELETED
Binary file (21.3 kB)
 
wje/img_10.jpg_rows/row_0/col_1.png DELETED
Binary file (190 kB)
 
wje/img_10.jpg_rows/row_1/col_0.png DELETED
Binary file (301 kB)
 
wje/img_10.jpg_rows/row_2/col_0.png DELETED
Binary file (268 kB)
 
wje/img_10.jpg_rows/row_3/col_0.png DELETED
Binary file (92.6 kB)
 
wje/img_11.jpg_rows/row_0/col_0.png DELETED
Binary file (142 kB)
 
wje/img_11.jpg_rows/row_1/col_0.png DELETED
Binary file (160 kB)
 
wje/img_11.jpg_rows/row_2/col_0.png DELETED
Binary file (187 kB)
 
wje/img_11.jpg_rows/row_3/col_0.png DELETED
Binary file (142 kB)
 
wje/img_11.jpg_rows/row_4/col_0.png DELETED
Binary file (88.8 kB)
 
wje/img_11.jpg_rows/row_5/col_0.png DELETED
Binary file (206 kB)
 
wje/img_12.jpg_rows/row_0/col_0.png DELETED
Binary file (4.95 kB)
 
wje/img_12.jpg_rows/row_0/col_1.png DELETED
Binary file (15 kB)
 
wje/img_12.jpg_rows/row_1/col_0.png DELETED
Binary file (32.1 kB)
 
wje/img_12.jpg_rows/row_1/col_1.png DELETED
Binary file (427 kB)
 
wje/img_12.jpg_rows/row_2/col_0.png DELETED
Binary file (19.8 kB)
 
wje/img_12.jpg_rows/row_2/col_1.png DELETED
Binary file (558 kB)
 
wje/img_13.jpg_rows/row_0/col_0.png DELETED
Binary file (4.86 kB)
 
wje/img_13.jpg_rows/row_0/col_1.png DELETED
Binary file (14.2 kB)
 
wje/img_13.jpg_rows/row_1/col_0.png DELETED
Binary file (23.5 kB)
 
wje/img_13.jpg_rows/row_1/col_1.png DELETED
Binary file (292 kB)
 
wje/img_13.jpg_rows/row_2/col_0.png DELETED
Binary file (278 kB)
 
wje/img_13.jpg_rows/row_3/col_0.png DELETED
Binary file (300 kB)
 
wje/img_14.jpg_rows/row_0/col_0.png DELETED
Binary file (4.5 kB)
 
wje/img_14.jpg_rows/row_0/col_1.png DELETED
Binary file (15.9 kB)
 
wje/img_14.jpg_rows/row_1/col_0.png DELETED
Binary file (23.5 kB)
 
wje/img_14.jpg_rows/row_1/col_1.png DELETED
Binary file (356 kB)
 
wje/img_14.jpg_rows/row_2/col_0.png DELETED
Binary file (137 kB)
 
wje/img_14.jpg_rows/row_3/col_0.png DELETED
Binary file (242 kB)
 
wje/img_14.jpg_rows/row_4/col_0.png DELETED
Binary file (11.8 kB)
 
wje/img_14.jpg_rows/row_4/col_1.png DELETED
Binary file (135 kB)
 
wje/img_14.jpg_rows/row_5/col_0.png DELETED
Binary file (393 kB)
 
wje/img_15.jpg_rows/row_0/col_0.png DELETED
Binary file (4.86 kB)
 
wje/img_15.jpg_rows/row_0/col_1.png DELETED
Binary file (15.1 kB)
 
wje/img_15.jpg_rows/row_1/col_0.png DELETED
Binary file (21.4 kB)