File size: 435 Bytes
fdfb2b8
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
import re

class TextPreprocessor:
    """Strip figure cross-references out of text.

    Only multi-level figure numbers (``X.Y``, ``X.Y.Z``, ...) are treated as
    references; a bare ``Figure 3`` is left untouched.
    """

    # A whole parenthetical reference: "(Figure 1.2)" or "(see Figure 1.2.3)".
    # The optional "see " prefix matters: without it, "(see Figure 1.2)" would
    # only lose its inner part and leave an orphaned "()" in the text.
    _PAREN_FIGURE_RE = re.compile(r'\((?:see )?Figure \d+(?:\.\d+)+\)')

    # An inline reference that is not wrapped in parentheses: "see Figure 1.2".
    _SEE_FIGURE_RE = re.compile(r'see Figure \d+(?:\.\d+)+')

    def __init__(self) -> None:
        """Create a preprocessor; it holds no per-instance state."""

    def remove_figure_references(self, text: str) -> str:
        """Return *text* with figure references removed.

        Removed forms (matching is case-sensitive):

        * ``(Figure X.Y)`` / ``(Figure X.Y.Z)``
        * ``(see Figure X.Y)`` / ``(see Figure X.Y.Z)``
        * ``see Figure X.Y`` / ``see Figure X.Y.Z``

        Whitespace around a removed reference is left exactly as it was, so
        callers that need normalised spacing must collapse it themselves.
        """
        # Parenthesised forms go first so the inline pattern cannot eat the
        # inside of "(see Figure X.Y)" and leave empty parentheses behind.
        without_parens = self._PAREN_FIGURE_RE.sub('', text)
        return self._SEE_FIGURE_RE.sub('', without_parens)