Spaces:

darthPanda
/

SentimentAnalysisTool

Runtime error

App Files Files Community

darthPanda committed on Mar 4, 2023

Commit

41dac9c

1 Parent(s): 5976712

hf6

Browse files

Files changed (2) hide show

app.py +229 -41
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -52,6 +52,13 @@ def get_emotion_model():
 tokenizer_emotion,model_emotion = get_emotion_model()
 def extract_text_from_pdf(path):
   text=''
   reader = PdfReader(path)
@@ -81,7 +88,7 @@ if 'filename_key' not in st.session_state:
     st.session_state.filename_key = ''
 st.write("""
-# Sentiment Analysis Tool
 """)
 #uploaded_file = st.file_uploader("Choose a PDF file")
 #uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
@@ -147,24 +154,74 @@ elif len(uploaded_file)>0:
                 else:
                     useful_sentence.append(i)
             del sentences
-            with st.spinner('Performing Sentiment Analysis...'):
-                tokenizer = tokenizer_sentiment
-                model = model_sentiment
-                pipe = pipeline(model="ProsusAI/finbert")
-                classifier = pipeline(model="ProsusAI/finbert")
-                output = classifier(useful_sentence)
-            with st.spinner('Performing Emotion Analysis...'):
-                tokenizer = tokenizer_emotion
-                model = model_emotion
-                classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
-                temp_emotion = classifier(useful_sentence)
-                df = pd.DataFrame.from_dict(output)
-                df['Sentence']= pd.Series(useful_sentence)
             ############################ 3. Processing ############################
@@ -186,7 +243,10 @@ elif len(uploaded_file)>0:
             pos_df = pos_df.sort_values('score', ascending=False)
             pos_df_mean = pos_df.score.mean()
             pos_df['score'] = pos_df['score'].round(4)
-            pos_df.rename(columns = {'Sentence':'Positive Sentences'}, inplace = True)
             neg_df = df[df['label']=='negative']
             neg_df = neg_df[['score', 'Sentence']]
@@ -194,6 +254,9 @@ elif len(uploaded_file)>0:
             neg_df_mean = neg_df.score.mean()
             neg_df['score'] = neg_df['score'].round(4)
             neg_df.rename(columns = {'Sentence':'Negative Sentences'}, inplace = True)
             neu_df = df[df['label']=='neutral']
             neu_df = neu_df[['score', 'Sentence']]
@@ -201,16 +264,15 @@ elif len(uploaded_file)>0:
             #neu_df_mean = neu_df.score.mean()
             neu_df['score'] = neu_df['score'].round(4)
             neu_df.rename(columns = {'Sentence':'Neutral Sentences'}, inplace = True)
             df_temp = neg_df
             df_temp = df_temp['score'] * -1
             df_temp = pd.concat([df_temp, pos_df])
             ############################ 3.2. Emotion Analysis ############################
-            output_emotion = []
-            for temp in temp_emotion:
-                output_emotion.append(temp[0])
             df_emotion = pd.DataFrame.from_dict(output_emotion)
             df_emotion['Sentence']= pd.Series(useful_sentence)
@@ -250,15 +312,56 @@ elif len(uploaded_file)>0:
             num_of_surprise_sentences = df_surprise.shape[0]
             if num_of_surprise_sentences == 0:
                 df_surprise.loc[0] = [0.0, '-------No surprised sentences found in report-------']
             ############################ 4. Plotting ############################
             fig = make_subplots(
-                rows=41, cols=6,
                 specs=[ [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
-                        [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [{"type": "pie", "rowspan": 6, "colspan": 2}, None, {"type": "indicator", "rowspan": 6, "colspan": 2}, None, {"type": "indicator", "rowspan": 6, "colspan": 2}, None],
                         [None, None, None, None, None, None],
@@ -278,9 +381,10 @@ elif len(uploaded_file)>0:
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
-                        [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [{"type": "bar", "rowspan": 6, "colspan": 6}, None, None, None, None, None],
                         [None, None, None, None, None, None],
@@ -296,14 +400,37 @@ elif len(uploaded_file)>0:
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                     ],
             )
             ############################ 4.1. Sentiment Analysis ############################
             fig.add_trace(go.Indicator(
                 mode = "number",
-                value = None,
-                title = {"text": "Sentiment Analysis"}), row=3, col=3)
             colors = px.colors.diverging.Portland#RdBu
             fig.add_trace(go.Pie(labels=labels, values=values, hole = 0.5,
@@ -372,15 +499,16 @@ elif len(uploaded_file)>0:
             )
             fig.add_trace(table_trace2, row=18, col=1)
-            fig.add_trace(go.Indicator(
-                mode = "number",
-                value = None,
-                title = {"text": "Emotion Analysis"}), row=24, col=3)
-            ############## Under Construction ##############
-            ############################ 4.2. Emotion Analysis ############################
-            #go.Bar(x=['Joy', 'Sadness', 'Anger', 'Surprise'], y=[3, 4, 1])
             # Add bar chart
             colors_emotions = ['#174ecf', '#cfc517', '#940625', '#17cfcb']
@@ -397,9 +525,9 @@ elif len(uploaded_file)>0:
                     marker_color=colors_emotions,
                     text=annotations,
                     textfont=dict(size=40)),
-                        row=28, col=1)
-            fig.update_xaxes(title_text='Emotions', title_font=dict(size=16), row=28, col=1)
-            fig.update_yaxes(title_text='Number of sentences', title_font=dict(size=16), row=28, col=1)
             # df_anger.loc[0] = [0.0, 'None']
             # df_anger
@@ -409,7 +537,7 @@ elif len(uploaded_file)>0:
                 cells=dict(values=[df_joy[name] for name in df_joy.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
-            fig.add_trace(table_trace2, row=35, col=1)
             ################## sadness table
             table_trace2 = go.Table(
@@ -417,7 +545,7 @@ elif len(uploaded_file)>0:
                 cells=dict(values=[df_sadness[name] for name in df_sadness.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
-            fig.add_trace(table_trace2, row=35, col=4)
             ################## surprise table
             table_trace2 = go.Table(
@@ -425,7 +553,7 @@ elif len(uploaded_file)>0:
                 cells=dict(values=[df_surprise[name] for name in df_surprise.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
-            fig.add_trace(table_trace2, row=38, col=1)
             ################## anger table
             table_trace2 = go.Table(
@@ -433,7 +561,66 @@ elif len(uploaded_file)>0:
                 cells=dict(values=[df_anger[name] for name in df_anger.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
-            fig.add_trace(table_trace2, row=38, col=4)
             import textwrap
             if len(title) > 120:
@@ -443,7 +630,8 @@ elif len(uploaded_file)>0:
             # Add HTML tags to force line breaks in the title text
             wrapped_title = "<br>".join(wrapped_title.split("\n"))
-            fig.update_layout(height=3000, showlegend=False, title={'text': f"<b>{wrapped_title} - Text Analysis Report</b>", 'x': 0.5, 'xanchor': 'center','font': {'size': 32}})
             #pyo.plot(fig, filename='report.html')

 tokenizer_emotion,model_emotion = get_emotion_model()
+@st.cache(allow_output_mutation=True)
+def get_intent_model():
+    """Build and return the zero-shot classification pipeline used for intent detection.
+
+    Wrapped in ``st.cache`` so the pipeline object is created once and reused
+    instead of being rebuilt on every call.
+    """
+    return pipeline(
+        "zero-shot-classification",
+        model='cross-encoder/nli-deberta-v3-small',
+    )
+intent_classifier = get_intent_model()
 def extract_text_from_pdf(path):
   text=''
   reader = PdfReader(path)
     st.session_state.filename_key = ''
 st.write("""
+# Document Analysis Tool
 """)
 #uploaded_file = st.file_uploader("Choose a PDF file")
 #uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
                 else:
                     useful_sentence.append(i)
+            useful_sentence_len = len(useful_sentence)
             del sentences
+            ############################ 2.1 Sentiment Modeling ############################
+            # Status line; the later modeling stages overwrite its text with their own message.
+            placeholder1 = st.empty()
+            placeholder1.text('Performing Sentiment Analysis...')
+            #with st.empty():
+            my_bar = st.progress(0)
+            tokenizer = tokenizer_sentiment
+            model = model_sentiment
+            # Build the FinBERT pipeline once and share it between both names.
+            # Constructing it twice loaded the same model twice and left one copy unused.
+            pipe = pipeline(model="ProsusAI/finbert")
+            classifier = pipe
+            #output = classifier(useful_sentence)
+            # Classify sentence by sentence (instead of one batched call) so the
+            # progress bar can be advanced after every sentence.
+            output=[]
+            i=0
+            for temp in useful_sentence:
+                output.extend(classifier(temp))
+                i=i+1
+                my_bar.progress(int((i/useful_sentence_len)*100))
+            my_bar.empty()
+            # One row per sentence: the classifier's output fields plus the sentence text.
+            df = pd.DataFrame.from_dict(output)
+            df['Sentence']= pd.Series(useful_sentence)
+            ############################ 2.2 Emotion Modeling ############################
+            # Runs the emotion classifier over every useful sentence, one at a time,
+            # collecting the results in output_emotion and updating a progress bar.
+            #placeholder2 = st.empty()
+            placeholder1.text('Performing Emotion Analysis...')
+#            with st.empty():
+            my_bar = st.progress(0)
+            tokenizer = tokenizer_emotion
+            model = model_emotion
+            classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
+            output_emotion = []
+            i=0
+            for temp in useful_sentence:
+                # NOTE(review): presumably classifier(temp)[0] is a one-element list holding
+                # the top label/score dict (top_k=1), so extend() adds exactly one entry per
+                # sentence - confirm against the installed transformers version.
+                output_emotion.extend(classifier(temp)[0])
+                i=i+1
+                # Progress is reported as an integer percentage of sentences processed.
+                my_bar.progress(int((i/useful_sentence_len)*100))
+            my_bar.empty()
+            placeholder1.text('Emotion Analysis Completed')
+            ############################ 2.3 Intent Modeling ############################
+            # Classifies each useful sentence against the candidate intent labels and
+            # stores one label/score pair per sentence in df_intent.
+            placeholder1.text('Performing Intent Analysis...')
+            my_bar = st.progress(0)
+            candidate_labels = ['complaint', 'suggestion', 'query']
+            classifier = intent_classifier
+            # Batched variant kept for reference; the loop below is used instead so the
+            # progress bar can be updated after every sentence.
+            # temp_intent = classifier(useful_sentence, candidate_labels)
+            # output_intent=[]
+            # for temp in temp_intent:
+            #     output_intent.append({'label' : temp['labels'][0], 'score' : temp['scores'][0]})
+            output_intent=[]
+            i=0
+            for temp1 in useful_sentence:
+                temp = classifier(temp1, candidate_labels)
+                # Keep only the first label and its score.
+                # NOTE(review): assumes the pipeline returns labels ordered by descending
+                # score, so index 0 is the best match - confirm in the transformers docs.
+                output_intent.append({'label' : temp['labels'][0], 'score' : temp['scores'][0]})
+                i=i+1
+                my_bar.progress(int((i/useful_sentence_len)*100))
+            df_intent = pd.DataFrame.from_dict(output_intent)
+            df_intent['Sentence']= pd.Series(useful_sentence)
+            my_bar.empty()
+            placeholder1.text('Processing Completed')
             ############################ 3. Processing ############################
             pos_df = pos_df.sort_values('score', ascending=False)
             pos_df_mean = pos_df.score.mean()
             pos_df['score'] = pos_df['score'].round(4)
+            pos_df.rename(columns = {'Sentence':'Positive Sentences'}, inplace = True)
+            num_of_pos_sentences = pos_df.shape[0]
+            if num_of_pos_sentences == 0:
+                pos_df.loc[0] = [0.0, '-------No positive sentences found in report-------']
             neg_df = df[df['label']=='negative']
             neg_df = neg_df[['score', 'Sentence']]
             neg_df_mean = neg_df.score.mean()
             neg_df['score'] = neg_df['score'].round(4)
             neg_df.rename(columns = {'Sentence':'Negative Sentences'}, inplace = True)
+            num_of_neg_sentences = neg_df.shape[0]
+            if num_of_neg_sentences == 0:
+                neg_df.loc[0] = [0.0, '-------No negative sentences found in report-------']
             neu_df = df[df['label']=='neutral']
             neu_df = neu_df[['score', 'Sentence']]
             #neu_df_mean = neu_df.score.mean()
             neu_df['score'] = neu_df['score'].round(4)
             neu_df.rename(columns = {'Sentence':'Neutral Sentences'}, inplace = True)
+            num_of_neu_sentences = neu_df.shape[0]
+            if num_of_neu_sentences == 0:
+                neu_df.loc[0] = [0.0, '-------No neutral sentences found in report-------']
             df_temp = neg_df
             df_temp = df_temp['score'] * -1
             df_temp = pd.concat([df_temp, pos_df])
             ############################ 3.2. Emotion Analysis ############################
             df_emotion = pd.DataFrame.from_dict(output_emotion)
             df_emotion['Sentence']= pd.Series(useful_sentence)
             num_of_surprise_sentences = df_surprise.shape[0]
             if num_of_surprise_sentences == 0:
                 df_surprise.loc[0] = [0.0, '-------No surprised sentences found in report-------']
+            df_temp_emotion = df_sadness
+            df_temp_emotion = pd.concat([df_sadness, df_anger])
+            df_temp_emotion = df_temp_emotion['score'] * -1
+            df_temp_emotion = pd.concat([df_temp_emotion, df_joy])
+            ############################ 3.3. Intent Analysis ############################
+            df_query = df_intent[df_intent['label']=='query']
+            df_query = df_query[['score', 'Sentence']]
+            df_query = df_query.sort_values('score', ascending=False)
+            df_query['score'] = df_query['score'].round(4)
+            df_query.rename(columns = {'Sentence':'Queries'}, inplace = True)
+            df_query = df_query[df_query['score']>0.5]
+            num_of_queries = df_query.shape[0]
+            if num_of_queries == 0:
+                df_query.loc[0] = [0.0, '-------No queries found in report-------']
+            df_complaint = df_intent[df_intent['label']=='complaint']
+            df_complaint = df_complaint[['score', 'Sentence']]
+            df_complaint = df_complaint.sort_values('score', ascending=False)
+            df_complaint['score'] = df_complaint['score'].round(4)
+            df_complaint.rename(columns = {'Sentence':'Complaints'}, inplace = True)
+            df_complaint = df_complaint[df_complaint['score']>0.5]
+            num_of_complaints = df_complaint.shape[0]
+            if num_of_complaints == 0:
+                df_complaint.loc[0] = [0.0, '-------No complaints found in report-------']
+            df_suggestion = df_intent[df_intent['label']=='suggestion']
+            df_suggestion = df_suggestion[['score', 'Sentence']]
+            df_suggestion = df_suggestion.sort_values('score', ascending=False)
+            df_suggestion['score'] = df_suggestion['score'].round(4)
+            df_suggestion.rename(columns = {'Sentence':'Suggestions'}, inplace = True)
+            df_suggestion = df_suggestion[df_suggestion['score']>0.5]
+            num_of_suggestions = df_suggestion.shape[0]
+            if num_of_suggestions == 0:
+                df_suggestion.loc[0] = [0.0, '-------No suggestions found in report-------']
+            total_num_of_intent = num_of_queries + num_of_complaints + num_of_suggestions
             ############################ 4. Plotting ############################
             fig = make_subplots(
+                rows=62, cols=6,
                 specs=[ [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
+                        [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
                         [None, None, None, None, None, None],
                         [{"type": "pie", "rowspan": 6, "colspan": 2}, None, {"type": "indicator", "rowspan": 6, "colspan": 2}, None, {"type": "indicator", "rowspan": 6, "colspan": 2}, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
+                        [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
+                        [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [{"type": "bar", "rowspan": 6, "colspan": 6}, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
                         [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, None, {"type": "indicator", "rowspan": 3, "colspan": 2}, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, {"type": "indicator", "rowspan": 2, "colspan": 5}, None, None, None, None],#first bullet
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, {"type": "indicator", "rowspan": 2, "colspan": 5}, None, None, None, None], #2nd bullet
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, {"type": "indicator", "rowspan": 2, "colspan": 5}, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [{"type": "table", "rowspan": 4, "colspan": 2}, None, {"type": "table", "rowspan": 4, "colspan": 2}, None, {"type": "table", "rowspan": 4, "colspan": 2}, None],
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
+                        [None, None, None, None, None, None],
                     ],
             )
             ############################ 4.1. Sentiment Analysis ############################
             fig.add_trace(go.Indicator(
                 mode = "number",
+                value = int(df_temp.score.mean()*100),
+                number = {"suffix": "%"},
+                title = {"text": "<span style='font-size:1.5em'>Sentiment Analysis</span><br><span style='font-size:0.8em;color:gray'>Positivity Score</span>"}
+                ), row=4, col=3)
             colors = px.colors.diverging.Portland#RdBu
             fig.add_trace(go.Pie(labels=labels, values=values, hole = 0.5,
             )
             fig.add_trace(table_trace2, row=18, col=1)
+            ########################### 4.2. Emotion Analysis ###########################
+            fig.add_trace(go.Indicator(
+                mode = "number",
+                value = int(df_temp_emotion.score.mean()*100),
+                number = {"suffix": "%"},
+                title = {"text": "<span style='font-size:1.5em'>Emotion Analysis</span><br><span style='font-size:0.8em;color:gray'>Happiness Score</span>"}
+                ), row=26, col=3)
             # Add bar chart
             colors_emotions = ['#174ecf', '#cfc517', '#940625', '#17cfcb']
                     marker_color=colors_emotions,
                     text=annotations,
                     textfont=dict(size=40)),
+                        row=29, col=1)
+            fig.update_xaxes(title_text='Emotions', title_font=dict(size=16), row=29, col=1)
+            fig.update_yaxes(title_text='Number of sentences', title_font=dict(size=16), row=29, col=1)
             # df_anger.loc[0] = [0.0, 'None']
             # df_anger
                 cells=dict(values=[df_joy[name] for name in df_joy.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
+            fig.add_trace(table_trace2, row=36, col=1)
             ################## sadness table
             table_trace2 = go.Table(
                 cells=dict(values=[df_sadness[name] for name in df_sadness.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
+            fig.add_trace(table_trace2, row=36, col=4)
             ################## surprise table
             table_trace2 = go.Table(
                 cells=dict(values=[df_surprise[name] for name in df_surprise.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
+            fig.add_trace(table_trace2, row=39, col=1)
             ################## anger table
             table_trace2 = go.Table(
                 cells=dict(values=[df_anger[name] for name in df_anger.columns], fill_color='white', align='left'),
                 columnwidth=[1, 4]
             )
+            fig.add_trace(table_trace2, row=39, col=4)
+            ########################### 4.3. Intent Analysis ###########################
+            # Headline number for the intent section: suggestions per complaint.
+            # The divisor is clamped to 1 because num_of_complaints is 0 when no
+            # complaints are found; max(..., 0) did not guard against that and
+            # raised ZeroDivisionError. With zero complaints the indicator shows
+            # the raw suggestion count.
+            fig.add_trace(go.Indicator(
+                mode = "number",
+                value = round(num_of_suggestions/max(num_of_complaints,1), 2),
+                number = {"suffix": ""},
+                title = {"text": "<span style='font-size:1.5em'>Intent Analysis</span><br><span style='font-size:0.8em;color:gray'>Suggestion/Complaint Ratio</span>"}
+                ), row=44, col=3)
+            fig.add_trace(go.Indicator(
+                mode = "number+gauge",
+                gauge = {'shape': "bullet", 'axis': {'range': [None, total_num_of_intent]}, 'bar': {'color': "blue"}},
+                #delta = {'reference': 300},
+                value = num_of_queries,
+                #domain = {'x': [0.5, 1], 'y': [0.3, 0.9]},
+                title = {'text': "Queries"}), row=47, col=2)
+            fig.add_trace(go.Indicator(
+                mode = "number+gauge",
+                gauge = {'shape': "bullet", 'axis': {'range': [None, total_num_of_intent]},},
+                #delta = {'reference': 300},
+                value = num_of_suggestions,
+                #domain = {'x': [0.5, 1], 'y': [0.3, 0.9]},
+                title = {'text': "Suggestions"}), row=50, col=2)
+            fig.add_trace(go.Indicator(
+                mode = "number+gauge",
+                gauge = {'shape': "bullet", 'axis': {'range': [None, total_num_of_intent]}, 'bar': {'color': "red"}},
+                #delta = {'reference': 300},
+                value = num_of_complaints,
+                #domain = {'x': [0.5, 1], 'y': [0.3, 0.9]},
+                title = {'text': "Complaints"}), row=53, col=2)
+            ############ query table
+            table_trace2 = go.Table(
+                header=dict(values=list(df_query.columns), fill_color='lightgray', align='left'),
+                cells=dict(values=[df_query[name] for name in df_query.columns], fill_color='white', align='left'),
+                columnwidth=[1, 4]
+            )
+            fig.add_trace(table_trace2, row=56, col=1)
+            ############ complaints table
+            table_trace2 = go.Table(
+                header=dict(values=list(df_complaint.columns), fill_color='lightgray', align='left'),
+                cells=dict(values=[df_complaint[name] for name in df_complaint.columns], fill_color='white', align='left'),
+                columnwidth=[1, 4]
+            )
+            fig.add_trace(table_trace2, row=56, col=3)
+            ############ suggestions table
+            table_trace2 = go.Table(
+                header=dict(values=list(df_suggestion.columns), fill_color='lightgray', align='left'),
+                cells=dict(values=[df_suggestion[name] for name in df_suggestion.columns], fill_color='white', align='left'),
+                columnwidth=[1, 4]
+            )
+            fig.add_trace(table_trace2, row=56, col=5)
             import textwrap
             if len(title) > 120:
             # Add HTML tags to force line breaks in the title text
             wrapped_title = "<br>".join(wrapped_title.split("\n"))
+            fig.update_layout(height=4000, showlegend=False, title={'text': f"<b>{wrapped_title} - Text Analysis Report</b>", 'x': 0.5, 'xanchor': 'center','font': {'size': 32}})
             #pyo.plot(fig, filename='report.html')

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 streamlit==1.17.0
 transformers
 torch
 PyPDF2
 nltk

 streamlit==1.17.0
 transformers
+sentencepiece
 torch
 PyPDF2
 nltk