|
|
|
|
|
""" |
|
|
Demonstration script for using Pulse Core 1 - Vietnamese Sentiment Analysis System from Hugging Face Hub. |
|
|
Shows how to download and use the pre-trained sentiment models for both general sentiment and banking aspect sentiment. |
|
|
""" |
|
|
|
|
|
from huggingface_hub import hf_hub_download |
|
|
import joblib |
|
|
|
|
|
|
|
|
def predict_text(model, text, top_k=3):
    """Classify a single text and return the best label with its runners-up.

    Kept consistent with the prediction helper in inference.py.

    Args:
        model: Object exposing ``predict_proba(list_of_texts)`` and a
            ``classes_`` sequence aligned with the probability columns.
        text: The text to classify.
        top_k: How many of the highest-probability classes to report.
            Defaults to 3; must be at least 1. If the model has fewer
            classes than ``top_k``, all of them are returned.

    Returns:
        Tuple ``(prediction, confidence, top_predictions)`` where
        ``top_predictions`` is a list of ``(category, probability)`` pairs
        ordered from most to least probable, and ``prediction`` /
        ``confidence`` are the first pair. On any failure the error is
        printed and ``(None, 0, [])`` is returned instead of raising.
    """
    try:
        if top_k < 1:
            # Reported through the same failure path as model errors so the
            # function keeps its "never raises" contract.
            raise ValueError("top_k must be at least 1")

        probabilities = model.predict_proba([text])[0]

        # argsort is ascending: reverse it, then keep the first top_k indices.
        ranked_indices = probabilities.argsort()[::-1][:top_k]
        top_predictions = [
            (model.classes_[idx], probabilities[idx]) for idx in ranked_indices
        ]

        prediction, confidence = top_predictions[0]
        return prediction, confidence, top_predictions
    except Exception as e:
        # Best-effort demo helper: report the problem and let callers continue.
        print(f"Error making prediction: {e}")
        return None, 0, []
|
|
|
|
|
|
|
|
def load_model_from_hub(model_type="vlsp2016"):
    """Download a Pulse Core 1 model file from Hugging Face Hub and load it.

    Args:
        model_type: 'vlsp2016' selects the general sentiment model. Any other
            value selects the banking aspect-sentiment model (the documented
            value for it is 'uts2017').

    Returns:
        Tuple ``(model, model_type)``: the object returned by ``joblib.load``
        and the ``model_type`` argument, passed through unchanged.

    Raises:
        Exception: Any error raised while downloading or loading the model is
            reported on stdout and then re-raised unchanged.
    """
    if model_type == "vlsp2016":
        filename, banner, classes_desc = (
            "vlsp2016_sentiment_20250929_075529.joblib",
            "Downloading Pulse Core 1 (Vietnamese General Sentiment) model from Hugging Face Hub...",
            "sentiment classes (positive, negative, neutral)",
        )
    else:
        filename, banner, classes_desc = (
            "uts2017_sentiment_20250928_131716.joblib",
            "Downloading Pulse Core 1 (Vietnamese Banking Aspect Sentiment) model from Hugging Face Hub...",
            "aspect-sentiment combinations",
        )
    print(banner)

    try:
        local_path = hf_hub_download("undertheseanlp/pulse_core_1", filename)
        print(f"Model downloaded to: {local_path}")

        print("Loading model...")
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code; only load artifacts from a repository you trust.
        model = joblib.load(local_path)

        class_count = len(model.classes_)
        print(f"Model loaded successfully. Classes: {class_count} {classes_desc}")

        # The loaded object is accessed as a pipeline with a step named 'clf'.
        classifier_name = type(model.named_steps['clf']).__name__
        print(f"Model type: {classifier_name}")
    except Exception as e:
        for message in (
            f"Error downloading model: {e}",
            "This might mean the model file hasn't been uploaded to Hugging Face Hub yet.",
            "Please check the repository: https://huggingface.co/undertheseanlp/pulse_core_1",
        ):
            print(message)
        raise

    return model, model_type
|
|
|
|
|
|
|
|
def predict_sentiment_examples(model, model_type):
    """Run the model over built-in labelled Vietnamese sentences and print results.

    Args:
        model: Model passed through to ``predict_text``.
        model_type: 'vlsp2016' uses the general sentiment examples; any other
            value uses the banking aspect-sentiment examples.

    For each example the text, expected label, predicted label, confidence
    and the ranked predictions are printed. Examples for which
    ``predict_text`` yields no prediction are skipped silently here.
    """
    heavy_rule = "="*60
    light_rule = "-" * 60

    if model_type == "vlsp2016":
        heading = "VIETNAMESE GENERAL SENTIMENT ANALYSIS EXAMPLES"
        intro = "Testing Vietnamese general sentiment analysis:"
        examples = [
            ("positive", "Sản phẩm này rất tốt, tôi rất hài lòng"),
            ("negative", "Chất lượng dịch vụ tệ quá"),
            ("neutral", "Giá cả hợp lý, có thể chấp nhận được"),
            ("positive", "Nhân viên phục vụ rất nhiệt tình"),
            ("negative", "Đồ ăn không ngon, sẽ không quay lại"),
            ("positive", "Giao hàng nhanh chóng, đóng gói cẩn thận"),
            ("neutral", "Sản phẩm bình thường, không có gì đặc biệt"),
            ("positive", "Rất đáng tiền, chất lượng tuyệt vời"),
            ("negative", "Không như mong đợi, khá thất vọng"),
            ("positive", "Dịch vụ khách hàng tốt, giải quyết nhanh chóng"),
        ]
    else:
        heading = "VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS EXAMPLES"
        intro = "Testing Vietnamese banking aspect sentiment analysis:"
        examples = [
            ("CUSTOMER_SUPPORT#negative", "Dịch vụ chăm sóc khách hàng rất tệ"),
            ("CUSTOMER_SUPPORT#positive", "Nhân viên hỗ trợ rất nhiệt tình"),
            ("TRADEMARK#positive", "Ngân hàng ACB có uy tín tốt"),
            ("TRADEMARK#negative", "Thương hiệu ngân hàng này không đáng tin cậy"),
            ("LOAN#positive", "Lãi suất vay mua nhà rất ưu đãi"),
            ("LOAN#negative", "Lãi suất vay quá cao, không chấp nhận được"),
            ("INTEREST_RATE#negative", "Lãi suất tiết kiệm thấp quá"),
            ("INTEREST_RATE#positive", "Lãi suất gửi tiết kiệm khá hấp dẫn"),
            ("CARD#negative", "Thẻ tín dụng bị khóa không rõ lý do"),
            ("CARD#positive", "Thẻ ATM rất tiện lợi khi sử dụng"),
            ("INTERNET_BANKING#negative", "Internet banking hay bị lỗi"),
            ("INTERNET_BANKING#positive", "Ứng dụng ngân hàng điện tử dễ sử dụng"),
            ("MONEY_TRANSFER#negative", "Phí chuyển tiền quá đắt"),
            ("PROMOTION#positive", "Chương trình khuyến mãi rất hấp dẫn"),
            ("SECURITY#positive", "Bảo mật tài khoản rất tốt"),
        ]

    # Banner, then the intro line, then a separator before the first result.
    print("\n" + heavy_rule)
    print(heading)
    print(heavy_rule)
    print(intro)
    print(light_rule)

    for expected_label, text in examples:
        try:
            prediction, confidence, top_predictions = predict_text(model, text)

            # No prediction means predict_text already reported the failure.
            if not prediction:
                continue

            print(f"Text: {text}")
            print(f"Expected: {expected_label}")
            print(f"Predicted: {prediction}")
            print(f"Confidence: {confidence:.3f}")

            print("Top 3 predictions:")
            for rank, (category, prob) in enumerate(top_predictions, start=1):
                print(f" {rank}. {category}: {prob:.3f}")

            print(light_rule)
        except Exception as e:
            print(f"Error predicting '{text}': {e}")
            print(light_rule)
|
|
|
|
|
|
|
|
def interactive_mode(model, model_type):
    """Prompt for text in a loop and print the model's prediction for each entry.

    Args:
        model: Model passed through to ``predict_text``.
        model_type: 'vlsp2016' labels the output as general sentiment; any
            other value labels it as banking aspect-sentiment.

    The loop ends when the user types 'quit', 'exit' or 'q' (case-insensitive),
    presses Ctrl+C, or when standard input reaches end-of-file. Blank input is
    ignored. Any other error is printed and the loop continues.
    """
    print("\n" + "="*60)
    if model_type == "vlsp2016":
        print("INTERACTIVE MODE - VIETNAMESE GENERAL SENTIMENT ANALYSIS")
        print("="*60)
        print("Enter Vietnamese text to analyze sentiment (type 'quit' to exit):")
    else:
        print("INTERACTIVE MODE - VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS")
        print("="*60)
        print("Enter Vietnamese banking text to analyze aspect and sentiment (type 'quit' to exit):")

    while True:
        try:
            user_input = input("\nText: ").strip()

            if user_input.lower() in ('quit', 'exit', 'q'):
                break

            if not user_input:
                continue

            prediction, confidence, top_predictions = predict_text(model, user_input)

            # predict_text signals failure with None; it has already printed
            # the error, so there is nothing more to show for this input.
            if prediction is not None:
                if model_type == "vlsp2016":
                    print(f"Predicted sentiment: {prediction}")
                else:
                    print(f"Predicted aspect-sentiment: {prediction}")
                print(f"Confidence: {confidence:.3f}")

                print("Top 3 predictions:")
                for i, (category, prob) in enumerate(top_predictions, 1):
                    print(f" {i}. {category}: {prob:.3f}")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop too: once stdin is exhausted every
            # further input() call fails, so treating it as a generic error
            # would spin forever printing the same message.
            print("\nExiting...")
            break
        except Exception as e:
            print(f"Error: {e}")
|
|
|
|
|
|
|
|
def simple_usage_examples():
    """Print a copy-pasteable code snippet showing how to use the Hub models.

    The snippet covers loading both model files with ``hf_hub_download`` and
    ``joblib.load``, making a single prediction with each, and listing the
    three most probable classes. Nothing is downloaded or executed here; the
    code is only printed.
    """
    print("\n" + "="*60)
    print("HUGGINGFACE HUB USAGE EXAMPLES")
    print("="*60)

    print("Code examples:")
    # The snippet lines sit at column 0 inside the literal so that the printed
    # text is valid Python as-is; nested lines carry their own indentation.
    print("""
# Pulse Core 1 Models (Vietnamese Sentiment Analysis)
from huggingface_hub import hf_hub_download
import joblib

# Option 1: General Sentiment Analysis (VLSP2016)
general_model = joblib.load(
    hf_hub_download("undertheseanlp/pulse_core_1", "vlsp2016_sentiment_20250929_075529.joblib")
)

# Make prediction on general text
general_text = "Sản phẩm này rất tốt"
prediction = general_model.predict([general_text])[0]
print(f"Sentiment: {prediction}")

# Option 2: Banking Aspect Sentiment Analysis (UTS2017_Bank)
banking_model = joblib.load(
    hf_hub_download("undertheseanlp/pulse_core_1", "uts2017_sentiment_20250928_131716.joblib")
)

# Make prediction on banking text
bank_text = "Tôi muốn mở tài khoản tiết kiệm"
prediction = banking_model.predict([bank_text])[0]
print(f"Aspect-Sentiment: {prediction}")

# For detailed predictions with confidence scores
probabilities = banking_model.predict_proba([bank_text])[0]
top_indices = probabilities.argsort()[-3:][::-1]
for idx in top_indices:
    category = banking_model.classes_[idx]
    prob = probabilities[idx]
    print(f"{category}: {prob:.3f}")

# For local file inference, use inference.py instead
""")
|
|
|
|
|
|
|
|
def main():
    """Run the full demonstration: usage snippet, both models, optional prompt.

    Prints the usage examples, loads the VLSP2016 and UTS2017 models from the
    Hub and runs the built-in examples through each, offers interactive mode
    when the session looks interactive, and finishes with a summary of the
    published models. Errors are reported on stdout rather than raised.
    """
    wide_rule = "=" * 60
    short_rule = "-" * 40

    print("Pulse Core 1 - Vietnamese Sentiment Analysis System")
    print(wide_rule)

    try:
        simple_usage_examples()

        print("\n" + wide_rule)
        print("TESTING PULSE CORE 1 MODELS")
        print(wide_rule)

        print("\n1. Testing VLSP2016 General Sentiment Model")
        print(short_rule)
        vlsp_model, vlsp_type = load_model_from_hub("vlsp2016")
        predict_sentiment_examples(vlsp_model, vlsp_type)

        print("\n2. Testing UTS2017 Banking Aspect Sentiment Model")
        print(short_rule)
        uts_model, uts_type = load_model_from_hub("uts2017")
        predict_sentiment_examples(uts_model, uts_type)

        try:
            import sys

            # Only prompt in a REPL session or when stdin is a terminal.
            if hasattr(sys, 'ps1') or sys.stdin.isatty():
                choice = input("\nEnter interactive mode? Choose model type (vlsp2016/uts2017/n): ").strip().lower()

                # Any answer other than the two model names skips interactive mode.
                models_by_choice = {'vlsp2016': vlsp_model, 'uts2017': uts_model}
                if choice in models_by_choice:
                    interactive_mode(models_by_choice[choice], choice)
        except (EOFError, OSError):
            print("\nInteractive mode not available in this environment.")
            print("Run this script in a regular terminal to use interactive mode.")

        summary_lines = (
            "\nDemonstration complete!",
            "\nPulse Core 1 models are available on Hugging Face Hub:",
            "- Repository: undertheseanlp/pulse_core_1",
            "- VLSP2016 Model: vlsp2016_sentiment_20250929_075529.joblib",
            " * Task: Vietnamese General Sentiment Analysis",
            " * Classes: 3 sentiment polarities",
            " * Test accuracy: 71.14%",
            "- UTS2017 Model: uts2017_sentiment_20250928_131716.joblib",
            " * Task: Vietnamese Banking Aspect Sentiment Analysis",
            " * Classes: 35 aspect-sentiment combinations",
            " * Test accuracy: 71.72%",
            "- Model type: Support Vector Classification (SVC)",
        )
        for line in summary_lines:
            print(line)

    except ImportError:
        print("Error: huggingface_hub is required. Install with:")
        print(" pip install huggingface_hub")
    except Exception as e:
        print(f"Error loading models: {e}")
        print("\nMake sure you have internet connection and try again.")
|
|
|
|
|
|
|
|
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()