|
|
|
|
|
""" |
|
|
Inference script for Pulse Core 1 - Vietnamese Sentiment Analysis System. |
|
|
Loads trained sentiment models from local files and performs predictions. |
|
|
Supports both VLSP2016 general sentiment and UTS2017_Bank aspect sentiment models. |
|
|
""" |
|
|
|
|
|
import argparse |
|
|
import joblib |
|
|
import os |
|
|
import glob |
|
|
|
|
|
|
|
|
def _newest_first(paths):
    """Return the regular files in *paths*, most recently modified first.

    The input is sorted by name before the mtime sort so that files sharing
    the same modification time always come out in the same order.
    """
    existing = [path for path in sorted(paths) if os.path.isfile(path)]
    return sorted(existing, key=os.path.getmtime, reverse=True)


def _pick_run_model(pattern):
    """Return the preferred training-run model matching glob *pattern*.

    The newest file whose path contains 'SVC' wins; if there is none, the
    newest file of any kind is used. Returns None when nothing matches.
    """
    candidates = _newest_first(glob.glob(pattern))
    if not candidates:
        return None
    svc_candidates = [path for path in candidates if 'SVC' in path]
    return (svc_candidates or candidates)[0]


def find_local_models():
    """Find all available local sentiment model files.

    Two locations are scanned, both relative to the current working
    directory:

    * ``*.joblib`` files directly in the directory whose names start with
      ``vlsp2016_sentiment_`` or ``uts2017_sentiment_`` ("exported" models);
    * ``runs/*/models/`` files named ``VLSP2016_Sentiment_*.joblib`` or
      ``UTS2017_Bank_AspectSentiment_*.joblib`` ("runs" models).

    Returns:
        dict: ``{'exported': {...}, 'runs': {...}}``. Each inner dict maps
        ``'vlsp2016_sentiment'`` / ``'uts2017_sentiment'`` to one file path;
        a key is absent when no matching file exists. When several files
        match, the most recently modified one is chosen (for runs, files with
        'SVC' in their path are preferred over newer non-SVC files).
    """
    models = {
        'exported': {},
        'runs': {},
    }

    exported_prefixes = {
        'vlsp2016_sentiment': 'vlsp2016_sentiment_',
        'uts2017_sentiment': 'uts2017_sentiment_',
    }
    joblib_names = [
        name for name in os.listdir('.') if name.endswith('.joblib')
    ]
    for key, prefix in exported_prefixes.items():
        # os.listdir() returns names in arbitrary order, so pick by mtime
        # instead of letting "whichever was listed last" win.
        matches = _newest_first(
            name for name in joblib_names if name.startswith(prefix)
        )
        if matches:
            models['exported'][key] = matches[0]

    run_patterns = {
        'vlsp2016_sentiment': 'runs/*/models/VLSP2016_Sentiment_*.joblib',
        'uts2017_sentiment': 'runs/*/models/UTS2017_Bank_AspectSentiment_*.joblib',
    }
    for key, pattern in run_patterns.items():
        chosen = _pick_run_model(pattern)
        if chosen is not None:
            models['runs'][key] = chosen

    return models
|
|
|
|
|
|
|
|
def load_model(model_path):
    """Load a serialized model from *model_path* with joblib.

    Progress and the number of entries in the model's ``classes_`` attribute
    are printed. Any exception raised while loading or inspecting the model
    is reported on stdout instead of being propagated.

    Args:
        model_path: Path of the joblib file to load.

    Returns:
        The loaded model object, or None if anything went wrong.
    """
    loaded = None
    try:
        print(f"Loading model from: {model_path}")
        # NOTE: joblib.load unpickles the file, so only point this at model
        # files that come from a trusted source.
        candidate = joblib.load(model_path)
        class_count = len(candidate.classes_)
        print(f"Model loaded successfully. Classes: {class_count}")
        loaded = candidate
    except Exception as err:
        print(f"Error loading model: {err}")
    return loaded
|
|
|
|
|
|
|
|
def predict_text(model, text):
    """Classify a single text and report the most probable classes.

    Args:
        model: Object exposing ``predict_proba`` (called with a one-element
            list) and a ``classes_`` sequence indexed by probability column.
        text: The text to classify.

    Returns:
        tuple: ``(prediction, confidence, top_predictions)`` where
        ``top_predictions`` holds up to three ``(class, probability)`` pairs
        ordered from most to least probable, and ``prediction`` /
        ``confidence`` are the first pair. If anything raises, the error is
        printed and ``(None, 0, [])`` is returned.
    """
    try:
        scores = model.predict_proba([text])[0]
        # Descending order of probability, truncated to the best three.
        ranked = scores.argsort()[::-1][:3]
        top_predictions = [(model.classes_[pos], scores[pos]) for pos in ranked]
        prediction, confidence = top_predictions[0]
        return prediction, confidence, top_predictions
    except Exception as err:
        print(f"Error making prediction: {err}")
        return None, 0, []
|
|
|
|
|
|
|
|
def interactive_mode(model, dataset_name):
    """Run a read-predict-print loop on standard input.

    A banner matching the dataset is printed first: the general-sentiment
    banner for ``'vlsp2016_sentiment'``, the banking aspect banner for any
    other value. Each non-empty line is then passed to ``predict_text`` and
    the result is printed.

    The loop ends when the user types ``quit``, ``exit`` or ``q``
    (case-insensitive), presses Ctrl-C, or when standard input reaches end of
    file.

    Args:
        model: Model object forwarded to ``predict_text``.
        dataset_name: Dataset key used only to choose the banner text.
    """
    separator = '=' * 60
    print(f"\n{separator}")
    if dataset_name == 'vlsp2016_sentiment':
        print("INTERACTIVE MODE - VIETNAMESE GENERAL SENTIMENT ANALYSIS")
        print(separator)
        print("Enter Vietnamese text to analyze sentiment (type 'quit' to exit):")
    else:
        print("INTERACTIVE MODE - VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS")
        print(separator)
        print("Enter Vietnamese banking text to analyze aspect and sentiment (type 'quit' to exit):")

    while True:
        try:
            user_input = input("\nText: ").strip()

            if user_input.lower() in ('quit', 'exit', 'q'):
                break

            if not user_input:
                continue

            prediction, confidence, top_predictions = predict_text(model, user_input)

            # predict_text signals failure with None; compare explicitly so a
            # falsy class label is still reported.
            if prediction is not None:
                print(f"Predicted category: {prediction}")
                print(f"Confidence: {confidence:.3f}")
                print("Top 3 predictions:")
                for i, (category, prob) in enumerate(top_predictions, 1):
                    print(f" {i}. {category}: {prob:.3f}")

        except (KeyboardInterrupt, EOFError):
            # EOFError must be handled before the generic handler below:
            # once stdin is exhausted every further input() raises again,
            # which would otherwise spin this loop forever.
            print("\nExiting...")
            break
        except Exception as e:
            print(f"Error: {e}")
|
|
|
|
|
|
|
|
def test_examples(model, dataset_name):
    """Run the model over a fixed list of sample sentences and print results.

    General-sentiment samples are used when *dataset_name* is
    ``'vlsp2016_sentiment'``; every other value selects the banking samples.
    For each sample that yields a prediction, the text, predicted label and
    confidence are printed, plus the top alternatives when the confidence is
    below 0.7.

    Args:
        model: Model object forwarded to ``predict_text``.
        dataset_name: Dataset key selecting the sample set and the header.
    """
    general_samples = [
        "Sản phẩm này rất tốt, tôi rất hài lòng",
        "Chất lượng dịch vụ tệ quá",
        "Giá cả hợp lý, có thể chấp nhận được",
        "Nhân viên phục vụ rất nhiệt tình",
        "Đồ ăn không ngon, sẽ không quay lại",
        "Giao hàng nhanh chóng, đóng gói cẩn thận",
        "Sản phẩm bình thường, không có gì đặc biệt",
        "Rất đáng tiền, chất lượng tuyệt vời",
        "Không như mong đợi, khá thất vọng",
        "Dịch vụ khách hàng tốt, giải quyết nhanh chóng"
    ]
    banking_samples = [
        "Tôi muốn mở tài khoản tiết kiệm mới",
        "Lãi suất vay mua nhà hiện tại quá cao",
        "Làm thế nào để đăng ký internet banking?",
        "Chi phí chuyển tiền ra nước ngoài rất đắt",
        "Ngân hàng ACB có uy tín không?",
        "Tôi cần hỗ trợ về dịch vụ ngân hàng",
        "Thẻ tín dụng bị khóa không rõ lý do",
        "Dịch vụ chăm sóc khách hàng rất tệ",
        "Khuyến mãi tháng này rất hấp dẫn",
        "Bảo mật tài khoản có được đảm bảo không?"
    ]

    is_general = dataset_name == 'vlsp2016_sentiment'
    examples = general_samples if is_general else banking_samples
    title = (
        "TESTING VIETNAMESE GENERAL SENTIMENT ANALYSIS"
        if is_general
        else "TESTING VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS"
    )

    rule = "=" * 60
    print("\n" + rule)
    print(title)
    print(rule)

    for text in examples:
        prediction, confidence, top_predictions = predict_text(model, text)

        # Samples without a prediction produce no output of their own.
        if not prediction:
            continue

        print(f"\nText: {text}")
        print(f"Prediction: {prediction}")
        print(f"Confidence: {confidence:.3f}")

        # Low-confidence results also list the runner-up classes.
        if confidence < 0.7:
            print("Alternative predictions:")
            for rank, (category, prob) in enumerate(top_predictions[:3], 1):
                print(f" {rank}. {category}: {prob:.3f}")

        # NOTE(review): separator nesting was reconstructed from a flattened
        # source; it is assumed to belong to the per-prediction output - confirm.
        print("-" * 60)
|
|
|
|
|
|
|
|
def list_available_models():
    """Print every model reported by ``find_local_models``.

    Exported models and training-run models are listed in separate sections,
    each entry showing its key, path, size in megabytes and a human-readable
    dataset label. A section is omitted when it has no entries; when both are
    empty, a hint on how to train a model is printed instead.
    """
    models = find_local_models()

    print("Available Vietnamese Sentiment Models:")
    print("=" * 50)

    sections = (
        ('exported', "\nExported Models (Project Root):"),
        ('runs', "\nRuns Models (Training Directory):"),
    )
    bytes_per_mb = 1024 * 1024

    for source, heading in sections:
        entries = models[source]
        if not entries:
            continue
        print(heading)
        for model_type, path in entries.items():
            size_mb = os.path.getsize(path) / bytes_per_mb
            if "vlsp2016" in model_type:
                dataset_type = "General Sentiment"
            else:
                dataset_type = "Banking Aspect Sentiment"
            print(f" {model_type}: {path} ({size_mb:.1f}MB) - {dataset_type}")

    if not (models['exported'] or models['runs']):
        print("No local sentiment models found!")
        print("Train a model first using:")
        print(" VLSP2016: python train.py --dataset vlsp2016 --export-model")
        print(" UTS2017: python train.py --dataset uts2017 --export-model")
|
|
|
|
|
|
|
|
# Human-readable names used in the "selected model" messages.
_DATASET_LABELS = {
    'vlsp2016_sentiment': "VLSP2016 general sentiment",
    'uts2017_sentiment': "UTS2017 banking aspect sentiment",
}


def _detect_dataset_from_path(model_path):
    """Guess the dataset key from a model path, ignoring letter case.

    Case-insensitive matching is required because training-run files are
    named ``VLSP2016_...`` / ``UTS2017_...`` while exported files use
    lowercase prefixes. Returns ``'unknown'`` when neither name appears.
    """
    lowered = model_path.lower()
    if 'vlsp2016' in lowered:
        return 'vlsp2016_sentiment'
    if 'uts2017' in lowered:
        return 'uts2017_sentiment'
    return 'unknown'


def _select_local_model(available, dataset):
    """Choose a model from *available* for the requested *dataset*.

    Args:
        available: Mapping of dataset key to model path for one source.
        dataset: ``'vlsp2016'``, ``'uts2017'`` or ``'auto'``. In auto mode
            the VLSP2016 model is preferred over the UTS2017 one.

    Returns:
        tuple: ``(model_path, dataset_name)``, or ``(None, None)`` after
        printing a "not found" message when nothing suitable exists.
    """
    if dataset == 'auto':
        wanted = ('vlsp2016_sentiment', 'uts2017_sentiment')
        verb = "Auto-selected"
        missing_message = "No sentiment models found!"
    else:
        wanted = (f"{dataset}_sentiment",)
        verb = "Selected"
        missing_message = f"No {dataset.upper()} models found!"

    for key in wanted:
        if key in available:
            print(f"{verb} {_DATASET_LABELS[key]} model")
            return available[key], key

    print(missing_message)
    return None, None


def main():
    """Command-line entry point for local sentiment inference.

    Parses the command line, resolves which model file to use (an explicit
    ``--model-path``, or a model discovered by ``find_local_models`` for the
    chosen ``--source`` and ``--dataset``), loads it, and then either:

    * classifies ``--text`` once,
    * runs the predefined examples (``--test-examples``), or
    * runs the examples and offers to enter interactive mode (default).

    ``--list-models`` only prints the discovered models and exits. All
    failures are reported on stdout; the function always returns None.
    """
    parser = argparse.ArgumentParser(
        description="Inference with local Pulse Core 1 Vietnamese sentiment models"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        choices=["vlsp2016", "uts2017", "auto"],
        default="auto",
        help="Dataset type to use (default: auto-detect)"
    )
    parser.add_argument(
        "--model-path",
        type=str,
        help="Path to specific sentiment model file"
    )
    parser.add_argument(
        "--text",
        type=str,
        help="Vietnamese text to analyze (if not provided, enters interactive mode)"
    )
    parser.add_argument(
        "--test-examples",
        action="store_true",
        help="Test with predefined examples"
    )
    parser.add_argument(
        "--list-models",
        action="store_true",
        help="List all available local sentiment models"
    )
    parser.add_argument(
        "--source",
        type=str,
        choices=["exported", "runs"],
        default="runs",
        help="Model source: exported files or runs directory (default: runs)"
    )

    args = parser.parse_args()

    if args.list_models:
        list_available_models()
        return

    if args.model_path:
        # An explicit path bypasses discovery; the dataset is inferred from
        # the path so the matching examples and banners are used.
        model_path = args.model_path
        dataset_name = _detect_dataset_from_path(model_path)
    else:
        available = find_local_models()[args.source]
        model_path, dataset_name = _select_local_model(available, args.dataset)
        if model_path is None:
            list_available_models()
            return

    if not os.path.exists(model_path):
        print(f"Model file not found: {model_path}")
        list_available_models()
        return

    model = load_model(model_path)
    # load_model returns None on failure; avoid relying on the truthiness of
    # an arbitrary estimator object.
    if model is None:
        return

    if args.text:
        prediction, confidence, top_predictions = predict_text(model, args.text)
        if prediction is not None:
            print(f"\nText: {args.text}")
            print(f"Prediction: {prediction}")
            print(f"Confidence: {confidence:.3f}")
            print("Top 3 predictions:")
            for i, (category, prob) in enumerate(top_predictions, 1):
                print(f" {i}. {category}: {prob:.3f}")

    elif args.test_examples:
        test_examples(model, dataset_name)

    else:
        model_type = "General Sentiment" if dataset_name == 'vlsp2016_sentiment' else "Banking Aspect Sentiment"
        print(f"Loaded {model_type} model: {os.path.basename(model_path)}")
        test_examples(model, dataset_name)

        try:
            response = input("\nEnter interactive mode? (y/n): ").strip().lower()
            if response in ('y', 'yes'):
                interactive_mode(model, dataset_name)
        except (KeyboardInterrupt, EOFError):
            # EOFError covers non-interactive runs (piped or closed stdin).
            print("\nExiting...")
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()