#!/usr/bin/env python3
"""
Test Yes/No Person Detector on multiple videos for accuracy verification.

Every ``*.mp4`` file in the current working directory is sampled, the first
few extracted frames are sent to the detector, and a report is printed.
The project modules ``local_models`` and ``app`` are imported inside
``test_multiple_videos`` so that a missing module is reported as a message
rather than failing when this file is imported.
"""

import sys
import os
from io import BytesIO
import glob

# Add current directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Settings shared by every detector call in this script.
_QUESTION = "Is there a person in this image?"
_MODEL_NAME = 'Yes/No Person Detector'
_SAMPLE_FPS = 0.3  # Every 3+ seconds
_FRAMES_PER_VIDEO = 3


def _text(value, default='N/A'):
    """Return *value* as a string, or *default* when it is None."""
    return default if value is None else str(value)


def _truncate(text, limit):
    """Return *text* cut to *limit* characters, with '...' appended if it was longer."""
    return text[:limit] + "..." if len(text) > limit else text


def _analyze_frame(frame_data, frame_num, timestamp, process_image_locally, local_manager):
    """Run the detector on one frame, print the outcome and return its record.

    The returned dict always has the keys ``frame``, ``timestamp``,
    ``answer``, ``confidence`` and ``raw_response``; successful detections
    also carry ``person_detected``. ``answer`` and ``raw_response`` are
    always strings so the report code can slice and pad them safely.
    """
    try:
        result = process_image_locally(
            frame_data['frame'],
            _QUESTION,
            _MODEL_NAME,
            local_manager,
        )

        if 'error' in result:
            print(f" ERROR: {result['error']}")
            return {
                'frame': frame_num,
                'timestamp': timestamp,
                'answer': 'ERROR',
                'confidence': 0,
                # The error payload is not guaranteed to be a string.
                'raw_response': _text(result['error']),
            }

        if 'yes_no_detection' in result:
            detection = result['yes_no_detection']
            answer = _text(detection.get('answer', 'UNKNOWN'), 'UNKNOWN')
            person_detected = detection.get('person_detected', False)
            confidence = detection.get('confidence', 0)
            # A missing raw response must not turn a valid answer into an error.
            raw_response = _text(detection.get('raw_response', 'N/A'))

            print(f" Answer: {answer}")
            print(f" Person Detected: {person_detected}")
            print(f" Confidence: {confidence:.0%}")
            print(f" Raw Response: '{_truncate(raw_response, 50)}'")

            return {
                'frame': frame_num,
                'timestamp': timestamp,
                'answer': answer,
                'person_detected': person_detected,
                'confidence': confidence,
                'raw_response': raw_response,
            }

        print(f" Unexpected result format: {result}")
        return {
            'frame': frame_num,
            'timestamp': timestamp,
            'answer': 'UNKNOWN',
            'confidence': 0,
            'raw_response': str(result),
        }
    except Exception as e:
        # Deliberately broad: one bad frame is recorded as an error entry
        # and must not abort the remaining frames of the run.
        print(f" ERROR: {e}")
        return {
            'frame': frame_num,
            'timestamp': timestamp,
            'answer': 'ERROR',
            'confidence': 0,
            'raw_response': str(e),
        }


def _test_video(video_path, extract_frames_from_video, process_image_locally, local_manager):
    """Extract frames from one video and analyse the first few of them.

    Returns the list of per-frame records, or None when no frames were
    extracted. Exceptions from reading the file or extracting frames
    propagate to the caller.
    """
    with open(video_path, 'rb') as f:
        video_file = BytesIO(f.read())
    frames = extract_frames_from_video(video_file, fps=_SAMPLE_FPS)

    if not frames:
        print(f"- No frames extracted from {video_path}")
        return None

    print(f"+ Extracted {len(frames)} frames from {video_path}")

    video_results = []
    for frame_num, frame_data in enumerate(frames[:_FRAMES_PER_VIDEO], start=1):
        timestamp = frame_data['timestamp']

        print(f"\n Frame {frame_num} ({timestamp:.1f}s):")
        print(f" {'-' * 30}")

        video_results.append(
            _analyze_frame(frame_data, frame_num, timestamp, process_image_locally, local_manager)
        )
    return video_results


def _print_summary_table(all_results):
    """Print one table row per analysed frame and return the aggregate tallies."""
    print("\n" + "=" * 80)
    print("COMPREHENSIVE RESULTS ANALYSIS")
    print("=" * 80)

    print("\nRESULTS SUMMARY BY VIDEO:")
    print("-" * 80)
    print(f"{'Video':<20} {'Frame':<8} {'Time':<8} {'Answer':<8} {'Confidence':<12} {'Raw Response':<25}")
    print("-" * 80)

    tally = {'total': 0, 'yes': 0, 'no': 0, 'error': 0, 'unclear': 0, 'confidence_sum': 0}
    bucket_for_answer = {'YES': 'yes', 'NO': 'no', 'ERROR': 'error'}

    for video_name, results in all_results.items():
        for result in results:
            frame = result['frame']
            timestamp = result['timestamp']
            answer = result['answer']
            confidence = result['confidence']
            raw_response = _truncate(result['raw_response'], 20)

            print(f"{video_name:<20} {frame:<8} {timestamp:<8.1f} {answer:<8} {confidence:<12.0%} {raw_response:<25}")

            tally['total'] += 1
            tally['confidence_sum'] += confidence
            # Anything that is not YES / NO / ERROR counts as unclear.
            tally[bucket_for_answer.get(answer, 'unclear')] += 1
    return tally


def _print_overall_statistics(tally, video_count):
    """Print the aggregate counts, success rate and average confidence."""
    total_frames = tally['total']

    print("\n" + "=" * 80)
    print("OVERALL STATISTICS")
    print("=" * 80)
    print(f"Total frames tested: {total_frames}")
    print(f"Videos tested: {video_count}")
    print(f"YES answers: {tally['yes']}")
    print(f"NO answers: {tally['no']}")
    print(f"ERROR responses: {tally['error']}")
    print(f"UNCLEAR responses: {tally['unclear']}")

    if total_frames > 0:
        success_rate = (tally['yes'] + tally['no']) / total_frames * 100
        avg_confidence = tally['confidence_sum'] / total_frames
        print(f"Success rate: {success_rate:.1f}%")
        print(f"Average confidence: {avg_confidence:.0%}")


def _print_accuracy_assessment(yes_count, no_count, total_frames):
    """Print a warning when every answer is identical, otherwise a short verdict."""
    print("\n" + "=" * 80)
    print("ACCURACY ASSESSMENT")
    print("=" * 80)

    # Check if model is stuck giving same answer
    if yes_count == total_frames and total_frames > 3:
        print("WARNING: Model appears to be giving only YES answers!")
        print("This suggests the model may be:")
        print("- Overconfident or biased toward detecting people")
        print("- Not properly processing different image content")
        print("- The prompt may need adjustment")
        print("\nRECOMMENDED FIXES:")
        print("1. Test with images that definitely contain no people")
        print("2. Adjust the prompt to be more specific")
        print("3. Try different confidence thresholds")
        print("4. Consider using a different base model")
    elif no_count == total_frames and total_frames > 3:
        print("WARNING: Model appears to be giving only NO answers!")
        print("This suggests the model may be:")
        print("- Too conservative in person detection")
        print("- Having trouble detecting people in the images")
        print("- The prompt may be too restrictive")
    elif yes_count > 0 and no_count > 0:
        print("GOOD: Model is giving varied responses (both YES and NO)")
        print("This suggests the model is:")
        print("+ Properly analyzing different image content")
        print("+ Responding appropriately to image variations")
        print("+ Working as expected")
    else:
        print("INSUFFICIENT DATA: Need more diverse test cases")


def _print_per_video_breakdown(all_results):
    """Print the YES / NO counts for each analysed video."""
    print("\nPER-VIDEO BREAKDOWN:")
    print("-" * 50)

    for video_name, results in all_results.items():
        video_yes = sum(1 for r in results if r['answer'] == 'YES')
        video_no = sum(1 for r in results if r['answer'] == 'NO')
        video_total = len(results)
        print(f"{video_name}: {video_yes} YES, {video_no} NO (out of {video_total} frames)")


def test_multiple_videos():
    """Test Yes/No Person Detector on multiple videos.

    Looks for ``*.mp4`` files in the current working directory, analyses the
    first frames of each one and prints a per-frame table, overall
    statistics, an accuracy assessment and a per-video breakdown.

    Returns:
        A dict mapping each processed video path to its list of per-frame
        result dicts, or None when the project modules cannot be imported,
        no MP4 file is found, or the model manager fails to initialise.
        Videos that yield no frames or fail during extraction are omitted.
    """
    print("TESTING YES/NO PERSON DETECTOR - MULTIPLE VIDEOS")
    print("=" * 60)
    print("Verifying model accuracy across different video content")
    print()

    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Components loaded successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return

    # Find all MP4 files; sorted because glob returns them in arbitrary
    # order, which would make the "VIDEO n" numbering differ between runs.
    video_files = sorted(glob.glob("*.mp4"))
    if not video_files:
        print("- No MP4 files found")
        return

    print(f"+ Found {len(video_files)} video files: {video_files}")

    # Initialize models
    try:
        local_manager = get_local_model_manager()
        print("+ Yes/No Person Detector ready")
    except Exception as e:
        print(f"- Model initialization error: {e}")
        return

    all_results = {}

    # Test each video
    for video_idx, video_path in enumerate(video_files, start=1):
        print("\n" + "=" * 60)
        print(f"TESTING VIDEO {video_idx}: {video_path}")
        print("=" * 60)

        try:
            video_results = _test_video(
                video_path, extract_frames_from_video, process_image_locally, local_manager
            )
        except Exception as e:
            # Best effort: a broken video is reported and skipped.
            print(f"- Failed to process {video_path}: {e}")
            continue

        if video_results is not None:
            all_results[video_path] = video_results

    tally = _print_summary_table(all_results)
    _print_overall_statistics(tally, len(all_results))
    _print_accuracy_assessment(tally['yes'], tally['no'], tally['total'])
    _print_per_video_breakdown(all_results)

    return all_results


if __name__ == "__main__":
    test_multiple_videos()