Spaces:
Runtime error
Runtime error
Rithvickkr
committed on
Commit
·
21b1e62
1
Parent(s):
abf2ac0
Fixed file upload bug for malicious Python code; ensured relevant CVEs and multi-threat detection
Browse files- app.py +70 -6
- requirements.txt +0 -4
app.py
CHANGED
|
@@ -4,12 +4,14 @@ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
|
|
| 4 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 5 |
import os
|
| 6 |
import re
|
|
|
|
| 7 |
from collections import defaultdict
|
| 8 |
from datetime import datetime, timedelta
|
| 9 |
import json
|
| 10 |
import time
|
| 11 |
import logging
|
| 12 |
from retrying import retry
|
|
|
|
| 13 |
|
| 14 |
# Set up logging
|
| 15 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -59,6 +61,40 @@ def call_mistral_llm(prompt):
|
|
| 59 |
logger.error(f"Mistral API request failed: {e}")
|
| 60 |
raise
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# Enhanced DSATP log parsing
|
| 63 |
def dsatp_parse_log(text: str) -> dict:
|
| 64 |
log = text.lower()
|
|
@@ -95,7 +131,7 @@ def dsatp_parse_log(text: str) -> dict:
|
|
| 95 |
"flood", "syn flood", "http flood", "suspicious url", "script tag",
|
| 96 |
"sqlmap", "union select", "escalation attempt", "rootkit", "yara_match",
|
| 97 |
"wget", "curl", "bash", "sh", "payload", "ufw", "sudo", "root", "ssh",
|
| 98 |
-
"cron", "systemd"
|
| 99 |
]
|
| 100 |
ip_pattern = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'
|
| 101 |
timestamp_pattern = r'\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}'
|
|
@@ -103,10 +139,10 @@ def dsatp_parse_log(text: str) -> dict:
|
|
| 103 |
sql_pattern = r'(union\s+select|select\s+.*\s+from|drop\s+table)'
|
| 104 |
xss_pattern = r'(<script>|on\w+\s*=|javascript:)'
|
| 105 |
yara_pattern = r'yara_match:\s*([\w\.]+)\s+detected'
|
| 106 |
-
script_pattern = r'(wget|curl)\s+.*(http[s]?://[^\s]+)\s*.*\.(sh|bash)'
|
| 107 |
sudo_pattern = r'sudo:.*user=root\s*;'
|
| 108 |
ssh_pattern = r'reverse\s+ssh|tunnel\s+service'
|
| 109 |
-
malicious_url_pattern = r'http[s]?://.*(malicious|payload)[^\s]*'
|
| 110 |
|
| 111 |
for line in lines:
|
| 112 |
# YARA match detection
|
|
@@ -124,6 +160,10 @@ def dsatp_parse_log(text: str) -> dict:
|
|
| 124 |
if re.search(script_pattern, line, re.IGNORECASE) or re.search(malicious_url_pattern, line, re.IGNORECASE):
|
| 125 |
detected_threats.append(threats["malicious"] | {"confidence": 0.95})
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
# Firewall block
|
| 128 |
if re.search(r'ufw\s+block', line, re.IGNORECASE):
|
| 129 |
ip_match = re.search(ip_pattern, line)
|
|
@@ -201,6 +241,16 @@ def dsatp_parse_log(text: str) -> dict:
|
|
| 201 |
# Enhanced DSATP YARA scanning
|
| 202 |
def dsatp_yara_scan(file_path: str) -> dict:
|
| 203 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
import yara
|
| 205 |
rules = yara.compile(source="""
|
| 206 |
rule BruteForceLog {
|
|
@@ -220,11 +270,25 @@ def dsatp_yara_scan(file_path: str) -> dict:
|
|
| 220 |
$trojan = "trojan" nocase
|
| 221 |
$ransomware = "ransomware" nocase
|
| 222 |
$wget = "wget" nocase
|
|
|
|
| 223 |
$payload = "payload" nocase
|
| 224 |
$malicious = "malicious" nocase
|
|
|
|
| 225 |
condition:
|
| 226 |
any of them
|
| 227 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
rule SuspiciousBehavior {
|
| 229 |
strings:
|
| 230 |
$heuristic = "heuristic" nocase
|
|
@@ -283,7 +347,7 @@ def dsatp_yara_scan(file_path: str) -> dict:
|
|
| 283 |
"mitigation": "Block suspicious IPs, disable password-based SSH, enable fail2ban",
|
| 284 |
"confidence": 0.95
|
| 285 |
})
|
| 286 |
-
elif match.rule == "MalwareLog":
|
| 287 |
detected_threats.append({
|
| 288 |
"classification": "Malware Detected",
|
| 289 |
"severity": "Critical",
|
|
@@ -377,7 +441,7 @@ def chatbot_response(user_input, file, history, state):
|
|
| 377 |
# Map classification to precise keywords for relevant CVEs
|
| 378 |
threat_keywords = {
|
| 379 |
"Brute-Force Attempt": "brute force, ssh, login attempt, authentication failure, openssh, password attack, cwe-287, cwe-307",
|
| 380 |
-
"Malware Detected": "malware, trojan, ransomware, payload, malicious script, backdoor, virus, cwe-
|
| 381 |
"Network Intrusion": "firewall, intrusion, ufw, network attack, port scan, unauthorized access, cwe-284",
|
| 382 |
"Privilege Escalation": "privilege escalation, sudo, root, unauthorized access, cwe-269, cwe-250",
|
| 383 |
"Persistence Mechanism": "ssh tunnel, reverse ssh, persistence, backdoor, remote access, cwe-284",
|
|
@@ -652,7 +716,7 @@ with gr.Blocks(css="""
|
|
| 652 |
with gr.Column(scale=2):
|
| 653 |
chatbot = gr.Chatbot(label="Security Analyst Chat", type="messages", height=500)
|
| 654 |
user_input = gr.Textbox(placeholder="Enter log data or alert (e.g., 'System compromised!', 'Trojan detected')", lines=3)
|
| 655 |
-
file_input = gr.File(label="Upload .txt/.log file", file_types=[".txt", ".log"])
|
| 656 |
submit_btn = gr.Button("Analyze")
|
| 657 |
with gr.Column(scale=1):
|
| 658 |
gr.Markdown("### Threat Analysis Results")
|
|
|
|
| 4 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 5 |
import os
|
| 6 |
import re
|
| 7 |
+
import ast
|
| 8 |
from collections import defaultdict
|
| 9 |
from datetime import datetime, timedelta
|
| 10 |
import json
|
| 11 |
import time
|
| 12 |
import logging
|
| 13 |
from retrying import retry
|
| 14 |
+
import base64
|
| 15 |
|
| 16 |
# Set up logging
|
| 17 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 61 |
logger.error(f"Mistral API request failed: {e}")
|
| 62 |
raise
|
| 63 |
|
| 64 |
+
# Basic Python code analysis
|
| 65 |
+
def analyze_python_code(content: str) -> "dict | None":
    """Statically inspect Python source text for a few malware indicators.

    The source is parsed with ``ast`` (it is never executed) and every node
    is checked for:

    * a call to ``base64.b64decode(...)``,
    * a call to the builtin ``exec(...)``,
    * an import of ``urllib``, ``urllib.request`` or ``requests`` (both the
      ``import x`` and ``from x import y`` forms),
    * a string literal holding an http(s) URL that contains ``evil``,
      ``malicious`` or ``bad``.

    Args:
        content: Python source code as text.

    Returns:
        A threat dict with the keys ``classification``, ``severity``,
        ``mitigation``, ``confidence`` and ``details`` (one message per
        finding) when at least one indicator is present; ``None`` when
        nothing is found or the text cannot be parsed as Python.
    """
    suspicious_modules = ('urllib', 'urllib.request', 'requests')
    url_pattern = r'http[s]?://.*(evil|malicious|bad)[^\s]*'

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: before Python 3.12, ast.parse rejects source containing
        # NUL bytes with ValueError rather than SyntaxError.
        logger.warning("Invalid Python syntax in file")
        return None

    suspicious_patterns = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            func = node.func
            # Check for Base64 decoding: base64.b64decode(...)
            if (
                isinstance(func, ast.Attribute)
                and func.attr == 'b64decode'
                and isinstance(func.value, ast.Name)
                and func.value.id == 'base64'
            ):
                suspicious_patterns.append("Base64 decoding detected")
            # Check for exec usage: exec(...)
            if isinstance(func, ast.Name) and func.id == 'exec':
                suspicious_patterns.append("Dynamic code execution (exec) detected")
        elif isinstance(node, ast.Import):
            for alias in node.names:
                if alias.name in suspicious_modules:
                    suspicious_patterns.append(f"Suspicious import: {alias.name}")
        elif isinstance(node, ast.ImportFrom):
            # For "from pkg import name" the package lives in node.module and
            # the aliases only hold the imported names, so test the qualified
            # name and the module as well as the bare alias.
            module = node.module or ""
            for alias in node.names:
                qualified = f"{module}.{alias.name}" if module else alias.name
                for candidate in (qualified, module, alias.name):
                    if candidate in suspicious_modules:
                        suspicious_patterns.append(f"Suspicious import: {candidate}")
                        break
        elif isinstance(node, ast.Constant) and isinstance(node.value, str):
            # String literals are ast.Constant on Python 3.8+; the legacy
            # ast.Str alias is deprecated and removed in Python 3.14, so it
            # must not be referenced here.
            if re.search(url_pattern, node.value, re.IGNORECASE):
                suspicious_patterns.append(f"Suspicious URL: {node.value}")

    if not suspicious_patterns:
        return None

    return {
        "classification": "Malware Detected",
        "severity": "Critical",
        "mitigation": "Quarantine file, run antivirus, block suspicious URLs",
        "confidence": 0.95,
        "details": suspicious_patterns
    }
|
| 97 |
+
|
| 98 |
# Enhanced DSATP log parsing
|
| 99 |
def dsatp_parse_log(text: str) -> dict:
|
| 100 |
log = text.lower()
|
|
|
|
| 131 |
"flood", "syn flood", "http flood", "suspicious url", "script tag",
|
| 132 |
"sqlmap", "union select", "escalation attempt", "rootkit", "yara_match",
|
| 133 |
"wget", "curl", "bash", "sh", "payload", "ufw", "sudo", "root", "ssh",
|
| 134 |
+
"cron", "systemd", "base64", "exec"
|
| 135 |
]
|
| 136 |
ip_pattern = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'
|
| 137 |
timestamp_pattern = r'\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}'
|
|
|
|
| 139 |
sql_pattern = r'(union\s+select|select\s+.*\s+from|drop\s+table)'
|
| 140 |
xss_pattern = r'(<script>|on\w+\s*=|javascript:)'
|
| 141 |
yara_pattern = r'yara_match:\s*([\w\.]+)\s+detected'
|
| 142 |
+
script_pattern = r'(wget|curl)\s+.*(http[s]?://[^\s]+)\s*.*\.(sh|bash|exe)'
|
| 143 |
sudo_pattern = r'sudo:.*user=root\s*;'
|
| 144 |
ssh_pattern = r'reverse\s+ssh|tunnel\s+service'
|
| 145 |
+
malicious_url_pattern = r'http[s]?://.*(malicious|payload|evil)[^\s]*'
|
| 146 |
|
| 147 |
for line in lines:
|
| 148 |
# YARA match detection
|
|
|
|
| 160 |
if re.search(script_pattern, line, re.IGNORECASE) or re.search(malicious_url_pattern, line, re.IGNORECASE):
|
| 161 |
detected_threats.append(threats["malicious"] | {"confidence": 0.95})
|
| 162 |
|
| 163 |
+
# Base64 and exec detection
|
| 164 |
+
if "base64.b64decode" in line and "exec" in line:
|
| 165 |
+
detected_threats.append(threats["malicious"] | {"confidence": 0.95})
|
| 166 |
+
|
| 167 |
# Firewall block
|
| 168 |
if re.search(r'ufw\s+block', line, re.IGNORECASE):
|
| 169 |
ip_match = re.search(ip_pattern, line)
|
|
|
|
| 241 |
# Enhanced DSATP YARA scanning
|
| 242 |
def dsatp_yara_scan(file_path: str) -> dict:
|
| 243 |
try:
|
| 244 |
+
# Read file content for Python analysis
|
| 245 |
+
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 246 |
+
content = f.read()
|
| 247 |
+
|
| 248 |
+
# Analyze Python code if it's a .py file
|
| 249 |
+
if file_path.endswith('.py'):
|
| 250 |
+
python_analysis = analyze_python_code(content)
|
| 251 |
+
if python_analysis:
|
| 252 |
+
return python_analysis
|
| 253 |
+
|
| 254 |
import yara
|
| 255 |
rules = yara.compile(source="""
|
| 256 |
rule BruteForceLog {
|
|
|
|
| 270 |
$trojan = "trojan" nocase
|
| 271 |
$ransomware = "ransomware" nocase
|
| 272 |
$wget = "wget" nocase
|
| 273 |
+
$curl = "curl" nocase
|
| 274 |
$payload = "payload" nocase
|
| 275 |
$malicious = "malicious" nocase
|
| 276 |
+
$evil = "evil" nocase
|
| 277 |
condition:
|
| 278 |
any of them
|
| 279 |
}
|
| 280 |
+
rule PythonMalware {
|
| 281 |
+
strings:
|
| 282 |
+
$base64 = "base64.b64decode" nocase
|
| 283 |
+
$exec = "exec" nocase
|
| 284 |
+
$urllib = "urllib.request" nocase
|
| 285 |
+
$requests = "requests.get" nocase
|
| 286 |
+
$url = "http://" nocase
|
| 287 |
+
$url2 = "https://" nocase
|
| 288 |
+
$evil_url = "evil.com" nocase
|
| 289 |
+
condition:
|
| 290 |
+
($base64 and $exec) or ($urllib and ($url or $url2 or $evil_url)) or ($requests and ($url or $url2 or $evil_url))
|
| 291 |
+
}
|
| 292 |
rule SuspiciousBehavior {
|
| 293 |
strings:
|
| 294 |
$heuristic = "heuristic" nocase
|
|
|
|
| 347 |
"mitigation": "Block suspicious IPs, disable password-based SSH, enable fail2ban",
|
| 348 |
"confidence": 0.95
|
| 349 |
})
|
| 350 |
+
elif match.rule == "MalwareLog" or match.rule == "PythonMalware":
|
| 351 |
detected_threats.append({
|
| 352 |
"classification": "Malware Detected",
|
| 353 |
"severity": "Critical",
|
|
|
|
| 441 |
# Map classification to precise keywords for relevant CVEs
|
| 442 |
threat_keywords = {
|
| 443 |
"Brute-Force Attempt": "brute force, ssh, login attempt, authentication failure, openssh, password attack, cwe-287, cwe-307",
|
| 444 |
+
"Malware Detected": "malware, trojan, ransomware, payload, malicious script, backdoor, virus, cwe-94, cwe-506, cwe-119",
|
| 445 |
"Network Intrusion": "firewall, intrusion, ufw, network attack, port scan, unauthorized access, cwe-284",
|
| 446 |
"Privilege Escalation": "privilege escalation, sudo, root, unauthorized access, cwe-269, cwe-250",
|
| 447 |
"Persistence Mechanism": "ssh tunnel, reverse ssh, persistence, backdoor, remote access, cwe-284",
|
|
|
|
| 716 |
with gr.Column(scale=2):
|
| 717 |
chatbot = gr.Chatbot(label="Security Analyst Chat", type="messages", height=500)
|
| 718 |
user_input = gr.Textbox(placeholder="Enter log data or alert (e.g., 'System compromised!', 'Trojan detected')", lines=3)
|
| 719 |
+
file_input = gr.File(label="Upload .txt/.log/.py file", file_types=[".txt", ".log", ".py"])
|
| 720 |
submit_btn = gr.Button("Analyze")
|
| 721 |
with gr.Column(scale=1):
|
| 722 |
gr.Markdown("### Threat Analysis Results")
|
requirements.txt
CHANGED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
gradio[mcp]>=4.0.0
|
| 2 |
textblob
|
| 3 |
-
fastapi
|
| 4 |
-
uvicorn
|
| 5 |
yara-python
|
| 6 |
requests
|
| 7 |
llama-index-core
|
| 8 |
llama-index-embeddings-huggingface
|
| 9 |
pandas
|
| 10 |
sentence-transformers
|
| 11 |
-
transformers
|
| 12 |
-
torch
|
| 13 |
retrying
|
|
|
|
| 1 |
gradio[mcp]>=4.0.0
|
| 2 |
textblob
|
|
|
|
|
|
|
| 3 |
yara-python
|
| 4 |
requests
|
| 5 |
llama-index-core
|
| 6 |
llama-index-embeddings-huggingface
|
| 7 |
pandas
|
| 8 |
sentence-transformers
|
|
|
|
|
|
|
| 9 |
retrying
|