Rithvickkr committed on
Commit
21b1e62
·
1 Parent(s): abf2ac0

Fixed file upload bug for malicious Python code, ensured relevant CVEs, and multi-threat detection

Browse files
Files changed (2) hide show
  1. app.py +70 -6
  2. requirements.txt +0 -4
app.py CHANGED
@@ -4,12 +4,14 @@ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
  import os
6
  import re
 
7
  from collections import defaultdict
8
  from datetime import datetime, timedelta
9
  import json
10
  import time
11
  import logging
12
  from retrying import retry
 
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -59,6 +61,40 @@ def call_mistral_llm(prompt):
59
  logger.error(f"Mistral API request failed: {e}")
60
  raise
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # Enhanced DSATP log parsing
63
  def dsatp_parse_log(text: str) -> dict:
64
  log = text.lower()
@@ -95,7 +131,7 @@ def dsatp_parse_log(text: str) -> dict:
95
  "flood", "syn flood", "http flood", "suspicious url", "script tag",
96
  "sqlmap", "union select", "escalation attempt", "rootkit", "yara_match",
97
  "wget", "curl", "bash", "sh", "payload", "ufw", "sudo", "root", "ssh",
98
- "cron", "systemd"
99
  ]
100
  ip_pattern = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'
101
  timestamp_pattern = r'\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}'
@@ -103,10 +139,10 @@ def dsatp_parse_log(text: str) -> dict:
103
  sql_pattern = r'(union\s+select|select\s+.*\s+from|drop\s+table)'
104
  xss_pattern = r'(<script>|on\w+\s*=|javascript:)'
105
  yara_pattern = r'yara_match:\s*([\w\.]+)\s+detected'
106
- script_pattern = r'(wget|curl)\s+.*(http[s]?://[^\s]+)\s*.*\.(sh|bash)'
107
  sudo_pattern = r'sudo:.*user=root\s*;'
108
  ssh_pattern = r'reverse\s+ssh|tunnel\s+service'
109
- malicious_url_pattern = r'http[s]?://.*(malicious|payload)[^\s]*'
110
 
111
  for line in lines:
112
  # YARA match detection
@@ -124,6 +160,10 @@ def dsatp_parse_log(text: str) -> dict:
124
  if re.search(script_pattern, line, re.IGNORECASE) or re.search(malicious_url_pattern, line, re.IGNORECASE):
125
  detected_threats.append(threats["malicious"] | {"confidence": 0.95})
126
 
 
 
 
 
127
  # Firewall block
128
  if re.search(r'ufw\s+block', line, re.IGNORECASE):
129
  ip_match = re.search(ip_pattern, line)
@@ -201,6 +241,16 @@ def dsatp_parse_log(text: str) -> dict:
201
  # Enhanced DSATP YARA scanning
202
  def dsatp_yara_scan(file_path: str) -> dict:
203
  try:
 
 
 
 
 
 
 
 
 
 
204
  import yara
205
  rules = yara.compile(source="""
206
  rule BruteForceLog {
@@ -220,11 +270,25 @@ def dsatp_yara_scan(file_path: str) -> dict:
220
  $trojan = "trojan" nocase
221
  $ransomware = "ransomware" nocase
222
  $wget = "wget" nocase
 
223
  $payload = "payload" nocase
224
  $malicious = "malicious" nocase
 
225
  condition:
226
  any of them
227
  }
 
 
 
 
 
 
 
 
 
 
 
 
228
  rule SuspiciousBehavior {
229
  strings:
230
  $heuristic = "heuristic" nocase
@@ -283,7 +347,7 @@ def dsatp_yara_scan(file_path: str) -> dict:
283
  "mitigation": "Block suspicious IPs, disable password-based SSH, enable fail2ban",
284
  "confidence": 0.95
285
  })
286
- elif match.rule == "MalwareLog":
287
  detected_threats.append({
288
  "classification": "Malware Detected",
289
  "severity": "Critical",
@@ -377,7 +441,7 @@ def chatbot_response(user_input, file, history, state):
377
  # Map classification to precise keywords for relevant CVEs
378
  threat_keywords = {
379
  "Brute-Force Attempt": "brute force, ssh, login attempt, authentication failure, openssh, password attack, cwe-287, cwe-307",
380
- "Malware Detected": "malware, trojan, ransomware, payload, malicious script, backdoor, virus, cwe-119, cwe-506",
381
  "Network Intrusion": "firewall, intrusion, ufw, network attack, port scan, unauthorized access, cwe-284",
382
  "Privilege Escalation": "privilege escalation, sudo, root, unauthorized access, cwe-269, cwe-250",
383
  "Persistence Mechanism": "ssh tunnel, reverse ssh, persistence, backdoor, remote access, cwe-284",
@@ -652,7 +716,7 @@ with gr.Blocks(css="""
652
  with gr.Column(scale=2):
653
  chatbot = gr.Chatbot(label="Security Analyst Chat", type="messages", height=500)
654
  user_input = gr.Textbox(placeholder="Enter log data or alert (e.g., 'System compromised!', 'Trojan detected')", lines=3)
655
- file_input = gr.File(label="Upload .txt/.log file", file_types=[".txt", ".log"])
656
  submit_btn = gr.Button("Analyze")
657
  with gr.Column(scale=1):
658
  gr.Markdown("### Threat Analysis Results")
 
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
  import os
6
  import re
7
+ import ast
8
  from collections import defaultdict
9
  from datetime import datetime, timedelta
10
  import json
11
  import time
12
  import logging
13
  from retrying import retry
14
+ import base64
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
61
  logger.error(f"Mistral API request failed: {e}")
62
  raise
63
 
64
+ # Basic Python code analysis
65
def analyze_python_code(content: str) -> dict:
    """Statically inspect Python source text for a few malware indicators.

    The text is parsed with ``ast`` (it is never executed) and every node
    of the tree is checked for:

    * calls to ``base64.b64decode``
    * calls to the builtin ``exec``
    * imports of ``urllib``, ``urllib.request`` or ``requests``, in both
      the ``import x`` and the ``from x import y`` form
    * string literals holding an http(s) URL that contains ``evil``,
      ``malicious`` or ``bad``

    Args:
        content: Python source code to analyse.

    Returns:
        A "Malware Detected" threat dict with the keys ``classification``,
        ``severity``, ``mitigation``, ``confidence`` and ``details`` (one
        message per indicator hit) when at least one indicator is found.
        ``None`` when nothing is found or the text cannot be parsed.
    """
    network_modules = {'urllib', 'urllib.request', 'requests'}
    # \S* (instead of a greedy .*) keeps the keyword inside the URL itself,
    # so prose that merely follows a harmless URL is not flagged.
    url_pattern = re.compile(
        r'https?://\S*(?:evil|malicious|bad)\S*', re.IGNORECASE
    )

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: interpreters before 3.12 reject NUL bytes this way.
        logger.warning("Invalid Python syntax in file")
        return None

    suspicious_patterns = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            func = node.func
            if (
                isinstance(func, ast.Attribute)
                and func.attr == 'b64decode'
                and isinstance(func.value, ast.Name)
                and func.value.id == 'base64'
            ):
                suspicious_patterns.append("Base64 decoding detected")
            elif isinstance(func, ast.Name) and func.id == 'exec':
                suspicious_patterns.append(
                    "Dynamic code execution (exec) detected"
                )
        elif isinstance(node, ast.Import):
            for alias in node.names:
                if alias.name in network_modules:
                    suspicious_patterns.append(
                        f"Suspicious import: {alias.name}"
                    )
        elif isinstance(node, ast.ImportFrom):
            # The package being imported from is what matters here; only
            # absolute imports (level 0) can refer to the real libraries.
            if node.level == 0 and node.module in network_modules:
                suspicious_patterns.append(
                    f"Suspicious import: {node.module}"
                )
            for alias in node.names:
                if alias.name in network_modules:
                    suspicious_patterns.append(
                        f"Suspicious import: {alias.name}"
                    )
        elif isinstance(node, ast.Constant) and isinstance(node.value, str):
            # ast.Constant covers every string literal; the legacy ast.Str
            # alias is deprecated and absent from current interpreters.
            url_match = url_pattern.search(node.value)
            if url_match:
                # Report the URL only, not the whole surrounding literal.
                suspicious_patterns.append(
                    f"Suspicious URL: {url_match.group(0)}"
                )

    if not suspicious_patterns:
        return None
    return {
        "classification": "Malware Detected",
        "severity": "Critical",
        "mitigation": "Quarantine file, run antivirus, block suspicious URLs",
        "confidence": 0.95,
        "details": suspicious_patterns,
    }
97
+
98
  # Enhanced DSATP log parsing
99
  def dsatp_parse_log(text: str) -> dict:
100
  log = text.lower()
 
131
  "flood", "syn flood", "http flood", "suspicious url", "script tag",
132
  "sqlmap", "union select", "escalation attempt", "rootkit", "yara_match",
133
  "wget", "curl", "bash", "sh", "payload", "ufw", "sudo", "root", "ssh",
134
+ "cron", "systemd", "base64", "exec"
135
  ]
136
  ip_pattern = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'
137
  timestamp_pattern = r'\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}'
 
139
  sql_pattern = r'(union\s+select|select\s+.*\s+from|drop\s+table)'
140
  xss_pattern = r'(<script>|on\w+\s*=|javascript:)'
141
  yara_pattern = r'yara_match:\s*([\w\.]+)\s+detected'
142
+ script_pattern = r'(wget|curl)\s+.*(http[s]?://[^\s]+)\s*.*\.(sh|bash|exe)'
143
  sudo_pattern = r'sudo:.*user=root\s*;'
144
  ssh_pattern = r'reverse\s+ssh|tunnel\s+service'
145
+ malicious_url_pattern = r'http[s]?://.*(malicious|payload|evil)[^\s]*'
146
 
147
  for line in lines:
148
  # YARA match detection
 
160
  if re.search(script_pattern, line, re.IGNORECASE) or re.search(malicious_url_pattern, line, re.IGNORECASE):
161
  detected_threats.append(threats["malicious"] | {"confidence": 0.95})
162
 
163
+ # Base64 and exec detection
164
+ if "base64.b64decode" in line and "exec" in line:
165
+ detected_threats.append(threats["malicious"] | {"confidence": 0.95})
166
+
167
  # Firewall block
168
  if re.search(r'ufw\s+block', line, re.IGNORECASE):
169
  ip_match = re.search(ip_pattern, line)
 
241
  # Enhanced DSATP YARA scanning
242
  def dsatp_yara_scan(file_path: str) -> dict:
243
  try:
244
+ # Read file content for Python analysis
245
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
246
+ content = f.read()
247
+
248
+ # Analyze Python code if it's a .py file
249
+ if file_path.endswith('.py'):
250
+ python_analysis = analyze_python_code(content)
251
+ if python_analysis:
252
+ return python_analysis
253
+
254
  import yara
255
  rules = yara.compile(source="""
256
  rule BruteForceLog {
 
270
  $trojan = "trojan" nocase
271
  $ransomware = "ransomware" nocase
272
  $wget = "wget" nocase
273
+ $curl = "curl" nocase
274
  $payload = "payload" nocase
275
  $malicious = "malicious" nocase
276
+ $evil = "evil" nocase
277
  condition:
278
  any of them
279
  }
280
+ rule PythonMalware {
281
+ strings:
282
+ $base64 = "base64.b64decode" nocase
283
+ $exec = "exec" nocase
284
+ $urllib = "urllib.request" nocase
285
+ $requests = "requests.get" nocase
286
+ $url = "http://" nocase
287
+ $url2 = "https://" nocase
288
+ $evil_url = "evil.com" nocase
289
+ condition:
290
+ ($base64 and $exec) or ($urllib and ($url or $url2 or $evil_url)) or ($requests and ($url or $url2 or $evil_url))
291
+ }
292
  rule SuspiciousBehavior {
293
  strings:
294
  $heuristic = "heuristic" nocase
 
347
  "mitigation": "Block suspicious IPs, disable password-based SSH, enable fail2ban",
348
  "confidence": 0.95
349
  })
350
+ elif match.rule == "MalwareLog" or match.rule == "PythonMalware":
351
  detected_threats.append({
352
  "classification": "Malware Detected",
353
  "severity": "Critical",
 
441
  # Map classification to precise keywords for relevant CVEs
442
  threat_keywords = {
443
  "Brute-Force Attempt": "brute force, ssh, login attempt, authentication failure, openssh, password attack, cwe-287, cwe-307",
444
+ "Malware Detected": "malware, trojan, ransomware, payload, malicious script, backdoor, virus, cwe-94, cwe-506, cwe-119",
445
  "Network Intrusion": "firewall, intrusion, ufw, network attack, port scan, unauthorized access, cwe-284",
446
  "Privilege Escalation": "privilege escalation, sudo, root, unauthorized access, cwe-269, cwe-250",
447
  "Persistence Mechanism": "ssh tunnel, reverse ssh, persistence, backdoor, remote access, cwe-284",
 
716
  with gr.Column(scale=2):
717
  chatbot = gr.Chatbot(label="Security Analyst Chat", type="messages", height=500)
718
  user_input = gr.Textbox(placeholder="Enter log data or alert (e.g., 'System compromised!', 'Trojan detected')", lines=3)
719
+ file_input = gr.File(label="Upload .txt/.log/.py file", file_types=[".txt", ".log", ".py"])
720
  submit_btn = gr.Button("Analyze")
721
  with gr.Column(scale=1):
722
  gr.Markdown("### Threat Analysis Results")
requirements.txt CHANGED
@@ -1,13 +1,9 @@
1
  gradio[mcp]>=4.0.0
2
  textblob
3
- fastapi
4
- uvicorn
5
  yara-python
6
  requests
7
  llama-index-core
8
  llama-index-embeddings-huggingface
9
  pandas
10
  sentence-transformers
11
- transformers
12
- torch
13
  retrying
 
1
  gradio[mcp]>=4.0.0
2
  textblob
 
 
3
  yara-python
4
  requests
5
  llama-index-core
6
  llama-index-embeddings-huggingface
7
  pandas
8
  sentence-transformers
 
 
9
  retrying