Spaces:

Agents-MCP-Hackathon
/

DSATP_AI_cybersecurity_agent

Runtime error

App Files Files Community

Rithvickkr committed on Jun 8

Commit

21b1e62

1 Parent(s): abf2ac0

Fixed file upload bug for malicious Python code, ensured relevant CVEs, and multi-threat detection

Browse files

Files changed (2) hide show

app.py +70 -6
requirements.txt +0 -4

app.py CHANGED Viewed

@@ -4,12 +4,14 @@ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 import os
 import re
 from collections import defaultdict
 from datetime import datetime, timedelta
 import json
 import time
 import logging
 from retrying import retry
 # Set up logging
 logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -59,6 +61,40 @@ def call_mistral_llm(prompt):
         logger.error(f"Mistral API request failed: {e}")
         raise
 # Enhanced DSATP log parsing
 def dsatp_parse_log(text: str) -> dict:
     log = text.lower()
@@ -95,7 +131,7 @@ def dsatp_parse_log(text: str) -> dict:
         "flood", "syn flood", "http flood", "suspicious url", "script tag",
         "sqlmap", "union select", "escalation attempt", "rootkit", "yara_match",
         "wget", "curl", "bash", "sh", "payload", "ufw", "sudo", "root", "ssh",
-        "cron", "systemd"
     ]
     ip_pattern = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'
     timestamp_pattern = r'\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}'
@@ -103,10 +139,10 @@ def dsatp_parse_log(text: str) -> dict:
     sql_pattern = r'(union\s+select|select\s+.*\s+from|drop\s+table)'
     xss_pattern = r'(<script>|on\w+\s*=|javascript:)'
     yara_pattern = r'yara_match:\s*([\w\.]+)\s+detected'
-    script_pattern = r'(wget|curl)\s+.*(http[s]?://[^\s]+)\s*.*\.(sh|bash)'
     sudo_pattern = r'sudo:.*user=root\s*;'
     ssh_pattern = r'reverse\s+ssh|tunnel\s+service'
-    malicious_url_pattern = r'http[s]?://.*(malicious|payload)[^\s]*'
     for line in lines:
         # YARA match detection
@@ -124,6 +160,10 @@ def dsatp_parse_log(text: str) -> dict:
         if re.search(script_pattern, line, re.IGNORECASE) or re.search(malicious_url_pattern, line, re.IGNORECASE):
             detected_threats.append(threats["malicious"] | {"confidence": 0.95})
         # Firewall block
         if re.search(r'ufw\s+block', line, re.IGNORECASE):
             ip_match = re.search(ip_pattern, line)
@@ -201,6 +241,16 @@ def dsatp_parse_log(text: str) -> dict:
 # Enhanced DSATP YARA scanning
 def dsatp_yara_scan(file_path: str) -> dict:
     try:
         import yara
         rules = yara.compile(source="""
         rule BruteForceLog {
@@ -220,11 +270,25 @@ def dsatp_yara_scan(file_path: str) -> dict:
                 $trojan = "trojan" nocase
                 $ransomware = "ransomware" nocase
                 $wget = "wget" nocase
                 $payload = "payload" nocase
                 $malicious = "malicious" nocase
             condition:
                 any of them
         }
         rule SuspiciousBehavior {
             strings:
                 $heuristic = "heuristic" nocase
@@ -283,7 +347,7 @@ def dsatp_yara_scan(file_path: str) -> dict:
                         "mitigation": "Block suspicious IPs, disable password-based SSH, enable fail2ban",
                         "confidence": 0.95
                     })
-                elif match.rule == "MalwareLog":
                     detected_threats.append({
                         "classification": "Malware Detected",
                         "severity": "Critical",
@@ -377,7 +441,7 @@ def chatbot_response(user_input, file, history, state):
             # Map classification to precise keywords for relevant CVEs
             threat_keywords = {
                 "Brute-Force Attempt": "brute force, ssh, login attempt, authentication failure, openssh, password attack, cwe-287, cwe-307",
-                "Malware Detected": "malware, trojan, ransomware, payload, malicious script, backdoor, virus, cwe-119, cwe-506",
                 "Network Intrusion": "firewall, intrusion, ufw, network attack, port scan, unauthorized access, cwe-284",
                 "Privilege Escalation": "privilege escalation, sudo, root, unauthorized access, cwe-269, cwe-250",
                 "Persistence Mechanism": "ssh tunnel, reverse ssh, persistence, backdoor, remote access, cwe-284",
@@ -652,7 +716,7 @@ with gr.Blocks(css="""
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Security Analyst Chat", type="messages", height=500)
             user_input = gr.Textbox(placeholder="Enter log data or alert (e.g., 'System compromised!', 'Trojan detected')", lines=3)
-            file_input = gr.File(label="Upload .txt/.log file", file_types=[".txt", ".log"])
             submit_btn = gr.Button("Analyze")
         with gr.Column(scale=1):
             gr.Markdown("### Threat Analysis Results")

 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 import os
 import re
+import ast
 from collections import defaultdict
 from datetime import datetime, timedelta
 import json
 import time
 import logging
 from retrying import retry
+import base64
 # Set up logging
 logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
         logger.error(f"Mistral API request failed: {e}")
         raise
+# Basic Python code analysis
def analyze_python_code(content: str) -> dict:
    """Statically inspect Python source text for simple malware indicators.

    The source is parsed with ``ast`` (it is never executed) and every node
    is checked for:

    * calls to ``base64.b64decode``;
    * calls to the builtin ``exec``;
    * imports of ``urllib``, ``urllib.request`` or ``requests`` in either
      ``import x`` or ``from x import y`` form;
    * string literals holding an http(s) URL that contains ``evil``,
      ``malicious`` or ``bad``.

    Args:
        content: Python source code to analyze.

    Returns:
        A threat dict with the keys ``classification``, ``severity``,
        ``mitigation``, ``confidence`` and ``details`` (the list of findings)
        when at least one indicator is found. ``None`` when nothing is found
        or when the text cannot be parsed as Python.
    """
    suspicious_modules = ('urllib', 'urllib.request', 'requests')

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: NUL bytes in the source on older interpreters.
        # RecursionError: pathologically nested input. Neither should crash
        # the scan of an uploaded file.
        logger.warning("Invalid Python syntax in file")
        return None

    suspicious_patterns = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            func = node.func
            # Check for Base64 decoding
            if (
                isinstance(func, ast.Attribute)
                and func.attr == 'b64decode'
                and isinstance(func.value, ast.Name)
                and func.value.id == 'base64'
            ):
                suspicious_patterns.append("Base64 decoding detected")
            # Check for exec usage
            elif isinstance(func, ast.Name) and func.id == 'exec':
                suspicious_patterns.append("Dynamic code execution (exec) detected")
        elif isinstance(node, ast.Import):
            for alias in node.names:
                if alias.name in suspicious_modules:
                    suspicious_patterns.append(f"Suspicious import: {alias.name}")
        elif isinstance(node, ast.ImportFrom):
            # For "from pkg import name" the package lives in node.module, so
            # test the dotted path, the module, and the bare imported name.
            module = node.module or ""
            for alias in node.names:
                dotted = f"{module}.{alias.name}" if module else alias.name
                for candidate in (dotted, module, alias.name):
                    if candidate in suspicious_modules:
                        suspicious_patterns.append(f"Suspicious import: {candidate}")
                        break
        elif isinstance(node, ast.Constant) and isinstance(node.value, str):
            # String literals are ast.Constant on Python 3.8+; the legacy
            # ast.Str alias is deprecated and absent from newer interpreters,
            # so it must not be referenced here.
            if re.search(r'http[s]?://.*(evil|malicious|bad)[^\s]*', node.value, re.IGNORECASE):
                suspicious_patterns.append(f"Suspicious URL: {node.value}")

    if not suspicious_patterns:
        return None
    return {
        "classification": "Malware Detected",
        "severity": "Critical",
        "mitigation": "Quarantine file, run antivirus, block suspicious URLs",
        "confidence": 0.95,
        "details": suspicious_patterns
    }
 # Enhanced DSATP log parsing
 def dsatp_parse_log(text: str) -> dict:
     log = text.lower()
         "flood", "syn flood", "http flood", "suspicious url", "script tag",
         "sqlmap", "union select", "escalation attempt", "rootkit", "yara_match",
         "wget", "curl", "bash", "sh", "payload", "ufw", "sudo", "root", "ssh",
+        "cron", "systemd", "base64", "exec"
     ]
     ip_pattern = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'
     timestamp_pattern = r'\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}'
     sql_pattern = r'(union\s+select|select\s+.*\s+from|drop\s+table)'
     xss_pattern = r'(<script>|on\w+\s*=|javascript:)'
     yara_pattern = r'yara_match:\s*([\w\.]+)\s+detected'
+    script_pattern = r'(wget|curl)\s+.*(http[s]?://[^\s]+)\s*.*\.(sh|bash|exe)'
     sudo_pattern = r'sudo:.*user=root\s*;'
     ssh_pattern = r'reverse\s+ssh|tunnel\s+service'
+    malicious_url_pattern = r'http[s]?://.*(malicious|payload|evil)[^\s]*'
     for line in lines:
         # YARA match detection
         if re.search(script_pattern, line, re.IGNORECASE) or re.search(malicious_url_pattern, line, re.IGNORECASE):
             detected_threats.append(threats["malicious"] | {"confidence": 0.95})
+        # Base64 and exec detection
+        if "base64.b64decode" in line and "exec" in line:
+            detected_threats.append(threats["malicious"] | {"confidence": 0.95})
         # Firewall block
         if re.search(r'ufw\s+block', line, re.IGNORECASE):
             ip_match = re.search(ip_pattern, line)
 # Enhanced DSATP YARA scanning
 def dsatp_yara_scan(file_path: str) -> dict:
     try:
+        # Read file content for Python analysis
+        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+            content = f.read()
+        # Analyze Python code if it's a .py file
+        if file_path.endswith('.py'):
+            python_analysis = analyze_python_code(content)
+            if python_analysis:
+                return python_analysis
         import yara
         rules = yara.compile(source="""
         rule BruteForceLog {
                 $trojan = "trojan" nocase
                 $ransomware = "ransomware" nocase
                 $wget = "wget" nocase
+                $curl = "curl" nocase
                 $payload = "payload" nocase
                 $malicious = "malicious" nocase
+                $evil = "evil" nocase
             condition:
                 any of them
         }
+        rule PythonMalware {
+            strings:
+                $base64 = "base64.b64decode" nocase
+                $exec = "exec" nocase
+                $urllib = "urllib.request" nocase
+                $requests = "requests.get" nocase
+                $url = "http://" nocase
+                $url2 = "https://" nocase
+                $evil_url = "evil.com" nocase
+            condition:
+                ($base64 and $exec) or ($urllib and ($url or $url2 or $evil_url)) or ($requests and ($url or $url2 or $evil_url))
+        }
         rule SuspiciousBehavior {
             strings:
                 $heuristic = "heuristic" nocase
                         "mitigation": "Block suspicious IPs, disable password-based SSH, enable fail2ban",
                         "confidence": 0.95
                     })
+                elif match.rule == "MalwareLog" or match.rule == "PythonMalware":
                     detected_threats.append({
                         "classification": "Malware Detected",
                         "severity": "Critical",
             # Map classification to precise keywords for relevant CVEs
             threat_keywords = {
                 "Brute-Force Attempt": "brute force, ssh, login attempt, authentication failure, openssh, password attack, cwe-287, cwe-307",
+                "Malware Detected": "malware, trojan, ransomware, payload, malicious script, backdoor, virus, cwe-94, cwe-506, cwe-119",
                 "Network Intrusion": "firewall, intrusion, ufw, network attack, port scan, unauthorized access, cwe-284",
                 "Privilege Escalation": "privilege escalation, sudo, root, unauthorized access, cwe-269, cwe-250",
                 "Persistence Mechanism": "ssh tunnel, reverse ssh, persistence, backdoor, remote access, cwe-284",
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Security Analyst Chat", type="messages", height=500)
             user_input = gr.Textbox(placeholder="Enter log data or alert (e.g., 'System compromised!', 'Trojan detected')", lines=3)
+            file_input = gr.File(label="Upload .txt/.log/.py file", file_types=[".txt", ".log", ".py"])
             submit_btn = gr.Button("Analyze")
         with gr.Column(scale=1):
             gr.Markdown("### Threat Analysis Results")

requirements.txt CHANGED Viewed

@@ -1,13 +1,9 @@
 gradio[mcp]>=4.0.0
 textblob
-fastapi
-uvicorn
 yara-python
 requests
 llama-index-core
 llama-index-embeddings-huggingface
 pandas
 sentence-transformers
-transformers
-torch
 retrying

 gradio[mcp]>=4.0.0
 textblob
 yara-python
 requests
 llama-index-core
 llama-index-embeddings-huggingface
 pandas
 sentence-transformers
 retrying