megharudushi committed on
Commit
f31bb42
·
verified ·
1 Parent(s): 1d68976

Upload vortex_api.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. vortex_api.py +323 -0
vortex_api.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ VORTEX@SANDBOX Agent Control API
4
+ Ultra-fast REST + WebSocket interface for AI agents
5
+ """
6
+
7
+ import asyncio
8
+ import base64
9
+ import os
10
+ import subprocess
11
+ import json
12
+ from typing import Optional
13
+ from io import BytesIO
14
+
15
+ from fastapi import FastAPI, WebSocket, HTTPException
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+ from pydantic import BaseModel
18
+ import uvicorn
19
+
20
# Optional imports with fallbacks.
# Each backend below is optional: when it cannot be loaded its name is bound
# to None and the endpoints use the xdotool / scrot command-line tools instead.
try:
    import pyautogui
except (Exception, SystemExit):
    # Deliberately broad: importing pyautogui can fail for reasons other than
    # ImportError (for example when no X display is reachable).
    # NOTE(review): some of its helper packages reportedly call sys.exit()
    # at import time, hence SystemExit - confirm. Unlike a bare ``except:``,
    # KeyboardInterrupt is no longer swallowed.
    pyautogui = None
else:
    pyautogui.FAILSAFE = False
    pyautogui.PAUSE = 0.01  # Minimal delay for speed

try:
    import mss
except Exception:
    mss = None

try:
    from PIL import Image
except Exception:
    Image = None
37
+
38
# Application object carrying the API title, description and version.
app = FastAPI(
    version="1.0.0",
    title="vortex@sandbox API",
    description="Agent control interface for containerized Linux desktop",
)

# CORS is left fully open: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
50
+
51
+ # ============================================
52
+ # Request Models
53
+ # ============================================
54
+
55
class MouseMove(BaseModel):
    # Request body of POST /mouse/move: the position to move the pointer to.
    x: int  # horizontal coordinate
    y: int  # vertical coordinate
58
+
59
class MouseClick(BaseModel):
    # Request body of POST /mouse/click.
    x: int  # horizontal coordinate of the click
    y: int  # vertical coordinate of the click
    # Button name; the xdotool fallback knows "left", "middle" and "right"
    # and treats anything else as the left button.
    button: str = "left"
    clicks: int = 1  # number of consecutive clicks
64
+
65
class KeyPress(BaseModel):
    # Request body of POST /key.
    key: str  # the key to press
    # Keys held together with ``key``; an empty list means a plain key press.
    modifiers: list[str] = []
68
+
69
class TypeText(BaseModel):
    # Request body of POST /type.
    text: str  # the text to type
    # Delay between keystrokes in seconds (converted to milliseconds for xdotool).
    interval: float = 0.01
72
+
73
class Screenshot(BaseModel):
    # Request body of POST /screenshot.
    # Optional capture rectangle given as [x, y, w, h]; None captures the
    # default monitor.
    region: Optional[list[int]] = None  # [x, y, w, h]
    # Image format name; it is upper-cased and handed to PIL when encoding.
    format: str = "png"
76
+
77
class RunCommand(BaseModel):
    # Request body of POST /exec.
    command: str  # command line, executed through the shell
    timeout: int = 30  # seconds allowed before the command is abandoned
80
+
81
class Navigate(BaseModel):
    # Request body of POST /navigate.
    url: str  # address typed into the browser's location bar
83
+
84
+ # ============================================
85
+ # Core Endpoints
86
+ # ============================================
87
+
88
@app.get("/")
async def root():
    """Return service metadata and an index of the available endpoints.

    The index groups the routes defined alongside this handler by area. It
    now also lists /mouse/scroll, /hotkey, /screenshot.png and /ws, which
    were registered but missing from the advertised list.
    """
    return {
        "name": "vortex@sandbox",
        "version": "1.0.0",
        "status": "running",
        "endpoints": {
            "mouse": "/mouse/move, /mouse/click, /mouse/scroll",
            "keyboard": "/key, /type, /hotkey",
            "screen": "/screenshot, /screenshot.png",
            "system": "/exec, /health",
            "browser": "/navigate, /cdp",
            "realtime": "/ws",
        },
    }
102
+
103
@app.get("/health")
async def health():
    """Liveness probe.

    Reports a fixed "healthy" status together with the value of the DISPLAY
    environment variable (":99" when it is unset).
    """
    display = os.environ.get("DISPLAY", ":99")
    return {"status": "healthy", "display": display}
106
+
107
+ # ============================================
108
+ # Mouse Control
109
+ # ============================================
110
+
111
@app.post("/mouse/move")
async def mouse_move(req: MouseMove):
    """Move the pointer to (req.x, req.y).

    Uses pyautogui when it was importable, otherwise shells out to
    ``xdotool mousemove``. Returns the requested coordinates.
    """
    x, y = req.x, req.y
    if not pyautogui:
        subprocess.run(["xdotool", "mousemove", str(x), str(y)])
    else:
        pyautogui.moveTo(x, y, duration=0)
    return {"moved": [x, y]}
118
+
119
@app.post("/mouse/click")
async def mouse_click(req: MouseClick):
    """Click at (req.x, req.y) with the requested button, req.clicks times.

    Uses pyautogui when available. The xdotool fallback moves the pointer
    first and then issues one ``xdotool click`` per requested click; button
    names it does not know are treated as the left button.
    """
    x, y = req.x, req.y
    if not pyautogui:
        xdo_button = {"left": "1", "middle": "2", "right": "3"}.get(req.button, "1")
        subprocess.run(["xdotool", "mousemove", str(x), str(y)])
        click_cmd = ["xdotool", "click", xdo_button]
        for _ in range(req.clicks):
            subprocess.run(click_cmd)
    else:
        pyautogui.click(x, y, clicks=req.clicks, button=req.button)
    return {"clicked": [x, y], "button": req.button}
129
+
130
@app.post("/mouse/scroll")
async def mouse_scroll(direction: str = "down", amount: int = 3):
    """Scroll the wheel ``amount`` steps.

    ``direction == "down"`` scrolls down; any other value scrolls up. The
    xdotool fallback clicks wheel button 5 (down) or 4 (up) once per step.
    """
    scroll_down = direction == "down"
    if not pyautogui:
        wheel_button = "5" if scroll_down else "4"
        for _ in range(amount):
            subprocess.run(["xdotool", "click", wheel_button])
    else:
        pyautogui.scroll(-amount if scroll_down else amount)
    return {"scrolled": direction, "amount": amount}
139
+
140
+ # ============================================
141
+ # Keyboard Control
142
+ # ============================================
143
+
144
@app.post("/key")
async def key_press(req: KeyPress):
    """Press req.key, optionally together with req.modifiers.

    With pyautogui a modifier list turns the press into a hotkey chord. The
    xdotool fallback joins modifiers and key with "+" into a single
    ``xdotool key`` argument.
    """
    mods = req.modifiers
    if not pyautogui:
        # Joining a one-element list yields the bare key, so no special case
        # is needed when there are no modifiers.
        combo = "+".join([*mods, req.key])
        subprocess.run(["xdotool", "key", combo])
    elif mods:
        pyautogui.hotkey(*mods, req.key)
    else:
        pyautogui.press(req.key)
    return {"pressed": req.key, "modifiers": mods}
155
+
156
@app.post("/type")
async def type_text(req: TypeText):
    """Type req.text with req.interval seconds between keystrokes.

    Uses pyautogui when available, otherwise ``xdotool type`` with the
    interval converted to whole milliseconds. Returns the typed text.
    """
    if pyautogui:
        pyautogui.write(req.text, interval=req.interval)
    else:
        delay_ms = int(req.interval * 1000)
        # "--" ends option parsing so text that begins with "-" (e.g.
        # "--help") is typed literally instead of being read as an option.
        subprocess.run(
            ["xdotool", "type", "--delay", str(delay_ms), "--", req.text]
        )
    return {"typed": req.text}
163
+
164
@app.post("/hotkey")
async def hotkey(keys: list[str]):
    """Press the given keys together as one chord.

    Uses pyautogui when available; otherwise the keys are joined with "+"
    and passed to ``xdotool key``. Returns the key list.
    """
    if not pyautogui:
        subprocess.run(["xdotool", "key", "+".join(keys)])
    else:
        pyautogui.hotkey(*keys)
    return {"hotkey": keys}
171
+
172
+ # ============================================
173
+ # Screenshot
174
+ # ============================================
175
+
176
@app.post("/screenshot")
async def screenshot(req: Screenshot = Screenshot()):
    """Capture the screen and return it base64-encoded.

    When both mss and PIL are importable, the image is grabbed in-process
    (optionally limited to ``req.region``) and encoded as ``req.format``.
    Otherwise the ``scrot`` tool writes a full-screen PNG; the region and
    format settings are not applied on that path.

    Raises:
        HTTPException 400: ``region`` has fewer than four values.
        HTTPException 500: capturing or encoding failed.
    """
    # Validated outside the try block so the 400 is not rewrapped as a 500.
    if req.region and len(req.region) < 4:
        raise HTTPException(status_code=400, detail="region must be [x, y, w, h]")

    try:
        if mss and Image:
            with mss.mss() as sct:
                if req.region:
                    monitor = {"left": req.region[0], "top": req.region[1],
                               "width": req.region[2], "height": req.region[3]}
                else:
                    monitor = sct.monitors[1]
                img = sct.grab(monitor)

                pil_img = Image.frombytes("RGB", img.size, img.bgra, "raw", "BGRX")
                buffer = BytesIO()
                # PIL registers the format as "JPEG"; "JPG" is rejected.
                pil_format = "JPEG" if req.format.lower() == "jpg" else req.format.upper()
                pil_img.save(buffer, format=pil_format)
                data = base64.b64encode(buffer.getvalue()).decode()
                return {"success": True, "format": req.format, "data": data,
                        "size": [img.width, img.height]}

        # Fallback to scrot. check=True makes a failed capture an error
        # instead of silently returning a stale file left by an earlier run.
        subprocess.run(["scrot", "-o", "/tmp/screen.png"],
                       capture_output=True, check=True)
        with open("/tmp/screen.png", "rb") as f:
            data = base64.b64encode(f.read()).decode()
        return {"success": True, "format": "png", "data": data}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
203
+
204
@app.get("/screenshot.png")
async def screenshot_direct():
    """Direct PNG download.

    Runs ``scrot`` to capture the screen to /tmp/screen.png and returns the
    file contents as an image/png response.

    Raises:
        HTTPException 500: scrot failed or the file could not be read.
    """
    try:
        # check=True: a failed capture must not serve a stale file that an
        # earlier request left at the same path.
        subprocess.run(["scrot", "-o", "/tmp/screen.png"],
                       capture_output=True, check=True)
        with open("/tmp/screen.png", "rb") as f:
            data = f.read()
        from fastapi.responses import Response
        return Response(content=data, media_type="image/png")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
215
+
216
+ # ============================================
217
+ # System Commands
218
+ # ============================================
219
+
220
@app.post("/exec")
async def exec_command(req: RunCommand):
    """Run req.command through the shell and return its output.

    Returns a dict with ``success`` (exit code was 0), ``stdout``,
    ``stderr`` and ``code``.

    Raises:
        HTTPException 408: the command exceeded ``req.timeout`` seconds.
        HTTPException 500: the command could not be started or read.
    """
    # SECURITY NOTE(review): the command string comes straight from the
    # request and is executed with shell=True. That appears to be the purpose
    # of this endpoint, so it is kept, but it must never be reachable by
    # untrusted clients.
    try:
        # Run in a worker thread: a blocking subprocess.run() here would
        # freeze the event loop (and every other endpoint) until it finished.
        result = await asyncio.to_thread(
            subprocess.run,
            req.command,
            shell=True,
            capture_output=True,
            text=True,
            errors="replace",  # undecodable output must not fail the request
            timeout=req.timeout,
        )
        return {
            "success": result.returncode == 0,
            "stdout": result.stdout,
            "stderr": result.stderr,
            "code": result.returncode
        }
    except subprocess.TimeoutExpired:
        raise HTTPException(status_code=408, detail="Command timed out")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
240
+
241
+ # ============================================
242
+ # Browser Control (via CDP)
243
+ # ============================================
244
+
245
@app.post("/navigate")
async def navigate(req: Navigate):
    """Navigate browser via xdotool (focus + Ctrl+L + URL + Enter).

    Each step is best-effort: exit codes are not checked, only timeouts and
    launch failures are reported.

    Raises:
        HTTPException 500: an xdotool call timed out or could not be run.
    """
    try:
        # Focus Chromium window
        subprocess.run(["xdotool", "search", "--name", "Chromium", "windowactivate"], timeout=2)
        await asyncio.sleep(0.1)

        # Ctrl+L to focus address bar
        subprocess.run(["xdotool", "key", "ctrl+l"], timeout=1)
        await asyncio.sleep(0.1)

        # Type URL; "--" keeps a value starting with "-" from being parsed
        # as an xdotool option.
        subprocess.run(["xdotool", "type", "--delay", "10", "--", req.url], timeout=10)
        await asyncio.sleep(0.1)

        # Press Enter
        subprocess.run(["xdotool", "key", "Return"], timeout=1)

        return {"navigated": req.url}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
267
+
268
@app.get("/cdp")
async def get_cdp_info():
    """Get Chrome DevTools Protocol endpoint info.

    Fetches http://localhost:9222/json/version and returns the decoded JSON.
    If httpx is missing, the request fails, or the reply is not JSON, an
    error dict naming the port is returned instead.
    """
    try:
        import httpx
        async with httpx.AsyncClient() as client:
            resp = await client.get("http://localhost:9222/json/version", timeout=5)
            return resp.json()
    except Exception:
        # Not a bare ``except:``: that would also swallow
        # asyncio.CancelledError and hide task cancellation.
        return {"error": "CDP not available", "port": 9222}
278
+
279
+ # ============================================
280
+ # WebSocket for Real-time Control
281
+ # ============================================
282
+
283
def _run_ws_action(cmd):
    """Execute one WebSocket command dict and return its reply dict."""
    action = cmd.get("action")

    if action == "move":
        subprocess.run(["xdotool", "mousemove", str(cmd["x"]), str(cmd["y"])])
        return {"moved": [cmd["x"], cmd["y"]]}
    if action == "click":
        subprocess.run(["xdotool", "mousemove", str(cmd["x"]), str(cmd["y"])])
        subprocess.run(["xdotool", "click", "1"])
        return {"clicked": [cmd["x"], cmd["y"]]}
    if action == "type":
        # "--" keeps text starting with "-" from being parsed as an option.
        subprocess.run(["xdotool", "type", "--", cmd["text"]])
        return {"typed": cmd["text"]}
    if action == "key":
        subprocess.run(["xdotool", "key", cmd["key"]])
        return {"pressed": cmd["key"]}
    if action == "screenshot":
        # check=True: never answer with a stale file from an earlier capture.
        subprocess.run(["scrot", "-o", "/tmp/ws_screen.png"], check=True)
        with open("/tmp/ws_screen.png", "rb") as f:
            img_data = base64.b64encode(f.read()).decode()
        return {"screenshot": img_data}

    return {"error": "unknown action"}


@app.websocket("/ws")
async def websocket_control(ws: WebSocket):
    """WebSocket for streaming commands.

    Each text frame is a JSON object with an ``action`` of "move", "click",
    "type", "key" or "screenshot" plus that action's fields; one JSON reply
    is sent per frame. Malformed frames, missing fields and failed tools are
    answered with ``{"error": ...}`` and the session stays open.
    """
    # Local import keeps this handler self-contained with respect to the
    # module-level import block.
    from fastapi import WebSocketDisconnect

    await ws.accept()
    try:
        while True:
            data = await ws.receive_text()
            try:
                cmd = json.loads(data)
                if not isinstance(cmd, dict):
                    raise ValueError("command must be a JSON object")
                result = _run_ws_action(cmd)
            except KeyError as exc:
                result = {"error": f"missing field: {exc.args[0]}"}
            except (ValueError, OSError, subprocess.CalledProcessError) as exc:
                # ValueError covers json.JSONDecodeError; OSError covers a
                # missing xdotool/scrot binary or screenshot file.
                result = {"error": str(exc)}

            await ws.send_text(json.dumps(result))
    except WebSocketDisconnect:
        # The peer is already gone; closing again would raise.
        return
    except Exception:
        try:
            await ws.close()
        except RuntimeError:
            # The connection was already closed underneath us.
            pass
317
+
318
+ # ============================================
319
+ # Run Server
320
+ # ============================================
321
+
322
if __name__ == "__main__":
    # Script entry point: serve the app on every interface, port 8080,
    # logging warnings and above only.
    uvicorn.run(
        app,
        log_level="warning",
        port=8080,
        host="0.0.0.0",
    )