cloudflare · deathbyknowledge · Dec 5, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -64,7 +64,7 @@ npm run build:clean        # Force rebuild without cache
 # Unit tests (runs in Workers runtime with vitest-pool-workers)
 npm test
 
-# E2E tests (requires Docker, runs sequentially due to container provisioning)
+# E2E tests (requires Docker)
 npm run test:e2e
 
 # Run a single E2E test file
@@ -74,7 +74,7 @@ npm run test:e2e -- -- tests/e2e/process-lifecycle-workflow.test.ts
 npm run test:e2e -- -- tests/e2e/git-clone-workflow.test.ts -t 'test name'
 ```
 
-**Important**: E2E tests (`tests/e2e/`) run sequentially (not in parallel) to avoid container resource contention. Each test spawns its own wrangler dev instance.
+**Important**: E2E tests share a single sandbox container for performance. Tests run in parallel using unique sessions for isolation.
 
 ### Code Quality
 
@@ -211,11 +211,12 @@ npm run test:e2e -- -- tests/e2e/git-clone-workflow.test.ts -t 'should handle cl
 **Architecture:**
 
 - Tests in `tests/e2e/` run against real Cloudflare Workers + Docker containers
-- **In CI**: Tests deploy to actual Cloudflare infrastructure and run against deployed workers
-- **Locally**: Each test file spawns its own `wrangler dev` instance
+- **Shared sandbox**: All tests share ONE container, using sessions for isolation
+- **In CI**: Tests deploy to actual Cloudflare infrastructure
+- **Locally**: Global setup spawns wrangler dev once, all tests share it
 - Config: `vitest.e2e.config.ts` (root level)
-- Sequential execution (`singleFork: true`) to prevent container resource contention
-- Longer timeouts (2min per test) for container operations
+- Parallel execution via thread pool (~30s for full suite)
+- See `docs/E2E_TESTING.md` for how to write E2E tests
 
 **Build system trust:** The monorepo build system (turbo + npm workspaces) is robust and handles all package dependencies automatically. E2E tests always run against the latest built code - there's no need to manually rebuild or worry about stale builds unless explicitly working on the build setup itself.
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -165,12 +165,11 @@ Located in `tests/e2e/`:
 
 - Test full workflows against real Workers and containers
 - Require Docker
-- Slower but comprehensive
+- Share a single sandbox container for performance (~30s for full suite)
+- Use sessions for test isolation
 
 Run with: `npm run test:e2e`
 
-You can also run specific test files or individual tests:
-
 ```bash
 # Run a single E2E test file
 npm run test:e2e -- -- tests/e2e/process-lifecycle-workflow.test.ts
@@ -179,12 +178,15 @@ npm run test:e2e -- -- tests/e2e/process-lifecycle-workflow.test.ts
 npm run test:e2e -- -- tests/e2e/git-clone-workflow.test.ts -t 'should handle cloning to default directory'
 ```
 
+**See `docs/E2E_TESTING.md` for the complete guide on writing E2E tests.**
+
 ### Writing Tests
 
 - Write tests for new features
 - Add regression tests for bug fixes
 - Ensure tests are deterministic (no flaky tests)
 - Use descriptive test names
+- For E2E tests: use `getSharedSandbox()` and `createUniqueSession()` for isolation
 
 ## Documentation
 

diff --git a/docs/E2E_TESTING.md b/docs/E2E_TESTING.md
@@ -0,0 +1,165 @@
+# E2E Testing Guide
+
+E2E tests validate full workflows against real Cloudflare Workers and Docker containers.
+
+## Architecture
+
+All E2E tests share a **single sandbox container** for performance. Test isolation is achieved through **sessions** - each test file gets a unique session that provides isolated shell state (env vars, working directory) within the shared container.
+
+```
+┌─────────────────────────────────────────────────────┐
+│                  Shared Sandbox                     │
+│  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐    │
+│  │  Session A  │ │  Session B  │ │  Session C  │    │
+│  │  (test 1)   │ │  (test 2)   │ │  (test 3)   │    │
+│  └─────────────┘ └─────────────┘ └─────────────┘    │
+│                                                     │
+│              Shared filesystem & processes          │
+└─────────────────────────────────────────────────────┘
+```
+
+**Key files:**
+
+- `tests/e2e/global-setup.ts` - Creates sandbox before tests, warms containers
+- `tests/e2e/helpers/global-sandbox.ts` - Provides `getSharedSandbox()` API
+- `vitest.e2e.config.ts` - Configures parallel execution with global setup
+
+## Writing Tests
+
+### Basic Template
+
+```typescript
+import { describe, test, expect, beforeAll } from 'vitest';
+import {
+  getSharedSandbox,
+  createUniqueSession
+} from './helpers/global-sandbox';
+
+describe('My Feature', () => {
+  let workerUrl: string;
+  let headers: Record<string, string>;
+
+  beforeAll(async () => {
+    const sandbox = await getSharedSandbox();
+    workerUrl = sandbox.workerUrl;
+    headers = sandbox.createHeaders(createUniqueSession());
+  }, 120000);
+
+  test('should do something', async () => {
+    const response = await fetch(`${workerUrl}/api/execute`, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({ command: 'echo hello' })
+    });
+    expect(response.status).toBe(200);
+  }, 60000);
+});
+```
+
+### Using Python Image
+
+For tests requiring Python (code interpreter, etc.):
+
+```typescript
+beforeAll(async () => {
+  const sandbox = await getSharedSandbox();
+  workerUrl = sandbox.workerUrl;
+  // Use createPythonHeaders instead of createHeaders
+  headers = sandbox.createPythonHeaders(createUniqueSession());
+}, 120000);
+```
+
+### File Isolation
+
+Since the filesystem is shared, use unique paths to avoid conflicts:
+
+```typescript
+const sandbox = await getSharedSandbox();
+const testDir = sandbox.uniquePath('my-feature'); // /workspace/test-abc123/my-feature
+
+await fetch(`${workerUrl}/api/file/write`, {
+  method: 'POST',
+  headers,
+  body: JSON.stringify({
+    path: `${testDir}/config.json`,
+    content: '{"key": "value"}'
+  })
+});
+```
+
+### Port Usage
+
+Ports must be exposed in the Dockerfile. Currently exposed:
+
+- `8080` - General testing
+- `9090`, `9091`, `9092` - Process readiness tests
+- `9998` - Process lifecycle tests
+- `9999` - WebSocket tests
+
+To use a new port:
+
+1. Add it to both `tests/e2e/test-worker/Dockerfile` and `Dockerfile.python`
+2. Document which test uses it in the list above
+
+### Process Cleanup
+
+Always clean up background processes:
+
+```typescript
+test('should start server', async () => {
+  const startRes = await fetch(`${workerUrl}/api/process/start`, {
+    method: 'POST',
+    headers,
+    body: JSON.stringify({ command: 'bun run server.js' })
+  });
+  const { id: processId } = await startRes.json();
+
+  // ... test logic ...
+
+  // Cleanup
+  await fetch(`${workerUrl}/api/process/${processId}`, {
+    method: 'DELETE',
+    headers
+  });
+}, 60000);
+```
+
+## Test Organization
+
+| File                                    | Purpose                      |
+| --------------------------------------- | ---------------------------- |
+| `comprehensive-workflow.test.ts`        | Happy path integration tests |
+| `process-lifecycle-workflow.test.ts`    | Error handling for processes |
+| `process-readiness-workflow.test.ts`    | waitForLog/waitForPort tests |
+| `code-interpreter-workflow.test.ts`     | Python/JS code execution     |
+| `file-operations-workflow.test.ts`      | File read/write/list         |
+| `streaming-operations-workflow.test.ts` | Streaming command output     |
+| `websocket-workflow.test.ts`            | WebSocket connections        |
+| `bucket-mounting.test.ts`               | R2 bucket mounting (CI only) |
+
+## Running Tests
+
+```bash
+# All E2E tests
+npm run test:e2e
+
+# Single file
+npm run test:e2e -- -- tests/e2e/process-lifecycle-workflow.test.ts
+
+# Single test by name
+npm run test:e2e -- -- tests/e2e/git-clone-workflow.test.ts -t 'should clone repo'
+```
+
+## Debugging
+
+- Tests auto-retry once on failure (`retry: 1` in config)
+- Global setup logs sandbox ID on startup - check for initialization errors
+- If tests fail only on their first run, the container may not have been warmed; check that `global-setup.ts` initializes the right image type
+- Port conflicts: check that no other test uses the same port
+
+## What NOT to Do
+
+- **Don't create new sandboxes unless strictly necessary** - use `getSharedSandbox()`
+- **Don't skip cleanup** - leaked processes affect other tests
+- **Don't use hardcoded ports** without adding them to both Dockerfiles (see Port Usage)
+- **Don't rely on filesystem state** from other tests - use unique paths
diff --git a/packages/sandbox/package.json b/packages/sandbox/package.json
@@ -36,7 +36,7 @@
     "typecheck": "tsc --noEmit",
     "docker:local": "cd ../.. && docker build -f packages/sandbox/Dockerfile --target default --platform linux/amd64 --build-arg SANDBOX_VERSION=$npm_package_version -t cloudflare/sandbox-test:$npm_package_version . && docker build -f packages/sandbox/Dockerfile --target python --platform linux/amd64 --build-arg SANDBOX_VERSION=$npm_package_version -t cloudflare/sandbox-test:$npm_package_version-python .",
     "test": "vitest run --config vitest.config.ts \"$@\"",
-    "test:e2e": "cd ../.. && cd tests/e2e/test-worker && ./generate-config.sh && cd ../../.. && vitest run --config vitest.e2e.config.ts \"$@\""
+    "test:e2e": "cd ../../tests/e2e/test-worker && ./generate-config.sh && cd ../../.. && vitest run --config vitest.e2e.config.ts \"$@\""
   },
   "exports": {
     ".": {

diff --git a/tests/e2e/_smoke.test.ts b/tests/e2e/_smoke.test.ts
@@ -1,6 +1,5 @@
-import { describe, test, expect, beforeAll, afterAll, afterEach } from 'vitest';
-import { getTestWorkerUrl, WranglerDevRunner } from './helpers/wrangler-runner';
-import { createSandboxId, cleanupSandbox } from './helpers/test-fixtures';
+import { describe, test, expect, beforeAll } from 'vitest';
+import { getSharedSandbox } from './helpers/global-sandbox';
 import type { HealthResponse } from './test-worker/types';
 
 /**
@@ -9,50 +8,25 @@ import type { HealthResponse } from './test-worker/types';
  * This test validates that:
  * 1. Can get worker URL (deployed in CI, wrangler dev locally)
  * 2. Worker is running and responding
- * 3. Can cleanup properly
+ * 3. Shared sandbox initializes correctly
  *
- * NOTE: This is just infrastructure validation. Real SDK integration
- * tests will be in the workflow test suites.
+ * NOTE: The shared sandbox is expected to be created by global setup (see docs/E2E_TESTING.md); with parallel execution this test is not guaranteed to run first.
  */
 describe('Integration Infrastructure Smoke Test', () => {
-  describe('local', () => {
-    let runner: WranglerDevRunner | null = null;
-    let workerUrl: string;
-    let currentSandboxId: string | null = null;
+  let workerUrl: string;
 
-    beforeAll(async () => {
-      const result = await getTestWorkerUrl();
-      workerUrl = result.url;
-      runner = result.runner;
-    });
+  beforeAll(async () => {
+    // Initialize shared sandbox - this will be reused by all other tests
+    const sandbox = await getSharedSandbox();
+    workerUrl = sandbox.workerUrl;
+  }, 120000);
 
-    afterEach(async () => {
-      // Cleanup sandbox container after each test
-      if (currentSandboxId) {
-        await cleanupSandbox(workerUrl, currentSandboxId);
-        currentSandboxId = null;
-      }
-    });
+  test('should verify worker is running with health check', async () => {
+    // Verify worker is running with health check
+    const response = await fetch(`${workerUrl}/health`);
+    expect(response.status).toBe(200);
 
-    afterAll(async () => {
-      if (runner) {
-        await runner.stop();
-      }
-    });
-
-    test('should verify worker is running with health check', async () => {
-      // Verify worker is running with health check
-      const response = await fetch(`${workerUrl}/health`);
-      expect(response.status).toBe(200);
-
-      const data = (await response.json()) as HealthResponse;
-      expect(data.status).toBe('ok');
-
-      // In local mode, verify stdout captured wrangler startup
-      if (runner) {
-        const stdout = runner.getStdout();
-        expect(stdout).toContain('Ready on');
-      }
-    });
+    const data = (await response.json()) as HealthResponse;
+    expect(data.status).toBe('ok');
   });
 });
diff --git a/tests/e2e/bucket-mounting.test.ts b/tests/e2e/bucket-mounting.test.ts
@@ -1,13 +1,8 @@
-import { afterAll, afterEach, beforeAll, describe, expect, test } from 'vitest';
+import { beforeAll, describe, expect, test } from 'vitest';
 import {
-  cleanupSandbox,
-  createSandboxId,
-  createTestHeaders
-} from './helpers/test-fixtures';
-import {
-  getTestWorkerUrl,
-  type WranglerDevRunner
-} from './helpers/wrangler-runner';
+  getSharedSandbox,
+  createUniqueSession
+} from './helpers/global-sandbox';
 import type { ExecResult } from '@repo/shared';
 import type { SuccessResponse, BucketGetResponse } from './test-worker/types';
 
@@ -33,33 +28,19 @@ describe('Bucket Mounting E2E', () => {
   }
 
   describe('local', () => {
-    let runner: WranglerDevRunner | null;
     let workerUrl: string;
-    let currentSandboxId: string | null = null;
+    let headers: Record<string, string>;
 
     const TEST_BUCKET = 'sandbox-e2e-test';
     const MOUNT_PATH = '/mnt/test-data';
     const TEST_FILE = `e2e-test-${Date.now()}.txt`;
     const TEST_CONTENT = `Bucket mounting E2E test - ${new Date().toISOString()}`;
 
     beforeAll(async () => {
-      const result = await getTestWorkerUrl();
-      workerUrl = result.url;
-      runner = result.runner;
-    }, 30000);
-
-    afterEach(async () => {
-      if (currentSandboxId) {
-        await cleanupSandbox(workerUrl, currentSandboxId);
-        currentSandboxId = null;
-      }
-    });
-
-    afterAll(async () => {
-      if (runner) {
-        await runner.stop();
-      }
-    });
+      const sandbox = await getSharedSandbox();
+      workerUrl = sandbox.workerUrl;
+      headers = sandbox.createHeaders(createUniqueSession());
+    }, 120000);
 
     test('should mount bucket and perform bidirectional file operations', async () => {
       // Verify required credentials are present
@@ -76,9 +57,6 @@ describe('Bucket Mounting E2E', () => {
         );
       }
 
-      currentSandboxId = createSandboxId();
-      const headers = createTestHeaders(currentSandboxId);
-
       const PRE_EXISTING_FILE = `pre-existing-${Date.now()}.txt`;
       const PRE_EXISTING_CONTENT =
         'This file was created in R2 before mounting';