diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts
index fd6bd61050..3b2a55bcb1 100644
--- a/apps/supervisor/src/env.ts
+++ b/apps/supervisor/src/env.ts
@@ -38,6 +38,8 @@ const Env = z.object({
   TRIGGER_DEQUEUE_MAX_RUN_COUNT: z.coerce.number().int().default(10),
   TRIGGER_DEQUEUE_MAX_CONSUMER_COUNT: z.coerce.number().int().default(1),
 
+  TRIGGER_WORKER_MAX_RUN_COUNT: z.coerce.number().int().default(4),
+
   // Optional services
   TRIGGER_WARM_START_URL: z.string().optional(),
   TRIGGER_CHECKPOINT_URL: z.string().optional(),
diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts
index 83fe89c1ed..b809a240fd 100644
--- a/apps/supervisor/src/index.ts
+++ b/apps/supervisor/src/index.ts
@@ -44,6 +44,7 @@ class ManagedSupervisor {
   private readonly isKubernetes = isKubernetesEnvironment(env.KUBERNETES_FORCE_ENABLED);
   private readonly warmStartUrl = env.TRIGGER_WARM_START_URL;
+  private readonly maxRunCount = env.TRIGGER_WORKER_MAX_RUN_COUNT;
 
   constructor() {
     const { TRIGGER_WORKER_TOKEN, MANAGED_WORKER_SECRET, ...envWithoutSecrets } = env;
@@ -133,6 +134,29 @@ class ManagedSupervisor {
       heartbeatIntervalSeconds: env.TRIGGER_WORKER_HEARTBEAT_INTERVAL_SECONDS,
       sendRunDebugLogs: env.SEND_RUN_DEBUG_LOGS,
       preDequeue: async () => {
+        // Check if we've reached the maximum number of running tasks by querying the WorkloadServer
+        const currentRunCount = this.workloadServer.getActiveRunCount();
+        const activeRuns = this.workloadServer.getActiveRuns();
+
+        this.logger.debug("Checking task count before dequeue", {
+          currentRunCount,
+          maxRunCount: this.maxRunCount,
+          activeRuns,
+          note: "currentRunCount includes both connected and pending runs",
+        });
+
+        // Skip dequeue if we've reached the maximum run count
+        if (currentRunCount >= this.maxRunCount) {
+          this.logger.log("Skipping dequeue: max run count reached", {
+            currentRunCount,
+            maxRunCount: this.maxRunCount,
+            note: "Includes pending runs to prevent over-dequeuing",
+          });
+          return {
+            skipDequeue: true,
+          };
+        }
+
         if (!env.RESOURCE_MONITOR_ENABLED) {
           return {};
         }
@@ -179,6 +203,9 @@ class ManagedSupervisor {
     this.workerSession.on("runQueueMessage", async ({ time, message }) => {
       this.logger.log(`Received message with timestamp ${time.toLocaleString()}`, message);
 
+      // Immediately reserve the run slot to prevent over-dequeuing
+      this.workloadServer.reservePendingRun(message.run.friendlyId);
+
      if (message.completedWaitpoints.length > 0) {
         this.logger.debug("Run has completed waitpoints", {
           runId: message.run.id,
@@ -188,6 +215,8 @@ class ManagedSupervisor {
 
       if (!message.image) {
         this.logger.error("Run has no image", { runId: message.run.id });
+        // Release the reservation if we can't process the run
+        this.workloadServer.releasePendingRun(message.run.friendlyId);
         return;
       }
@@ -198,6 +227,8 @@ class ManagedSupervisor {
 
         if (!this.checkpointClient) {
           this.logger.error("No checkpoint client", { runId: message.run.id });
+          // Release the reservation since we can't restore
+          this.workloadServer.releasePendingRun(message.run.friendlyId);
           return;
         }
@@ -213,6 +244,8 @@ class ManagedSupervisor {
 
           if (didRestore) {
             this.logger.log("Restore successful", { runId: message.run.id });
+            // The restore process will handle the connection, so we can release here
+            // as the run will reconnect and be counted properly
           } else {
             this.logger.error("Restore failed", { runId: message.run.id });
           }
@@ -220,6 +253,8 @@ class ManagedSupervisor {
           this.logger.error("Failed to restore run", { error });
         }
 
+        // Release the reservation after the restore attempt
+        this.workloadServer.releasePendingRun(message.run.friendlyId);
         return;
       }
 
@@ -229,6 +264,7 @@ class ManagedSupervisor {
 
         if (didWarmStart) {
           this.logger.log("Warm start successful", { runId: message.run.id });
+          // Warm start handles the connection; the pending reservation will be cleared once connected
           return;
         }
 
@@ -249,6 +285,9 @@ class ManagedSupervisor {
           snapshotFriendlyId: message.snapshot.friendlyId,
         });
 
+        // The workload will connect and the pending reservation will be cleared then.
+        // If the workload fails to start, the reservation timeout cleans up the stale entry.
+
         // Disabled for now
         // this.resourceMonitor.blockResources({
         //   cpu: message.run.machine.cpu,
@@ -256,6 +295,8 @@ class ManagedSupervisor {
         // });
       } catch (error) {
         this.logger.error("Failed to create workload", { error });
+        // Release the reservation if workload creation fails
+        this.workloadServer.releasePendingRun(message.run.friendlyId);
       }
     });
 
@@ -285,11 +326,25 @@ class ManagedSupervisor {
   async onRunConnected({ run }: { run: { friendlyId: string } }) {
     this.logger.debug("Run connected", { run });
     this.workerSession.subscribeToRunNotifications([run.friendlyId]);
+
+    const currentRunCount = this.workloadServer.getActiveRunCount();
+    this.logger.log("Task started", {
+      runId: run.friendlyId,
+      currentRunCount,
+      maxRunCount: this.maxRunCount,
+    });
   }
 
   async onRunDisconnected({ run }: { run: { friendlyId: string } }) {
     this.logger.debug("Run disconnected", { run });
     this.workerSession.unsubscribeFromRunNotifications([run.friendlyId]);
+
+    const currentRunCount = this.workloadServer.getActiveRunCount();
+    this.logger.log("Task completed", {
+      runId: run.friendlyId,
+      currentRunCount,
+      maxRunCount: this.maxRunCount,
+    });
   }
 
   private async tryWarmStart(dequeuedMessage: DequeuedMessage): Promise<boolean> {
@@ -370,4 +425,4 @@ class ManagedSupervisor {
 }
 
 const worker = new ManagedSupervisor();
-worker.start();
+worker.start();
\ No newline at end of file
diff --git a/apps/supervisor/src/workloadServer/index.ts b/apps/supervisor/src/workloadServer/index.ts
index e7e391bce3..c385b1fed1 100644
--- a/apps/supervisor/src/workloadServer/index.ts
+++ b/apps/supervisor/src/workloadServer/index.ts
@@ -84,6 +84,11 @@ export class WorkloadServer extends EventEmitter {
     >
   >();
 
+  // Track pending runs that have been dequeued but not yet connected
+  private readonly pendingRuns = new Set<string>();
+  // Track timeouts for pending runs to prevent memory leaks
+  private readonly pendingRunTimeouts = new Map<string, NodeJS.Timeout>();
+
   private readonly workerClient: SupervisorHttpClient;
 
   constructor(opts: WorkloadServerOptions) {
@@ -493,6 +498,14 @@ export class WorkloadServer extends EventEmitter {
     }
 
     this.runSockets.set(friendlyId, socket);
+    // Remove from pending when actually connected
+    this.pendingRuns.delete(friendlyId);
+    // Clear the timeout since the run has connected
+    const timeout = this.pendingRunTimeouts.get(friendlyId);
+    if (timeout) {
+      clearTimeout(timeout);
+      this.pendingRunTimeouts.delete(friendlyId);
+    }
     this.emit("runConnected", { run: { friendlyId } });
     socket.data.runFriendlyId = friendlyId;
   };
@@ -501,6 +514,8 @@ export class WorkloadServer extends EventEmitter {
     socketLogger.debug("runDisconnected", { ...getSocketMetadata() });
 
     this.runSockets.delete(friendlyId);
+    // Also remove from pending if it was there
+    this.pendingRuns.delete(friendlyId);
     this.emit("runDisconnected", { run: { friendlyId } });
     socket.data.runFriendlyId = undefined;
   };
@@ -571,6 +586,54 @@ export class WorkloadServer extends EventEmitter {
     return websocketServer;
   }
 
+  getActiveRunCount(): number {
+    // Count both connected runs and pending runs
+    return this.runSockets.size + this.pendingRuns.size;
+  }
+
+  getActiveRuns(): string[] {
+    return Array.from(this.runSockets.keys());
+  }
+
+  reservePendingRun(runFriendlyId: string): void {
+    this.pendingRuns.add(runFriendlyId);
+
+    // Set a timeout to clean up if the run doesn't connect within 5 minutes
+    const timeout = setTimeout(() => {
+      if (this.pendingRuns.has(runFriendlyId)) {
+        this.logger.warn("Pending run timed out, releasing reservation", { runFriendlyId });
+        this.releasePendingRun(runFriendlyId);
+      }
+    }, 5 * 60 * 1000); // 5 minutes
+
+    this.pendingRunTimeouts.set(runFriendlyId, timeout);
+
+    this.logger.debug("Reserved pending run", {
+      runFriendlyId,
+      pendingCount: this.pendingRuns.size,
+      connectedCount: this.runSockets.size,
+      totalCount: this.getActiveRunCount(),
+    });
+  }
+
+  releasePendingRun(runFriendlyId: string): void {
+    this.pendingRuns.delete(runFriendlyId);
+
+    // Clear the timeout if it exists
+    const timeout = this.pendingRunTimeouts.get(runFriendlyId);
+    if (timeout) {
+      clearTimeout(timeout);
+      this.pendingRunTimeouts.delete(runFriendlyId);
+    }
+
+    this.logger.debug("Released pending run", {
+      runFriendlyId,
+      pendingCount: this.pendingRuns.size,
+      connectedCount: this.runSockets.size,
+      totalCount: this.getActiveRunCount(),
+    });
+  }
+
   notifyRun({ run }: { run: { friendlyId: string } }) {
     try {
       const runSocket = this.runSockets.get(run.friendlyId);
diff --git a/docker/scripts/entrypoint.sh b/docker/scripts/entrypoint.sh
index 102af5bd31..f74d35b230 100755
--- a/docker/scripts/entrypoint.sh
+++ b/docker/scripts/entrypoint.sh
@@ -15,16 +15,31 @@ if [ -n "$CLICKHOUSE_URL" ]; then
   echo "Running ClickHouse migrations..."
   export GOOSE_DRIVER=clickhouse
 
-  # Ensure secure=true is in the connection string
-  if echo "$CLICKHOUSE_URL" | grep -q "secure="; then
-    # secure parameter already exists, use as is
-    export GOOSE_DBSTRING="$CLICKHOUSE_URL"
-  elif echo "$CLICKHOUSE_URL" | grep -q "?"; then
-    # URL has query parameters, append secure=true
-    export GOOSE_DBSTRING="${CLICKHOUSE_URL}&secure=true"
+  # Extract host and credentials from CLICKHOUSE_URL (http://user:pass@host:8123)
+  # Convert to goose format (tcp://user:pass@host:9000)
+  if [ -n "$GOOSE_DBSTRING" ]; then
+    # If GOOSE_DBSTRING is explicitly set, use it
+    echo "Using provided GOOSE_DBSTRING"
   else
-    # URL has no query parameters, add secure=true
-    export GOOSE_DBSTRING="${CLICKHOUSE_URL}?secure=true"
+    # Extract components from CLICKHOUSE_URL and build a tcp connection string
+    # Pattern: http://user:password@host:8123 -> tcp://user:password@host:9000
+    CLICKHOUSE_HOST=$(echo "$CLICKHOUSE_URL" | sed -E 's|https?://([^:]+):([^@]+)@([^:]+):.*|\3|')
+    CLICKHOUSE_USER=$(echo "$CLICKHOUSE_URL" | sed -E 's|https?://([^:]+):([^@]+)@.*|\1|')
+    CLICKHOUSE_PASS=$(echo "$CLICKHOUSE_URL" | sed -E 's|https?://([^:]+):([^@]+)@.*|\2|')
+
+    # Default to clickhouse:9000 if extraction fails
+    if [ -z "$CLICKHOUSE_HOST" ]; then
+      CLICKHOUSE_HOST="clickhouse"
+    fi
+    if [ -z "$CLICKHOUSE_USER" ]; then
+      CLICKHOUSE_USER="default"
+    fi
+    if [ -z "$CLICKHOUSE_PASS" ]; then
+      CLICKHOUSE_PASS="password"
+    fi
+
+    export GOOSE_DBSTRING="tcp://${CLICKHOUSE_USER}:${CLICKHOUSE_PASS}@${CLICKHOUSE_HOST}:9000"
+    echo "Generated GOOSE_DBSTRING from CLICKHOUSE_URL"
   fi
 
   export GOOSE_MIGRATION_DIR=/triggerdotdev/internal-packages/clickhouse/schema
diff --git a/hosting/docker/docker-compose.traefik.yml b/hosting/docker/docker-compose.traefik.yml
index 297c55d74c..246c151a56 100644
--- a/hosting/docker/docker-compose.traefik.yml
+++ b/hosting/docker/docker-compose.traefik.yml
@@ -6,31 +6,11 @@ services:
       - traefik
     labels:
       - "traefik.enable=true"
-      - "traefik.http.routers.webapp.rule=Host(`webapp.localhost`)"
+      - "traefik.http.routers.webapp.rule=Host(`trigger.lattebit.com`)"
       - "traefik.http.routers.webapp.entrypoints=${TRAEFIK_ENTRYPOINT:-web}"
-      # - "traefik.http.routers.webapp.tls.certresolver=letsencrypt"
+      - "traefik.http.routers.webapp.tls.certresolver=letsencrypt"
       - "traefik.http.services.webapp.loadbalancer.server.port=3000"
 
-  registry:
-    networks:
-      - traefik
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.registry.rule=Host(`registry.localhost`)"
-      - "traefik.http.routers.registry.entrypoints=${TRAEFIK_ENTRYPOINT:-web}"
-      # - "traefik.http.routers.registry.tls.certresolver=letsencrypt"
-      - "traefik.http.services.registry.loadbalancer.server.port=5000"
-
-  minio:
-    networks:
-      - traefik
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.minio.rule=Host(`minio.localhost`)"
-      - "traefik.http.routers.minio.entrypoints=${TRAEFIK_ENTRYPOINT:-web}"
-      # - "traefik.http.routers.minio.tls.certresolver=letsencrypt"
-      - "traefik.http.services.minio.loadbalancer.server.port=9000"
-
+
   traefik:
     image: traefik:${TRAEFIK_IMAGE_TAG:-v3.4}
     restart: ${RESTART_POLICY:-unless-stopped}
@@ -47,10 +27,11 @@ services:
       - --providers.docker.network=traefik
      - --entrypoints.web.address=:80
       - --entrypoints.websecure.address=:443
-      # - --certificatesresolvers.letsencrypt.acme.tlschallenge=true
-      # - --certificatesresolvers.letsencrypt.acme.email=local@example.com
-      # - --certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json
+      - --certificatesresolvers.letsencrypt.acme.tlschallenge=true
+      - --certificatesresolvers.letsencrypt.acme.email=dqaria@gmail.com
+      - --certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json
       - --log.level=DEBUG
+
     volumes:
       - /var/run/docker.sock:/var/run/docker.sock:ro
       - traefik-letsencrypt:/letsencrypt
diff --git a/hosting/docker/webapp/docker-compose.traefik.yml b/hosting/docker/webapp/docker-compose.traefik.yml
new file mode 120000
index 0000000000..a962deee87
--- /dev/null
+++ b/hosting/docker/webapp/docker-compose.traefik.yml
@@ -0,0 +1 @@
+../docker-compose.traefik.yml
\ No newline at end of file
diff --git a/hosting/docker/webapp/docker-compose.yml b/hosting/docker/webapp/docker-compose.yml
index 1935ad5edc..4c0a27fade 100644
--- a/hosting/docker/webapp/docker-compose.yml
+++ b/hosting/docker/webapp/docker-compose.yml
@@ -12,17 +12,14 @@ services:
     image: ghcr.io/triggerdotdev/trigger.dev:${TRIGGER_IMAGE_TAG:-v4-beta}
     restart: ${RESTART_POLICY:-unless-stopped}
     logging: *logging-config
-    ports:
-      - ${WEBAPP_PUBLISH_IP:-0.0.0.0}:8030:3000
     depends_on:
-      - postgres
       - redis
       - clickhouse
     networks:
       - webapp
-      - supervisor
     volumes:
       - shared:/home/node/shared
+      - ./scripts/entrypoint.sh:/triggerdotdev/scripts/entrypoint.sh:ro
     # Only needed for bootstrap
     user: root
     # Only needed for bootstrap
@@ -70,7 +67,7 @@ services:
       RUN_REPLICATION_LOG_LEVEL: ${RUN_REPLICATION_LOG_LEVEL:-info}
       # Limits
       # TASK_PAYLOAD_OFFLOAD_THRESHOLD: 524288 # 512KB
-      # TASK_PAYLOAD_MAXIMUM_SIZE: 3145728 # 3MB
+      TASK_PAYLOAD_MAXIMUM_SIZE: 31457280 # 30MB
       # BATCH_TASK_PAYLOAD_MAXIMUM_SIZE: 1000000 # 1MB
       # TASK_RUN_METADATA_MAXIMUM_SIZE: 262144 # 256KB
       # DEFAULT_ENV_EXECUTION_CONCURRENCY_LIMIT: 100
@@ -79,40 +76,17 @@ services:
       INTERNAL_OTEL_TRACE_LOGGING_ENABLED: ${INTERNAL_OTEL_TRACE_LOGGING_ENABLED:-0}
       TRIGGER_CLI_TAG: ${TRIGGER_CLI_TAG:-v4-beta}
 
-  postgres:
-    image: postgres:${POSTGRES_IMAGE_TAG:-14}
-    restart: ${RESTART_POLICY:-unless-stopped}
-    logging: *logging-config
-    ports:
-      - ${POSTGRES_PUBLISH_IP:-127.0.0.1}:5433:5432
-    volumes:
-      - postgres:/var/lib/postgresql/data/
-    networks:
-      - webapp
-    command:
-      - -c
-      - wal_level=logical
-    environment:
-      POSTGRES_USER: ${POSTGRES_USER:-postgres}
-      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
-      POSTGRES_DB: ${POSTGRES_DB:-postgres}
-    healthcheck:
-      test: ["CMD", "pg_isready", "-U", "postgres"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 10s
+      AUTH_GITHUB_CLIENT_ID: ${AUTH_GITHUB_CLIENT_ID}
+      AUTH_GITHUB_CLIENT_SECRET: ${AUTH_GITHUB_CLIENT_SECRET}
 
   redis:
     image: redis:${REDIS_IMAGE_TAG:-7}
     restart: ${RESTART_POLICY:-unless-stopped}
     logging: *logging-config
-    ports:
-      - ${REDIS_PUBLISH_IP:-127.0.0.1}:6389:6379
-    volumes:
-      - redis:/data
     networks:
       - webapp
+    volumes:
+      - redis:/data
     healthcheck:
       test: ["CMD", "redis-cli", "ping"]
       interval: 10s
@@ -124,8 +98,6 @@ services:
     image: electricsql/electric:${ELECTRIC_IMAGE_TAG:-1.0.24}
     restart: ${RESTART_POLICY:-unless-stopped}
     logging: *logging-config
-    depends_on:
-      - postgres
     networks:
       - webapp
     environment:
@@ -143,17 +115,14 @@ services:
     image: bitnami/clickhouse:${CLICKHOUSE_IMAGE_TAG:-latest}
     restart: ${RESTART_POLICY:-unless-stopped}
     logging: *logging-config
-    ports:
-      - ${CLICKHOUSE_PUBLISH_IP:-127.0.0.1}:9123:8123
-      - ${CLICKHOUSE_PUBLISH_IP:-127.0.0.1}:9090:9000
+    networks:
+      - webapp
     environment:
       CLICKHOUSE_ADMIN_USER: ${CLICKHOUSE_USER:-default}
       CLICKHOUSE_ADMIN_PASSWORD: ${CLICKHOUSE_PASSWORD:-password}
     volumes:
       - clickhouse:/bitnami/clickhouse
       - ../clickhouse/override.xml:/bitnami/clickhouse/etc/config.d/override.xml:ro
-    networks:
-      - webapp
     healthcheck:
       test: ["CMD", "clickhouse-client", "--host", "localhost", "--port", "9000", "--user", "default", "--password", "password", "--query", "SELECT 1"]
       interval: 5s
@@ -161,62 +130,10 @@ services:
       retries: 5
       start_period: 10s
 
-  registry:
-    image: registry:${REGISTRY_IMAGE_TAG:-2}
-    restart: ${RESTART_POLICY:-unless-stopped}
-    logging: *logging-config
-    ports:
-      - ${REGISTRY_PUBLISH_IP:-127.0.0.1}:5000:5000
-    networks:
-      - webapp
-    volumes:
-      # registry-user:very-secure-indeed
-      - ../registry/auth.htpasswd:/auth/htpasswd:ro
-    environment:
-      REGISTRY_AUTH: htpasswd
-      REGISTRY_AUTH_HTPASSWD_REALM: Registry Realm
-      REGISTRY_AUTH_HTPASSWD_PATH: /auth/htpasswd
-    healthcheck:
-      test: ["CMD", "wget", "--spider", "-q", "http://localhost:5000/"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 10s
-
-  minio:
-    image: bitnami/minio:${MINIO_IMAGE_TAG:-latest}
-    restart: ${RESTART_POLICY:-unless-stopped}
-    logging: *logging-config
-    ports:
-      - ${MINIO_PUBLISH_IP:-127.0.0.1}:9000:9000
-      - ${MINIO_PUBLISH_IP:-127.0.0.1}:9001:9001
-    networks:
-      - webapp
-    volumes:
-      - minio:/bitnami/minio/data
-    environment:
-      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-admin}
-      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-very-safe-password}
-      MINIO_DEFAULT_BUCKETS: packets
-      MINIO_BROWSER: "on"
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
-      interval: 5s
-      timeout: 10s
-      retries: 5
-      start_period: 10s
-
 volumes:
   clickhouse:
-  postgres:
   redis:
   shared:
-  minio:
 
 networks:
-  docker-proxy:
-    name: docker-proxy
-  supervisor:
-    name: supervisor
-  webapp:
-    name: webapp
\ No newline at end of file
+  webapp:
\ No newline at end of file
diff --git a/hosting/docker/webapp/scripts/entrypoint.sh b/hosting/docker/webapp/scripts/entrypoint.sh
new file mode 100755
index 0000000000..f74d35b230
--- /dev/null
+++ b/hosting/docker/webapp/scripts/entrypoint.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+set -xe
+
+if [ -n "$DATABASE_HOST" ]; then
+  scripts/wait-for-it.sh ${DATABASE_HOST} -- echo "database is up"
+fi
+
+# Run migrations
+echo "Running prisma migrations"
+pnpm --filter @trigger.dev/database db:migrate:deploy
+echo "Prisma migrations done"
+
+if [ -n "$CLICKHOUSE_URL" ]; then
+  # Run ClickHouse migrations
+  echo "Running ClickHouse migrations..."
+  export GOOSE_DRIVER=clickhouse
+
+  # Extract host and credentials from CLICKHOUSE_URL (http://user:pass@host:8123)
+  # Convert to goose format (tcp://user:pass@host:9000)
+  if [ -n "$GOOSE_DBSTRING" ]; then
+    # If GOOSE_DBSTRING is explicitly set, use it
+    echo "Using provided GOOSE_DBSTRING"
+  else
+    # Extract components from CLICKHOUSE_URL and build a tcp connection string
+    # Pattern: http://user:password@host:8123 -> tcp://user:password@host:9000
+    CLICKHOUSE_HOST=$(echo "$CLICKHOUSE_URL" | sed -E 's|https?://([^:]+):([^@]+)@([^:]+):.*|\3|')
+    CLICKHOUSE_USER=$(echo "$CLICKHOUSE_URL" | sed -E 's|https?://([^:]+):([^@]+)@.*|\1|')
+    CLICKHOUSE_PASS=$(echo "$CLICKHOUSE_URL" | sed -E 's|https?://([^:]+):([^@]+)@.*|\2|')
+
+    # Default to clickhouse:9000 if extraction fails
+    if [ -z "$CLICKHOUSE_HOST" ]; then
+      CLICKHOUSE_HOST="clickhouse"
+    fi
+    if [ -z "$CLICKHOUSE_USER" ]; then
+      CLICKHOUSE_USER="default"
+    fi
+    if [ -z "$CLICKHOUSE_PASS" ]; then
+      CLICKHOUSE_PASS="password"
+    fi
+
+    export GOOSE_DBSTRING="tcp://${CLICKHOUSE_USER}:${CLICKHOUSE_PASS}@${CLICKHOUSE_HOST}:9000"
+    echo "Generated GOOSE_DBSTRING from CLICKHOUSE_URL"
+  fi
+
+  export GOOSE_MIGRATION_DIR=/triggerdotdev/internal-packages/clickhouse/schema
+  /usr/local/bin/goose up
+  echo "ClickHouse migrations complete."
+else
+  echo "CLICKHOUSE_URL not set, skipping ClickHouse migrations."
+fi
+
+# Copy over required prisma files
+cp internal-packages/database/prisma/schema.prisma apps/webapp/prisma/
+cp node_modules/@prisma/engines/*.node apps/webapp/prisma/
+
+cd /triggerdotdev/apps/webapp
+
+# Decide how much old-space memory Node should get.
+# Use $NODE_MAX_OLD_SPACE_SIZE if it's set; otherwise fall back to 8192.
+MAX_OLD_SPACE_SIZE="${NODE_MAX_OLD_SPACE_SIZE:-8192}"
+
+echo "Setting max old space size to ${MAX_OLD_SPACE_SIZE}"
+
+NODE_PATH='/triggerdotdev/node_modules/.pnpm/node_modules' exec dumb-init node --max-old-space-size=${MAX_OLD_SPACE_SIZE} ./build/server.js
diff --git a/hosting/docker/worker/docker-compose.yml b/hosting/docker/worker/docker-compose.yml
index 6e9f4db272..5a01da81f6 100644
--- a/hosting/docker/worker/docker-compose.yml
+++ b/hosting/docker/worker/docker-compose.yml
@@ -9,7 +9,7 @@ x-logging: &logging-config
 
 services:
   supervisor:
-    image: ghcr.io/triggerdotdev/supervisor:${TRIGGER_IMAGE_TAG:-v4-beta}
+    image: ghcr.io/lattebit/supervisor:latest
     restart: ${RESTART_POLICY:-unless-stopped}
     logging: *logging-config
     depends_on:
@@ -17,7 +17,6 @@ services:
     networks:
       - supervisor
       - docker-proxy
-      - webapp
     volumes:
       - shared:/home/node/shared
     # Only needed for bootstrap
@@ -26,23 +25,25 @@ services:
     command: sh -c "chown -R node:node /home/node/shared && exec /usr/bin/dumb-init -- pnpm run --filter supervisor start"
     environment:
       # This needs to match the token of the worker group you want to connect to
-      # TRIGGER_WORKER_TOKEN: ${TRIGGER_WORKER_TOKEN}
+      TRIGGER_WORKER_TOKEN: ${TRIGGER_WORKER_TOKEN}
       # Use the bootstrap token created by the webapp
-      TRIGGER_WORKER_TOKEN: file:///home/node/shared/worker_token
+      # TRIGGER_WORKER_TOKEN: file:///home/node/shared/worker_token
       MANAGED_WORKER_SECRET: ${MANAGED_WORKER_SECRET}
       TRIGGER_API_URL: ${TRIGGER_API_URL:-http://webapp:3000}
       OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://webapp:3000/otel}
       TRIGGER_WORKLOAD_API_DOMAIN: supervisor
       TRIGGER_WORKLOAD_API_PORT_EXTERNAL: 8020
+      RESOURCE_MONITOR_ENABLED: ${RESOURCE_MONITOR_ENABLED:-1}
+      TRIGGER_WORKER_MAX_RUN_COUNT: ${TRIGGER_WORKER_MAX_RUN_COUNT:-2}
       # Optional settings
       DEBUG: 1
       ENFORCE_MACHINE_PRESETS: 1
       TRIGGER_DEQUEUE_INTERVAL_MS: 1000
       DOCKER_HOST: tcp://docker-proxy:2375
-      DOCKER_RUNNER_NETWORKS: webapp,supervisor
+      DOCKER_RUNNER_NETWORKS: supervisor
       DOCKER_REGISTRY_URL: ${DOCKER_REGISTRY_URL:-localhost:5000}
-      DOCKER_REGISTRY_USERNAME: ${DOCKER_REGISTRY_USERNAME:-}
-      DOCKER_REGISTRY_PASSWORD: ${DOCKER_REGISTRY_PASSWORD:-}
+      # DOCKER_REGISTRY_USERNAME: ${DOCKER_REGISTRY_USERNAME:-}
+      # DOCKER_REGISTRY_PASSWORD: ${DOCKER_REGISTRY_PASSWORD:-}
       DOCKER_AUTOREMOVE_EXITED_CONTAINERS: 0
     healthcheck:
       test: ["CMD", "node", "-e", "http.get('http://localhost:8020/health', res => process.exit(res.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
@@ -80,6 +81,4 @@ networks:
   docker-proxy:
     name: docker-proxy
   supervisor:
-    name: supervisor
-  webapp:
-    name: webapp
+    name: supervisor
\ No newline at end of file
diff --git a/scripts/build-worker.sh b/scripts/build-worker.sh
new file mode 100755
index 0000000000..2227c4c4a3
--- /dev/null
+++ b/scripts/build-worker.sh
@@ -0,0 +1,14 @@
+IMAGE=ghcr.io/lattebit/supervisor
+
+# Auto-generate an immutable tag: 20250814_1015-abc123
+DATE=$(date -u +%Y%m%d_%H%M)
+SHA=$(git rev-parse --short HEAD)
+AUTO_TAG="${DATE}-${SHA}"
+
+# Build, then push both latest and the auto-generated tag
+docker buildx build \
+  --platform linux/amd64,linux/arm64 \
+  -f apps/supervisor/Containerfile \
+  -t $IMAGE:latest \
+  -t $IMAGE:${AUTO_TAG} \
+  --push .
\ No newline at end of file
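
Note on the new env var: TRIGGER_WORKER_MAX_RUN_COUNT is declared with zod coercion in env.ts, so the string docker-compose injects becomes a validated integer with a default. A standalone sketch of that parsing behavior (the schema line is taken from the diff; the surrounding harness is illustrative and assumes zod v3):

import { z } from "zod";

// Reproduces the env.ts pattern: coerce the docker-compose string into an
// int, falling back to 4 when the variable is unset.
const Env = z.object({
  TRIGGER_WORKER_MAX_RUN_COUNT: z.coerce.number().int().default(4),
});

console.log(Env.parse({}).TRIGGER_WORKER_MAX_RUN_COUNT); // 4 (default)
console.log(Env.parse({ TRIGGER_WORKER_MAX_RUN_COUNT: "2" }).TRIGGER_WORKER_MAX_RUN_COUNT); // 2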
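
Note on the dequeue gating: the supervisor counts connected plus pending runs, reserves a slot the moment a queue message arrives, and releases it on failure or via a 5-minute timeout. The sketch below is a minimal, self-contained illustration of that lifecycle; the PendingRunTracker class and its names are hypothetical, not the actual WorkloadServer API:

// Minimal sketch of the reserve/release pattern used to prevent over-dequeuing.
class PendingRunTracker {
  private readonly connected = new Set<string>();
  private readonly pending = new Set<string>();
  private readonly timeouts = new Map<string, NodeJS.Timeout>();

  constructor(
    private readonly maxRunCount: number,
    private readonly timeoutMs = 5 * 60 * 1000 // mirrors the 5-minute reservation timeout
  ) {}

  // Mirrors preDequeue: a run that was dequeued but has not yet opened its
  // socket still occupies a slot.
  shouldSkipDequeue(): boolean {
    return this.connected.size + this.pending.size >= this.maxRunCount;
  }

  // Mirrors reservePendingRun: take the slot immediately and arm a timeout so
  // a workload that never connects cannot leak the slot forever.
  reserve(runId: string): void {
    this.pending.add(runId);
    this.timeouts.set(runId, setTimeout(() => this.release(runId), this.timeoutMs));
  }

  // Mirrors releasePendingRun: free the slot and cancel the cleanup timer.
  release(runId: string): void {
    this.pending.delete(runId);
    const t = this.timeouts.get(runId);
    if (t) {
      clearTimeout(t);
      this.timeouts.delete(runId);
    }
  }

  // Mirrors runConnected: the reservation converts into a connected run.
  connect(runId: string): void {
    this.release(runId);
    this.connected.add(runId);
  }

  disconnect(runId: string): void {
    this.connected.delete(runId);
    this.pending.delete(runId); // defensive, as in runDisconnected
  }
}

// Usage: with maxRunCount = 1, a second dequeue is skipped even before the
// first run connects — exactly the window the reservation closes.
const tracker = new PendingRunTracker(1);
console.log(tracker.shouldSkipDequeue()); // false
tracker.reserve("run_a");
console.log(tracker.shouldSkipDequeue()); // true: pending run holds the slot
tracker.connect("run_a");
tracker.disconnect("run_a");
console.log(tracker.shouldSkipDequeue()); // false: slot freed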
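
Note on the entrypoint change: both entrypoint scripts now derive the goose DSN from CLICKHOUSE_URL with three sed passes, switching from the HTTP port (8123) to the native protocol port (9000). As a cross-check, here is the same transformation in TypeScript using the WHATWG URL parser (the helper name is hypothetical; the fallbacks mirror the shell defaults, and unlike the sed regexes this version also handles URLs without embedded credentials):

// Sketch: convert http://user:pass@host:8123 into the tcp://user:pass@host:9000
// form the entrypoint builds for goose. Illustrative, not shipped code.
function toGooseDbString(clickhouseUrl: string): string {
  let user = "default";
  let pass = "password";
  let host = "clickhouse";
  try {
    const u = new URL(clickhouseUrl);
    // Like the shell defaults, keep the fallback when a component is missing.
    if (u.username) user = decodeURIComponent(u.username);
    if (u.password) pass = decodeURIComponent(u.password);
    if (u.hostname) host = u.hostname;
  } catch {
    // Unparseable URL: fall back to clickhouse:9000 defaults, as the script does.
  }
  // goose's ClickHouse driver speaks the native protocol on 9000, not HTTP on 8123.
  return `tcp://${user}:${pass}@${host}:9000`;
}

console.log(toGooseDbString("http://default:password@clickhouse:8123"));
// -> tcp://default:password@clickhouse:9000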
