From 078d4c8d412abd625e3b69076b335cb01f227a1e Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Tue, 4 Jun 2024 20:52:08 +0200 Subject: [PATCH 1/3] Add Kubernetes configuration for Firecrawl deployment Added new files for setting up Firecrawl on a Kubernetes Cluster. The files include Kubernetes manifests for deploying API, worker, playwright service, and Redis with associated ConfigMap and Secret associated resources. Also, updated the self-host documentation to include instructions for Kubernetes deployment. --- SELF_HOST.md | 3 ++ examples/k8n/README.md | 42 ++++++++++++++++++++++++++++ examples/k8n/api.yaml | 39 ++++++++++++++++++++++++++ examples/k8n/configmap.yaml | 14 ++++++++++ examples/k8n/playwright-service.yaml | 36 ++++++++++++++++++++++++ examples/k8n/redis.yaml | 30 ++++++++++++++++++++ examples/k8n/secret.yaml | 20 +++++++++++++ examples/k8n/worker.yaml | 24 ++++++++++++++++ 8 files changed, 208 insertions(+) create mode 100644 examples/k8n/README.md create mode 100644 examples/k8n/api.yaml create mode 100644 examples/k8n/configmap.yaml create mode 100644 examples/k8n/playwright-service.yaml create mode 100644 examples/k8n/redis.yaml create mode 100644 examples/k8n/secret.yaml create mode 100644 examples/k8n/worker.yaml diff --git a/SELF_HOST.md b/SELF_HOST.md index ff5ee04..f74e050 100644 --- a/SELF_HOST.md +++ b/SELF_HOST.md @@ -29,3 +29,6 @@ docker compose up This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`. + +# Install Firecrawl on a Kubernetes Cluster (Simple Version) +Read the [examples/k8n/README.md](examples/k8n/README.md) for instructions on how to install Firecrawl on a Kubernetes Cluster. \ No newline at end of file diff --git a/examples/k8n/README.md b/examples/k8n/README.md new file mode 100644 index 0000000..a122965 --- /dev/null +++ b/examples/k8n/README.md @@ -0,0 +1,42 @@ +# Install Firecrawl on a Kubernetes Cluster (Simple Version) +# Before installing +1. 
Fill in the values in [secret.yaml](secret.yaml) (it uses the Secret `data:` field, so each value must be base64-encoded) and [configmap.yaml](configmap.yaml), and do not commit real secrets to version control +2. Build Docker images, and host it in your Docker Registry (replace the target registry with your own) + 1. API (which is also used as a worker image) + 1. ```bash + docker build -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../apps/api + docker push ghcr.io/winkk-dev/firecrawl-playwright:latest + ``` + 2. Playwright + 1. ```bash + docker build -t ghcr.io/winkk-dev/firecrawl:latest ../../apps/playwright-service + docker push ghcr.io/winkk-dev/firecrawl:latest + ``` +3. Replace the image in [worker.yaml](worker.yaml), [api.yaml](api.yaml) and [playwright-service.yaml](playwright-service.yaml) +4. + +## Install +```bash +kubectl apply -f configmap.yaml +kubectl apply -f secret.yaml +kubectl apply -f playwright-service.yaml +kubectl apply -f api.yaml +kubectl apply -f worker.yaml +kubectl apply -f redis.yaml +``` + + +# Port Forwarding for Testing +```bash +kubectl port-forward svc/api 3002:3002 -n dev +``` + +# Delete Firecrawl on Environment +```bash +kubectl delete -f configmap.yaml +kubectl delete -f secret.yaml +kubectl delete -f playwright-service.yaml +kubectl delete -f api.yaml +kubectl delete -f worker.yaml +kubectl delete -f redis.yaml +``` \ No newline at end of file diff --git a/examples/k8n/api.yaml b/examples/k8n/api.yaml new file mode 100644 index 0000000..cdc69c3 --- /dev/null +++ b/examples/k8n/api.yaml @@ -0,0 +1,39 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: api +spec: + replicas: 1 + selector: + matchLabels: + app: api + template: + metadata: + labels: + app: api + spec: + imagePullSecrets: + - name: docker-registry-secret + containers: + - name: api + image: ghcr.io/winkk-dev/firecrawl:latest + args: [ "pnpm", "run", "start:production" ] + ports: + - containerPort: 3002 + envFrom: + - configMapRef: + name: firecrawl-config + - secretRef: + name: firecrawl-secret +--- +apiVersion: v1 +kind: Service +metadata: + name: api +spec: + 
selector: + app: api + ports: + - protocol: TCP + port: 3002 + targetPort: 3002 diff --git a/examples/k8n/configmap.yaml b/examples/k8n/configmap.yaml new file mode 100644 index 0000000..b415d56 --- /dev/null +++ b/examples/k8n/configmap.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: firecrawl-config +data: + NUM_WORKERS_PER_QUEUE: "8" + PORT: "3002" + HOST: "0.0.0.0" + REDIS_URL: "redis://redis:6379" + PLAYWRIGHT_MICROSERVICE_URL: "http://playwright-service:3000" + USE_DB_AUTHENTICATION: "false" + SUPABASE_ANON_TOKEN: "" + SUPABASE_URL: "" + SUPABASE_SERVICE_TOKEN: "" diff --git a/examples/k8n/playwright-service.yaml b/examples/k8n/playwright-service.yaml new file mode 100644 index 0000000..ce79425 --- /dev/null +++ b/examples/k8n/playwright-service.yaml @@ -0,0 +1,36 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: playwright-service +spec: + replicas: 1 + selector: + matchLabels: + app: playwright-service + template: + metadata: + labels: + app: playwright-service + spec: + imagePullSecrets: + - name: docker-registry-secret + containers: + - name: playwright-service + image: ghcr.io/winkk-dev/firecrawl-playwright:latest + ports: + - containerPort: 3000 + envFrom: + - configMapRef: + name: firecrawl-config +--- +apiVersion: v1 +kind: Service +metadata: + name: playwright-service +spec: + selector: + app: playwright-service + ports: + - protocol: TCP + port: 3000 + targetPort: 3000 diff --git a/examples/k8n/redis.yaml b/examples/k8n/redis.yaml new file mode 100644 index 0000000..774d371 --- /dev/null +++ b/examples/k8n/redis.yaml @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:alpine + args: ["redis-server", "--bind", "0.0.0.0"] +--- +apiVersion: v1 +kind: Service +metadata: + name: redis +spec: + selector: + app: redis + ports: + - 
protocol: TCP + port: 6379 + targetPort: 6379 diff --git a/examples/k8n/secret.yaml b/examples/k8n/secret.yaml new file mode 100644 index 0000000..2be9632 --- /dev/null +++ b/examples/k8n/secret.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Secret +metadata: + name: firecrawl-secret +type: Opaque +data: + OPENAI_API_KEY: "" + SLACK_WEBHOOK_URL: "" + SERPER_API_KEY: "" + LLAMAPARSE_API_KEY: "" + LOGTAIL_KEY: "" + BULL_AUTH_KEY: "" + TEST_API_KEY: "" + POSTHOG_API_KEY: "" + POSTHOG_HOST: "" + SCRAPING_BEE_API_KEY: "" + STRIPE_PRICE_ID_STANDARD: "" + STRIPE_PRICE_ID_SCALE: "" + HYPERDX_API_KEY: "" + FIRE_ENGINE_BETA_URL: "" diff --git a/examples/k8n/worker.yaml b/examples/k8n/worker.yaml new file mode 100644 index 0000000..2b3b2e7 --- /dev/null +++ b/examples/k8n/worker.yaml @@ -0,0 +1,24 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: worker +spec: + replicas: 1 + selector: + matchLabels: + app: worker + template: + metadata: + labels: + app: worker + spec: + imagePullSecrets: + - name: docker-registry-secret + containers: + - name: worker + image: ghcr.io/winkk-dev/firecrawl:latest + envFrom: + - configMapRef: + name: firecrawl-config + - secretRef: + name: firecrawl-secret From 07246d0e1c1e225110d78df50d7421985ee3b0e0 Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Tue, 4 Jun 2024 20:59:04 +0200 Subject: [PATCH 2/3] Update README file in k8n directory Removed a redundant list item and updated section title for deleting Firecrawl. The "Delete Firecrawl" section is now more concise and does not refer specifically to an environment, making it more generally applicable. --- examples/k8n/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/k8n/README.md b/examples/k8n/README.md index a122965..f243613 100644 --- a/examples/k8n/README.md +++ b/examples/k8n/README.md @@ -13,7 +13,6 @@ docker push ghcr.io/winkk-dev/firecrawl:latest ``` 3. 
Replace the image in [worker.yaml](worker.yaml), [api.yaml](api.yaml) and [playwright-service.yaml](playwright-service.yaml) -4. ## Install ```bash @@ -31,7 +30,7 @@ kubectl apply -f redis.yaml kubectl port-forward svc/api 3002:3002 -n dev ``` -# Delete Firecrawl on Environment +# Delete Firecrawl ```bash kubectl delete -f configmap.yaml kubectl delete -f secret.yaml From 3cca2e3bfe36645584234ce164fd3fcc6028a592 Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Tue, 4 Jun 2024 21:09:43 +0200 Subject: [PATCH 3/3] Update Docker image names in README The Docker image names in README.md in the k8n example scripts were updated to ensure they accurately reflect the intended container images. This change primarily involves switching the names of the API and Playwright service Docker images. --- examples/k8n/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/k8n/README.md b/examples/k8n/README.md index f243613..f874d82 100644 --- a/examples/k8n/README.md +++ b/examples/k8n/README.md @@ -4,13 +4,13 @@ 2. Build Docker images, and host it in your Docker Registry (replace the target registry with your own) 1. API (which is also used as a worker image) 1. ```bash - docker build -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../apps/api - docker push ghcr.io/winkk-dev/firecrawl-playwright:latest + docker build -t ghcr.io/winkk-dev/firecrawl:latest ../../apps/api + docker push ghcr.io/winkk-dev/firecrawl:latest ``` 2. Playwright 1. ```bash - docker build -t ghcr.io/winkk-dev/firecrawl:latest ../../apps/playwright-service - docker push ghcr.io/winkk-dev/firecrawl:latest + docker build -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../apps/playwright-service + docker push ghcr.io/winkk-dev/firecrawl-playwright:latest ``` 3. Replace the image in [worker.yaml](worker.yaml), [api.yaml](api.yaml) and [playwright-service.yaml](playwright-service.yaml)