diff --git a/.env.example b/.env.example deleted file mode 100644 index e7ddc9b..0000000 --- a/.env.example +++ /dev/null @@ -1,15 +0,0 @@ -NUM_WORKERS_PER_QUEUE=8 -OPENAI_API_KEY= -SLACK_WEBHOOK_URL= -SERPER_API_KEY= -LLAMAPARSE_API_KEY= -LOGTAIL_KEY= -BULL_AUTH_KEY= -TEST_API_KEY= -POSTHOG_API_KEY= -POSTHOG_HOST= -SUPABASE_ANON_TOKEN= -SUPABASE_URL= -SUPABASE_SERVICE_TOKEN= -SCRAPING_BEE_API_KEY= -USE_DB_AUTHENTICATION=false diff --git a/SELF_HOST.md b/SELF_HOST.md index 0deb543..a695f84 100644 --- a/SELF_HOST.md +++ b/SELF_HOST.md @@ -1,36 +1,17 @@ # Self-hosting Firecrawl -First, clone this repository and copy `.env.example` to `.env`. +## Getting Started + +First, clone this repository and copy the example env file from the API folder (`apps/api/.env.example`) to `.env` in the repository root. ```bash git clone https://github.com/mendableai/firecrawl.git cd firecrawl -cp .env.example .env +cp ./apps/api/.env.example ./.env ``` -Then, edit the .env.example to have the correct values for your environment. -``` -## To turn on DB authentication, you need to set up supabase. +To run the simplest version of Firecrawl, set `USE_DB_AUTHENTICATION` in `.env` to `false` to disable database authentication. +```yml USE_DB_AUTHENTICATION=false - -# ===== Optional ENVS ====== - -# Supabase Setup (used to support DB authentication, advanced logging, etc.) -SUPABASE_ANON_TOKEN= -SUPABASE_URL= -SUPABASE_SERVICE_TOKEN= - -# Other Optionals -TEST_API_KEY= # use if you've set up authentication and want to test with a real API key -SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking -OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) 
-BULL_AUTH_KEY= # -LOGTAIL_KEY= # Use if you're configuring basic logging with logtail -PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback -LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs -SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api -SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages -POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs -POSTHOG_HOST= # set if you'd like to send posthog events like job logs ``` Once that's complete, you can simply run the following commands to get started: diff --git a/apps/api/.env.example b/apps/api/.env.example index b025326..55271ec 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -3,6 +3,7 @@ NUM_WORKERS_PER_QUEUE=8 PORT=3002 HOST=0.0.0.0 REDIS_URL=redis://localhost:6379 +PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000 ## To turn on DB authentication, you need to set up supabase. USE_DB_AUTHENTICATION=true @@ -20,7 +21,6 @@ SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blockin OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) 
BULL_AUTH_KEY= # LOGTAIL_KEY= # Use if you're configuring basic logging with logtail -PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages diff --git a/docker-compose.yaml b/docker-compose.yaml index c65de3f..af6921c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -12,10 +12,10 @@ services: api: build: apps/api environment: - - REDIS_URL=redis://redis:6379 - - PLAYWRIGHT_SERVICE_URL=http://playwright-service:3000 + - REDIS_URL=${REDIS_URL} + - PLAYWRIGHT_MICROSERVICE_URL=${PLAYWRIGHT_MICROSERVICE_URL} - USE_DB_AUTHENTICATION=${USE_DB_AUTHENTICATION} - - PORT=3002 + - PORT=${PORT} - NUM_WORKERS_PER_QUEUE=${NUM_WORKERS_PER_QUEUE} - OPENAI_API_KEY=${OPENAI_API_KEY} - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL} @@ -30,7 +30,7 @@ services: - SUPABASE_URL=${SUPABASE_URL} - SUPABASE_SERVICE_TOKEN=${SUPABASE_SERVICE_TOKEN} - SCRAPING_BEE_API_KEY=${SCRAPING_BEE_API_KEY} - - HOST=0.0.0.0 + - HOST=${HOST} depends_on: - redis - playwright-service @@ -41,10 +41,10 @@ services: worker: build: apps/api environment: - - REDIS_URL=redis://redis:6379 - - PLAYWRIGHT_SERVICE_URL=http://playwright-service:3000 + - REDIS_URL=${REDIS_URL} + - PLAYWRIGHT_MICROSERVICE_URL=${PLAYWRIGHT_MICROSERVICE_URL} - USE_DB_AUTHENTICATION=${USE_DB_AUTHENTICATION} - - PORT=3002 + - PORT=${PORT} - NUM_WORKERS_PER_QUEUE=${NUM_WORKERS_PER_QUEUE} - OPENAI_API_KEY=${OPENAI_API_KEY} - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL} @@ -59,6 +59,8 @@ services: - SUPABASE_URL=${SUPABASE_URL} - SUPABASE_SERVICE_TOKEN=${SUPABASE_SERVICE_TOKEN} - SCRAPING_BEE_API_KEY=${SCRAPING_BEE_API_KEY} + - HOST=${HOST} depends_on: - redis - playwright-service + - api