diff --git a/.gitea/workflows/main.yml b/.gitea/workflows/main.yml
new file mode 100644
index 0000000..2909231
--- /dev/null
+++ b/.gitea/workflows/main.yml
@@ -0,0 +1,78 @@
+# Builds the API image with Buildx and pushes it to Docker Hub on pushes to
+# main/dev and on published releases.
+name: Build and Push Docker Image
+
+on:
+  push:
+    branches:
+      - "main"
+      - "dev"
+  release:
+    types: [published]
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      # Context values are passed through env and quoted in the script so a
+      # ref name containing shell metacharacters cannot inject commands.
+      # NOTE(review): pnpm must already be on the runner PATH; confirm that
+      # `pnpm fetch` accepts --lockfile-only/--lockfile-path in the version used.
+      - name: Download dependencies to cache
+        env:
+          PNPM_STORE: /opt/hostedtoolcache/${{ gitea.repository }}-${{ gitea.ref_name }}/pnpm
+          DOCKER_PNPM_CACHE: ${{ gitea.workspace }}/cache
+        run: |
+          mkdir -p "$PNPM_STORE"
+          pnpm fetch --prod --lockfile-only --lockfile-path=apps/api/pnpm-lock.yaml --store-dir "$PNPM_STORE/"
+          mkdir -p "$DOCKER_PNPM_CACHE"
+          # "/." copies the store contents (dotfiles included) and does not fail on an empty store
+          cp -r "$PNPM_STORE/." "$DOCKER_PNPM_CACHE/"
+
+      - name: Set up Docker
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USER }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Extract metadata
+        uses: docker/metadata-action@v5
+        id: meta
+        with:
+          images: vontainment/v-gpt-node-app
+          flavor: latest=false
+          # Release runs match none of the branch/sha rules, so tag them by the
+          # git tag; otherwise the tag list is empty and the push fails.
+          tags: |
+            type=raw,value=main,enable=${{ gitea.ref == 'refs/heads/main' }}
+            type=raw,value=dev,enable=${{ gitea.ref == 'refs/heads/dev' }}
+            type=sha,priority=100,enable=${{ gitea.event_name == 'push' }}
+            type=ref,event=tag
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: apps/api/Dockerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            PNPM_CACHE=${{ gitea.workspace }}/cache
+          cache-from: type=local,src=/opt/hostedtoolcache/${{ gitea.repository }}-${{ gitea.ref_name }}/build/
+          cache-to: type=local,dest=/opt/hostedtoolcache/${{ gitea.repository }}-${{ gitea.ref_name }}/build-new/
+
+      # Replace the previous layer cache with the one exported by this build.
+      - name: Rotate build cache
+        env:
+          BUILD_CACHE_ROOT: /opt/hostedtoolcache/${{ gitea.repository }}-${{ gitea.ref_name }}
+        run: |
+          rm -rf "$BUILD_CACHE_ROOT/build/"
+          mv "$BUILD_CACHE_ROOT/build-new/" "$BUILD_CACHE_ROOT/build/"
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index bb47b47..0000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-name: Bug report
-about: Create a report to help us improve
-title: "[BUG]"
-labels: bug
-assignees: ''
-
----
-
-**Describe the Bug**
-Provide a clear and concise description of what the bug is.
-
-**To Reproduce**
-Steps to reproduce the issue:
-1. Configure the environment or settings with '...'
-2. Run the command '...'
-3. Observe the error or unexpected output at '...'
-4. Log output/error message
-
-**Expected Behavior**
-A clear and concise description of what you expected to happen.
-
-**Screenshots**
-If applicable, add screenshots or copies of the command line output to help explain the issue.
-
-**Environment (please complete the following information):**
-- OS: [e.g. macOS, Linux, Windows]
-- Firecrawl Version: [e.g. 1.2.3]
-- Node.js Version: [e.g. 14.x]
-
-**Logs**
-If applicable, include detailed logs to help understand the problem.
-
-**Additional Context**
-Add any other context about the problem here, such as configuration specifics, network conditions, data volumes, etc.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index b01699b..0000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: "[Feat]" -labels: '' -assignees: '' - ---- - -**Problem Description** -Describe the issue you're experiencing that has prompted this feature request. For example, "I find it difficult when..." - -**Proposed Feature** -Provide a clear and concise description of the feature you would like implemented. - -**Alternatives Considered** -Discuss any alternative solutions or features you've considered. Why were these alternatives not suitable? - -**Implementation Suggestions** -If you have ideas on how the feature could be implemented, share them here. This could include technical details, API changes, or interaction mechanisms. - -**Use Case** -Explain how this feature would be used and what benefits it would bring. Include specific examples to illustrate how this would improve functionality or user experience. - -**Additional Context** -Add any other context such as comparisons with similar features in other products, or links to prototypes or mockups. 
diff --git a/.github/archive/js-sdk.yml b/.github/archive/js-sdk.yml deleted file mode 100644 index c84bb8b..0000000 --- a/.github/archive/js-sdk.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: Run JavaScript SDK E2E Tests - -on: [] - -env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - HOST: ${{ secrets.HOST }} - LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} - LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} - POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} - POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} - NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} - PORT: ${{ secrets.PORT }} - REDIS_URL: ${{ secrets.REDIS_URL }} - SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} - SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} - SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - TEST_API_KEY: ${{ secrets.TEST_API_KEY }} - HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} - HDX_NODE_BETA_MODE: 1 - -jobs: - build: - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies for API - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - - name: Install dependencies for JavaScript SDK - run: pnpm install - working-directory: ./apps/js-sdk/firecrawl - - name: Run E2E tests for JavaScript SDK - run: npm run test - working-directory: ./apps/js-sdk/firecrawl \ No newline at end of file diff --git 
a/.github/archive/publish-js-sdk.yml b/.github/archive/publish-js-sdk.yml deleted file mode 100644 index c02a654..0000000 --- a/.github/archive/publish-js-sdk.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Publish JavaScript SDK - -on: [] - -env: - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - -jobs: - build-and-publish: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: '20' - registry-url: 'https://registry.npmjs.org/' - scope: '@mendable' - always-auth: true - - - name: Install pnpm - run: npm install -g pnpm - - - name: Install python for running version check script - run: | - python -m pip install --upgrade pip - pip install setuptools wheel requests packaging - - - name: Install dependencies for JavaScript SDK - run: pnpm install - working-directory: ./apps/js-sdk/firecrawl - - - name: Run version check script - id: version_check_script - run: | - VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js) - echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV - - - name: Build and publish to npm - if: ${{ env.VERSION_INCREMENTED == 'true' }} - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: | - npm run build-and-publish - working-directory: ./apps/js-sdk/firecrawl diff --git a/.github/archive/publish-python-sdk.yml b/.github/archive/publish-python-sdk.yml deleted file mode 100644 index 6d86f1e..0000000 --- a/.github/archive/publish-python-sdk.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Publish Python SDK - -on: [] - -env: - PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} - PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - -jobs: - build-and-publish: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install 
--upgrade pip - pip install setuptools wheel twine build requests packaging - - - name: Run version check script - id: version_check_script - run: | - VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py) - echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV - - - name: Build the package - if: ${{ env.VERSION_INCREMENTED == 'true' }} - run: | - python -m build - working-directory: ./apps/python-sdk - - - name: Publish to PyPI - if: ${{ env.VERSION_INCREMENTED == 'true' }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - twine upload dist/* - working-directory: ./apps/python-sdk - diff --git a/.github/archive/python-sdk.yml b/.github/archive/python-sdk.yml deleted file mode 100644 index 2744988..0000000 --- a/.github/archive/python-sdk.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: Run Python SDK E2E Tests - -on: [] - -env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - HOST: ${{ secrets.HOST }} - LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} - LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} - POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} - POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} - NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} - PORT: ${{ secrets.PORT }} - REDIS_URL: ${{ secrets.REDIS_URL }} - SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} - SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} - SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - TEST_API_KEY: ${{ secrets.TEST_API_KEY }} - HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} - HDX_NODE_BETA_MODE: 1 - -jobs: - build: - runs-on: ubuntu-latest - strategy: - 
matrix: - python-version: ["3.10"] - services: - redis: - image: redis - ports: - - 6379:6379 - - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies for API - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - working-directory: ./apps/python-sdk - - name: Run E2E tests for Python SDK - run: | - pytest firecrawl/__tests__/e2e_withAuth/test.py - working-directory: ./apps/python-sdk diff --git a/.github/scripts/check_version_has_incremented.py b/.github/scripts/check_version_has_incremented.py deleted file mode 100644 index e437c93..0000000 --- a/.github/scripts/check_version_has_incremented.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -checks local versions against published versions. 
- -# Usage: - -python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js -Local version: 0.0.22 -Published version: 0.0.21 -true - -python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py -Local version: 0.0.11 -Published version: 0.0.11 -false - -""" -import json -import os -import re -import sys -from pathlib import Path - -import requests -from packaging.version import Version -from packaging.version import parse as parse_version - - -def get_python_version(file_path: str) -> str: - """Extract version string from Python file.""" - version_file = Path(file_path).read_text() - version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) - if version_match: - return version_match.group(1).strip() - raise RuntimeError("Unable to find version string.") - -def get_pypi_version(package_name: str) -> str: - """Get latest version of Python package from PyPI.""" - response = requests.get(f"https://pypi.org/pypi/{package_name}/json") - version = response.json()['info']['version'] - return version.strip() - -def get_js_version(file_path: str) -> str: - """Extract version string from package.json.""" - with open(file_path, 'r') as file: - package_json = json.load(file) - if 'version' in package_json: - return package_json['version'].strip() - raise RuntimeError("Unable to find version string in package.json.") - -def get_npm_version(package_name: str) -> str: - """Get latest version of JavaScript package from npm.""" - response = requests.get(f"https://registry.npmjs.org/{package_name}/latest") - version = response.json()['version'] - return version.strip() - -def is_version_incremented(local_version: str, published_version: str) -> bool: - """Compare local and published versions.""" - local_version_parsed: Version = parse_version(local_version) - published_version_parsed: Version = parse_version(published_version) - return local_version_parsed > 
published_version_parsed - -if __name__ == "__main__": - package_type = sys.argv[1] - package_path = sys.argv[2] - package_name = sys.argv[3] - - if package_type == "python": - # Get current version from __init__.py - current_version = get_python_version(os.path.join(package_path, '__init__.py')) - # Get published version from PyPI - published_version = get_pypi_version(package_name) - elif package_type == "js": - # Get current version from package.json - current_version = get_js_version(os.path.join(package_path, 'package.json')) - # Get published version from npm - published_version = get_npm_version(package_name) - else: - raise ValueError("Invalid package type. Use 'python' or 'js'.") - - # Print versions for debugging - # print(f"Local version: {current_version}") - # print(f"Published version: {published_version}") - - # Compare versions and print result - if is_version_incremented(current_version, published_version): - print("true") - else: - print("false") diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt deleted file mode 100644 index 0bfc676..0000000 --- a/.github/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -requests -packaging \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index b2e42e4..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: CI/CD -on: - pull_request: - branches: - - main - # schedule: - # - cron: '0 */4 * * *' - -env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - HOST: ${{ secrets.HOST }} - LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} - LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} - POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} - POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} - NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - 
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} - PORT: ${{ secrets.PORT }} - REDIS_URL: ${{ secrets.REDIS_URL }} - SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} - SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} - SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - TEST_API_KEY: ${{ secrets.TEST_API_KEY }} - HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} - HDX_NODE_BETA_MODE: 1 - FIRE_ENGINE_BETA_URL: ${{ secrets.FIRE_ENGINE_BETA_URL }} - - -jobs: - pre-deploy: - name: Pre-deploy checks - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Run E2E tests - run: | - npm run test:prod - working-directory: ./apps/api diff --git a/.github/workflows/clean-before-24h-complete-jobs.yml b/.github/workflows/clean-before-24h-complete-jobs.yml deleted file mode 100644 index 2ced537..0000000 --- a/.github/workflows/clean-before-24h-complete-jobs.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Clean Before 24h Completed Jobs -on: - schedule: - - cron: '0 0 * * *' - -env: - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - -jobs: - clean-jobs: - runs-on: ubuntu-latest - steps: - - name: Send GET request to clean jobs - run: | - response=$(curl --write-out '%{http_code}' --silent --output /dev/null https://api.firecrawl.dev/admin/${{ secrets.BULL_AUTH_KEY }}/clean-before-24h-complete-jobs) - if [ "$response" -ne 200 ]; then - echo "Failed to clean jobs. 
Response: $response" - exit 1 - fi - echo "Successfully cleaned jobs. Response: $response" diff --git a/.github/workflows/fly-direct.yml b/.github/workflows/fly-direct.yml deleted file mode 100644 index 944e399..0000000 --- a/.github/workflows/fly-direct.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Fly Deploy Direct -on: - schedule: - - cron: '0 */2 * * *' - -env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - HOST: ${{ secrets.HOST }} - LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} - LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} - POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} - POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} - NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} - PORT: ${{ secrets.PORT }} - REDIS_URL: ${{ secrets.REDIS_URL }} - SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} - SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} - SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - TEST_API_KEY: ${{ secrets.TEST_API_KEY }} - -jobs: - deploy: - name: Deploy app - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Change directory - run: cd apps/api - - uses: superfly/flyctl-actions/setup-flyctl@master - - run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js - env: - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml deleted file mode 100644 index 627409e..0000000 --- a/.github/workflows/fly.yml +++ /dev/null @@ -1,265 +0,0 @@ -name: Fly Deploy -on: - push: - branches: - - main - -env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - HOST: ${{ secrets.HOST }} - 
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} - LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} - POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} - POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} - NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} - PORT: ${{ secrets.PORT }} - REDIS_URL: ${{ secrets.REDIS_URL }} - SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} - SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} - SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - TEST_API_KEY: ${{ secrets.TEST_API_KEY }} - PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} - PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - -jobs: - pre-deploy-e2e-tests: - name: Pre-deploy checks - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Run E2E tests - run: | - npm run test:prod - working-directory: ./apps/api - - pre-deploy-test-suite: - name: Test Suite - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - 
working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Install dependencies - run: pnpm install - working-directory: ./apps/test-suite - - name: Run E2E tests - run: | - npm run test:suite - working-directory: ./apps/test-suite - - python-sdk-tests: - name: Python SDK Tests - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - working-directory: ./apps/python-sdk - - name: Run E2E tests for Python SDK - run: | - pytest firecrawl/__tests__/e2e_withAuth/test.py - working-directory: ./apps/python-sdk - - js-sdk-tests: - name: JavaScript SDK Tests - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Install dependencies for JavaScript SDK - run: pnpm install - working-directory: ./apps/js-sdk/firecrawl - - name: Run E2E 
tests for JavaScript SDK - run: npm run test - working-directory: ./apps/js-sdk/firecrawl - - deploy: - name: Deploy app - runs-on: ubuntu-latest - needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests] - steps: - - uses: actions/checkout@v3 - - name: Change directory - run: cd apps/api - - uses: superfly/flyctl-actions/setup-flyctl@master - - run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js - env: - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - - build-and-publish-python-sdk: - name: Build and publish Python SDK - runs-on: ubuntu-latest - needs: deploy - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine build requests packaging - - - name: Run version check script - id: version_check_script - run: | - PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py) - echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV - - - name: Build the package - if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }} - run: | - python -m build - working-directory: ./apps/python-sdk - - - name: Publish to PyPI - if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - twine upload dist/* - working-directory: ./apps/python-sdk - - build-and-publish-js-sdk: - name: Build and publish JavaScript SDK - runs-on: ubuntu-latest - needs: deploy - - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: '20' - registry-url: 'https://registry.npmjs.org/' - scope: '@mendable' - always-auth: true - - - name: Install pnpm - run: npm install -g pnpm - - - name: Install 
python for running version check script
-        run: |
-          python -m pip install --upgrade pip
-          pip install setuptools wheel requests packaging
-
-      - name: Install dependencies for JavaScript SDK
-        run: pnpm install
-        working-directory: ./apps/js-sdk/firecrawl
-
-      - name: Run version check script
-        id: version_check_script
-        run: |
-          VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
-          echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
-
-      - name: Build and publish to npm
-        if: ${{ env.VERSION_INCREMENTED == 'true' }}
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
-        run: |
-          npm run build-and-publish
-        working-directory: ./apps/js-sdk/firecrawl
-  
\ No newline at end of file
diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile
index b32dab5..64512a2 100644
--- a/apps/api/Dockerfile
+++ b/apps/api/Dockerfile
@@ -7,17 +7,17 @@ COPY . /app
 WORKDIR /app
 
 FROM base AS prod-deps
-RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile
+# Seed the pnpm store from the cache/ directory that CI copies into the build context.
+COPY /cache /pnpm/store
+RUN pnpm install --prod --frozen-lockfile
 
 FROM base AS build
-RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
-
-RUN pnpm install
+# Same pre-fetched store as prod-deps; this stage also installs dev dependencies for the build.
+COPY /cache /pnpm/store
+RUN pnpm install --frozen-lockfile
 RUN pnpm run build
 
 # Install packages needed for deployment
-
-
 FROM base
 RUN apt-get update -qq && \
     apt-get install --no-install-recommends -y chromium chromium-sandbox && \
@@ -25,12 +25,9 @@ RUN apt-get update -qq && \
 COPY --from=prod-deps /app/node_modules /app/node_modules
 COPY --from=build /app /app
 
-
-
-
-# Start the server by default, this can be overwritten at runtime
+# Default process is the worker: Docker honours only the last CMD in a stage, so
+# the shadowed start:production CMD is removed. Override the command at runtime
+# (pnpm run start:production) to run the API server instead.
 EXPOSE 8080
 ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium"
-CMD [ "pnpm", "run", "start:production" ]
 CMD [ "pnpm", "run", "worker:production" ]
-