0

Merge pull request #109 from mendableai/feat/posthog-logging

Add Posthog Logging
This commit is contained in:
Nicolas 2024-05-03 09:09:08 -07:00 committed by GitHub
commit 784b81e6d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 99 additions and 23 deletions

View File

@ -13,6 +13,8 @@ env:
HOST: ${{ secrets.HOST }} HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
@ -38,7 +40,7 @@ jobs:
- name: Set up Node.js - name: Set up Node.js
uses: actions/setup-node@v3 uses: actions/setup-node@v3
with: with:
node-version: '20' node-version: "20"
- name: Install pnpm - name: Install pnpm
run: npm install -g pnpm run: npm install -g pnpm
- name: Install dependencies - name: Install dependencies

View File

@ -13,6 +13,8 @@ env:
HOST: ${{ secrets.HOST }} HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
@ -38,7 +40,7 @@ jobs:
- name: Set up Node.js - name: Set up Node.js
uses: actions/setup-node@v3 uses: actions/setup-node@v3
with: with:
node-version: '20' node-version: "20"
- name: Install pnpm - name: Install pnpm
run: npm install -g pnpm run: npm install -g pnpm
- name: Install dependencies - name: Install dependencies
@ -68,4 +70,3 @@ jobs:
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js - run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
env: env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}

View File

@ -4,20 +4,20 @@ Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions o
If you're contributing, note that the process is similar to other open source repos i.e. (fork firecrawl, make changes, run tests, PR). If you have any questions, and would like help getting on board, reach out to hello@mendable.ai for more or submit an issue! If you're contributing, note that the process is similar to other open source repos i.e. (fork firecrawl, make changes, run tests, PR). If you have any questions, and would like help getting on board, reach out to hello@mendable.ai for more or submit an issue!
## Running the project locally ## Running the project locally
First, start by installing dependencies First, start by installing dependencies
1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) 1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs)
2. pnpm [instructions](https://pnpm.io/installation) 2. pnpm [instructions](https://pnpm.io/installation)
3. redis [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/) 3. redis [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/)
Set environment variables in a .env in the /apps/api/ directory — you can copy over the template in .env.example.
Set environment variables in a .env in the /apps/api/ directory — you can copy over the template in .env.example.
To start, we won't set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features) To start, we won't set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features)
.env: .env:
``` ```
# ===== Required ENVS ====== # ===== Required ENVS ======
NUM_WORKERS_PER_QUEUE=8 NUM_WORKERS_PER_QUEUE=8
@ -43,6 +43,11 @@ BULL_AUTH_KEY= #
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
POSTHOG_HOST= # set if you'd like to send posthog events like job logs
``` ```
@ -69,6 +74,7 @@ redis-server
### Terminal 2 - setting up workers ### Terminal 2 - setting up workers
Now, navigate to the apps/api/ directory and run: Now, navigate to the apps/api/ directory and run:
```bash ```bash
pnpm run workers pnpm run workers
``` ```
@ -77,7 +83,6 @@ This will start the workers who are responsible for processing crawl jobs.
### Terminal 3 - setting up the main server ### Terminal 3 - setting up the main server
To do this, navigate to the apps/api/ directory and run if you don't have this already, install pnpm here: https://pnpm.io/installation To do this, navigate to the apps/api/ directory and run if you don't have this already, install pnpm here: https://pnpm.io/installation
Next, run your server with: Next, run your server with:
@ -92,8 +97,8 @@ Alright: now lets send our first request.
```curl ```curl
curl -X GET http://localhost:3002/test curl -X GET http://localhost:3002/test
``` ```
This should return the response Hello, world!
This should return the response Hello, world!
If you'd like to test the crawl endpoint, you can run this If you'd like to test the crawl endpoint, you can run this
@ -110,5 +115,3 @@ curl -X POST http://localhost:3002/v0/crawl \
The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication. The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication.
If you'd like to run the tests with authentication, run `npm run test:prod` If you'd like to run the tests with authentication, run `npm run test:prod`

View File

@ -24,3 +24,6 @@ PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
POSTHOG_HOST= # set if you'd like to send posthog events like job logs

View File

@ -82,6 +82,7 @@
"openai": "^4.28.4", "openai": "^4.28.4",
"pdf-parse": "^1.1.1", "pdf-parse": "^1.1.1",
"pos": "^0.4.2", "pos": "^0.4.2",
"posthog-node": "^4.0.1",
"promptable": "^0.0.9", "promptable": "^0.0.9",
"puppeteer": "^22.6.3", "puppeteer": "^22.6.3",
"rate-limiter-flexible": "^2.4.2", "rate-limiter-flexible": "^2.4.2",

View File

@ -128,6 +128,9 @@ dependencies:
pos: pos:
specifier: ^0.4.2 specifier: ^0.4.2
version: 0.4.2 version: 0.4.2
posthog-node:
specifier: ^4.0.1
version: 4.0.1
promptable: promptable:
specifier: ^0.0.9 specifier: ^0.0.9
version: 0.0.9 version: 0.0.9
@ -5068,6 +5071,16 @@ packages:
source-map-js: 1.0.2 source-map-js: 1.0.2
dev: false dev: false
/posthog-node@4.0.1:
resolution: {integrity: sha512-rtqm2h22QxLGBrW2bLYzbRhliIrqgZ0k+gF0LkQ1SNdeD06YE5eilV0MxZppFSxC8TfH0+B0cWCuebEnreIDgQ==}
engines: {node: '>=15.0.0'}
dependencies:
axios: 1.6.7
rusha: 0.8.14
transitivePeerDependencies:
- debug
dev: false
/prelude-ls@1.1.2: /prelude-ls@1.1.2:
resolution: {integrity: sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==} resolution: {integrity: sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==}
engines: {node: '>= 0.8.0'} engines: {node: '>= 0.8.0'}
@ -5330,6 +5343,10 @@ packages:
engines: {node: '>=10.0.0'} engines: {node: '>=10.0.0'}
dev: false dev: false
/rusha@0.8.14:
resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==}
dev: false
/safe-buffer@5.2.1: /safe-buffer@5.2.1:
resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}

View File

@ -25,6 +25,8 @@ describe("E2E Tests for API Routes with No Authentication", () => {
process.env.PLAYWRIGHT_MICROSERVICE_URL = ""; process.env.PLAYWRIGHT_MICROSERVICE_URL = "";
process.env.LLAMAPARSE_API_KEY = ""; process.env.LLAMAPARSE_API_KEY = "";
process.env.TEST_API_KEY = ""; process.env.TEST_API_KEY = "";
process.env.POSTHOG_API_KEY = "";
process.env.POSTHOG_HOST = "";
}); });
// restore original process.env // restore original process.env

View File

@ -1,6 +1,7 @@
import { ExtractorOptions } from './../../lib/entities'; import { ExtractorOptions } from './../../lib/entities';
import { supabase_service } from "../supabase"; import { supabase_service } from "../supabase";
import { FirecrawlJob } from "../../types"; import { FirecrawlJob } from "../../types";
import { posthog } from "../posthog";
import "dotenv/config"; import "dotenv/config";
export async function logJob(job: FirecrawlJob) { export async function logJob(job: FirecrawlJob) {
@ -10,7 +11,6 @@ export async function logJob(job: FirecrawlJob) {
return; return;
} }
const { data, error } = await supabase_service const { data, error } = await supabase_service
.from("firecrawl_jobs") .from("firecrawl_jobs")
.insert([ .insert([
@ -30,6 +30,27 @@ export async function logJob(job: FirecrawlJob) {
num_tokens: job.num_tokens num_tokens: job.num_tokens
}, },
]); ]);
if (process.env.POSTHOG_API_KEY) {
posthog.capture({
distinctId: job.team_id === "preview" ? null : job.team_id,
event: "job-logged",
properties: {
success: job.success,
message: job.message,
num_docs: job.num_docs,
time_taken: job.time_taken,
team_id: job.team_id === "preview" ? null : job.team_id,
mode: job.mode,
url: job.url,
crawler_options: job.crawlerOptions,
page_options: job.pageOptions,
origin: job.origin,
extractor_options: job.extractor_options,
num_tokens: job.num_tokens
},
});
}
if (error) { if (error) {
console.error("Error logging job:\n", error); console.error("Error logging job:\n", error);
} }

View File

@ -0,0 +1,26 @@
import { PostHog } from 'posthog-node';
import "dotenv/config";

/**
 * Builds a PostHog client configured from POSTHOG_API_KEY / POSTHOG_HOST.
 *
 * flushAt: 1 and flushInterval: 0 disable batching so every captured
 * event is sent immediately — suits short-lived worker processes that
 * may exit before a batched flush would fire.
 *
 * @throws Error if POSTHOG_API_KEY is unset (previously this would
 *         construct a client with an undefined key and fail later).
 */
export default function PostHogClient(): PostHog {
  const apiKey = process.env.POSTHOG_API_KEY;
  if (!apiKey) {
    throw new Error("POSTHOG_API_KEY is required to build a PostHog client");
  }
  return new PostHog(apiKey, {
    host: process.env.POSTHOG_HOST,
    flushAt: 1,
    flushInterval: 0,
  });
}

// No-op stand-in whose capture() matches the real client's call shape
// (callers pass a message object), so call sites type-check under strict mode.
class MockPostHog {
  capture(_message?: unknown): void {}
}

// Using the actual PostHog class if POSTHOG_API_KEY exists, otherwise using the mock class
// Additionally, print a warning to the terminal if POSTHOG_API_KEY is not provided
export const posthog = process.env.POSTHOG_API_KEY
  ? PostHogClient()
  : (() => {
      console.warn(
        "POSTHOG_API_KEY is not provided - your events will not be logged. Using MockPostHog as a fallback. See posthog.ts for more."
      );
      return new MockPostHog();
    })();