0

Merge pull request #132 from mendableai/feat/idempotency-key

[Feat] Added idempotency key to crawl route
This commit is contained in:
Rafael Miller 2024-05-24 14:22:50 -03:00 committed by GitHub
commit 4ce28593b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 150 additions and 25 deletions

View File

@ -1,6 +1,7 @@
import request from "supertest";
import { app } from "../../index";
import dotenv from "dotenv";
import { v4 as uuidv4 } from "uuid";
dotenv.config();
@ -175,6 +176,30 @@ describe("E2E Tests for API Routes", () => {
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
it('should prevent duplicate requests using the same idempotency key', async () => {
const uniqueIdempotencyKey = uuidv4();
// First request with the idempotency key
const firstResponse = await request(TEST_URL)
.post('/v0/crawl')
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.set("x-idempotency-key", uniqueIdempotencyKey)
.send({ url: 'https://mendable.ai' });
expect(firstResponse.statusCode).toBe(200);
// Second request with the same idempotency key
const secondResponse = await request(TEST_URL)
.post('/v0/crawl')
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.set("x-idempotency-key", uniqueIdempotencyKey)
.send({ url: 'https://mendable.ai' });
expect(secondResponse.statusCode).toBe(409);
expect(secondResponse.body.error).toBe('Idempotency key already used');
});
it("should return a successful response with a valid API key and valid includes option", async () => {
const crawlResponse = await request(TEST_URL)

View File

@ -7,6 +7,8 @@ import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../src/services/idempotency/create";
export async function crawlController(req: Request, res: Response) {
try {
@ -19,6 +21,19 @@ export async function crawlController(req: Request, res: Response) {
return res.status(status).json({ error });
}
if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) {
return res.status(409).json({ error: "Idempotency key already used" });
}
try {
createIdempotencyKey(req);
} catch (error) {
console.error(error);
return res.status(500).json({ error: error.message });
}
}
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {

View File

@ -0,0 +1,22 @@
import { Request } from "express";
import { supabase_service } from "../supabase";
export async function createIdempotencyKey(
req: Request,
): Promise<string> {
const idempotencyKey = req.headers['x-idempotency-key'] as string;
if (!idempotencyKey) {
throw new Error("No idempotency key provided in the request headers.");
}
const { data, error } = await supabase_service
.from("idempotency_keys")
.insert({ key: idempotencyKey });
if (error) {
console.error("Failed to create idempotency key:", error);
throw error;
}
return idempotencyKey;
}

View File

@ -0,0 +1,32 @@
import { Request } from "express";
import { supabase_service } from "../supabase";
import { validate as isUuid } from 'uuid';
export async function validateIdempotencyKey(
req: Request,
): Promise<boolean> {
const idempotencyKey = req.headers['x-idempotency-key'];
if (!idempotencyKey) {
// // not returning for missing idempotency key for now
return true;
}
if (!isUuid(idempotencyKey)) {
console.error("Invalid idempotency key provided in the request headers.");
return false;
}
const { data, error } = await supabase_service
.from("idempotency_keys")
.select("key")
.eq("key", idempotencyKey);
if (error) {
console.error(error);
}
if (!data || data.length === 0) {
return true;
}
return false;
}

View File

@ -1,3 +1,4 @@
import { v4 as uuidv4 } from 'uuid';
import FirecrawlApp from '@mendable/firecrawl-js';
import { z } from "zod";
@ -8,7 +9,8 @@ const scrapeResult = await app.scrapeUrl('firecrawl.dev');
console.log(scrapeResult.data.content)
// Crawl a website:
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
const idempotencyKey = uuidv4(); // optional
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
console.log(crawlResult)
const jobId = await crawlResult['jobId'];

View File

@ -110,11 +110,12 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
const headers = this.prepareHeaders();
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
@ -172,11 +173,8 @@ export default class FirecrawlApp {
* Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers.
*/
prepareHeaders() {
return {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
prepareHeaders(idempotencyKey) {
return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
}
/**
* Sends a POST request to the specified URL.

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.21",
"version": "0.0.22",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js",
"types": "types/index.d.ts",

View File

@ -173,15 +173,17 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
async crawlUrl(
url: string,
params: Params | null = null,
waitUntilDone: boolean = true,
timeout: number = 2
timeout: number = 2,
idempotencyKey?: string
): Promise<CrawlResponse | any> {
const headers = this.prepareHeaders();
const headers = this.prepareHeaders(idempotencyKey);
let jsonData: Params = { url };
if (params) {
jsonData = { ...jsonData, ...params };
@ -240,11 +242,12 @@ export default class FirecrawlApp {
* Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers.
*/
prepareHeaders(): AxiosRequestHeaders {
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
return {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
} as AxiosRequestHeaders;
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
} as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
}
/**

View File

@ -82,9 +82,10 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise<CrawlResponse | any>;
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
/**
* Checks the status of a crawl job using the Firecrawl API.
* @param {string} jobId - The job ID of the crawl operation.
@ -95,7 +96,7 @@ export default class FirecrawlApp {
* Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers.
*/
prepareHeaders(): AxiosRequestHeaders;
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
/**
* Sends a POST request to the specified URL.
* @param {string} url - The URL to send the request to.

View File

@ -11,6 +11,7 @@
"dependencies": {
"@mendable/firecrawl-js": "^0.0.19",
"axios": "^1.6.8",
"uuid": "^9.0.1",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
"zod": "^3.23.8"
@ -771,6 +772,18 @@
"peerDependencies": {
"zod": "^3.23.3"
}
},
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"bin": {
"uuid": "dist/bin/uuid"
}
}
}
}

View File

@ -11,8 +11,9 @@
"author": "",
"license": "ISC",
"dependencies": {
"@mendable/firecrawl-js": "^0.0.19",
"axios": "^1.6.8",
"uuid": "^9.0.1",
"@mendable/firecrawl-js": "^0.0.19",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
"zod": "^3.23.8"

View File

@ -1,4 +1,5 @@
from firecrawl import FirecrawlApp
import uuid
from firecrawl.firecrawl import FirecrawlApp
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
@ -7,7 +8,8 @@ scrape_result = app.scrape_url('firecrawl.dev')
print(scrape_result['markdown'])
# Crawl a website:
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}})
idempotency_key = str(uuid.uuid4()) # optional idempotency key
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
print(crawl_result)
# LLM Extraction:

View File

@ -127,7 +127,7 @@ class FirecrawlApp:
else:
raise Exception(f'Failed to search. Status code: {response.status_code}')
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
"""
Initiate a crawl job for the specified URL using the Firecrawl API.
@ -136,6 +136,7 @@ class FirecrawlApp:
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
wait_until_done (bool): Whether to wait until the crawl job is completed.
timeout (int): Timeout between status checks when waiting for job completion.
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
Returns:
Any: The crawl job ID or the crawl results if waiting until completion.
@ -143,7 +144,7 @@ class FirecrawlApp:
Raises:
Exception: If the crawl job initiation or monitoring fails.
"""
headers = self._prepare_headers()
headers = self._prepare_headers(idempotency_key)
json_data = {'url': url}
if params:
json_data.update(params)
@ -177,16 +178,26 @@ class FirecrawlApp:
else:
self._handle_error(response, 'check crawl status')
def _prepare_headers(self):
def _prepare_headers(self, idempotency_key=None):
"""
Prepare the headers for API requests.
Args:
idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
Returns:
Dict[str, str]: The headers including content type and authorization.
Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
"""
if idempotency_key:
return {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'x-idempotency-key': idempotency_key
}
return {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}'
'Authorization': f'Bearer {self.api_key}',
}
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):