0

Merge pull request #132 from mendableai/feat/idempotency-key

[Feat] Added idempotency key to crawl route
This commit is contained in:
Rafael Miller 2024-05-24 14:22:50 -03:00 committed by GitHub
commit 4ce28593b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 150 additions and 25 deletions

View File

@ -1,6 +1,7 @@
import request from "supertest"; import request from "supertest";
import { app } from "../../index"; import { app } from "../../index";
import dotenv from "dotenv"; import dotenv from "dotenv";
import { v4 as uuidv4 } from "uuid";
dotenv.config(); dotenv.config();
@ -175,6 +176,30 @@ describe("E2E Tests for API Routes", () => {
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
); );
}); });
it('should prevent duplicate requests using the same idempotency key', async () => {
const uniqueIdempotencyKey = uuidv4();
// First request with the idempotency key
const firstResponse = await request(TEST_URL)
.post('/v0/crawl')
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.set("x-idempotency-key", uniqueIdempotencyKey)
.send({ url: 'https://mendable.ai' });
expect(firstResponse.statusCode).toBe(200);
// Second request with the same idempotency key
const secondResponse = await request(TEST_URL)
.post('/v0/crawl')
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.set("x-idempotency-key", uniqueIdempotencyKey)
.send({ url: 'https://mendable.ai' });
expect(secondResponse.statusCode).toBe(409);
expect(secondResponse.body.error).toBe('Idempotency key already used');
});
it("should return a successful response with a valid API key and valid includes option", async () => { it("should return a successful response with a valid API key and valid includes option", async () => {
const crawlResponse = await request(TEST_URL) const crawlResponse = await request(TEST_URL)

View File

@ -7,6 +7,8 @@ import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs"; import { addWebScraperJob } from "../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../src/services/logging/crawl_log"; import { logCrawl } from "../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../src/services/idempotency/create";
export async function crawlController(req: Request, res: Response) { export async function crawlController(req: Request, res: Response) {
try { try {
@ -19,6 +21,19 @@ export async function crawlController(req: Request, res: Response) {
return res.status(status).json({ error }); return res.status(status).json({ error });
} }
if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) {
return res.status(409).json({ error: "Idempotency key already used" });
}
try {
createIdempotencyKey(req);
} catch (error) {
console.error(error);
return res.status(500).json({ error: error.message });
}
}
const { success: creditsCheckSuccess, message: creditsCheckMessage } = const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1); await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) { if (!creditsCheckSuccess) {

View File

@ -0,0 +1,22 @@
import { Request } from "express";
import { supabase_service } from "../supabase";
export async function createIdempotencyKey(
req: Request,
): Promise<string> {
const idempotencyKey = req.headers['x-idempotency-key'] as string;
if (!idempotencyKey) {
throw new Error("No idempotency key provided in the request headers.");
}
const { data, error } = await supabase_service
.from("idempotency_keys")
.insert({ key: idempotencyKey });
if (error) {
console.error("Failed to create idempotency key:", error);
throw error;
}
return idempotencyKey;
}

View File

@ -0,0 +1,32 @@
import { Request } from "express";
import { supabase_service } from "../supabase";
import { validate as isUuid } from 'uuid';
export async function validateIdempotencyKey(
req: Request,
): Promise<boolean> {
const idempotencyKey = req.headers['x-idempotency-key'];
if (!idempotencyKey) {
// // not returning for missing idempotency key for now
return true;
}
if (!isUuid(idempotencyKey)) {
console.error("Invalid idempotency key provided in the request headers.");
return false;
}
const { data, error } = await supabase_service
.from("idempotency_keys")
.select("key")
.eq("key", idempotencyKey);
if (error) {
console.error(error);
}
if (!data || data.length === 0) {
return true;
}
return false;
}

View File

@ -1,3 +1,4 @@
import { v4 as uuidv4 } from 'uuid';
import FirecrawlApp from '@mendable/firecrawl-js'; import FirecrawlApp from '@mendable/firecrawl-js';
import { z } from "zod"; import { z } from "zod";
@ -8,7 +9,8 @@ const scrapeResult = await app.scrapeUrl('firecrawl.dev');
console.log(scrapeResult.data.content) console.log(scrapeResult.data.content)
// Crawl a website: // Crawl a website:
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false); const idempotencyKey = uuidv4(); // optional
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
console.log(crawlResult) console.log(crawlResult)
const jobId = await crawlResult['jobId']; const jobId = await crawlResult['jobId'];

View File

@ -110,11 +110,12 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request. * @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks. * @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation. * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/ */
crawlUrl(url_1) { crawlUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) { return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
const headers = this.prepareHeaders(); const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url }; let jsonData = { url };
if (params) { if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params); jsonData = Object.assign(Object.assign({}, jsonData), params);
@ -172,11 +173,8 @@ export default class FirecrawlApp {
* Prepares the headers for an API request. * Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers. * @returns {AxiosRequestHeaders} The prepared headers.
*/ */
prepareHeaders() { prepareHeaders(idempotencyKey) {
return { return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
} }
/** /**
* Sends a POST request to the specified URL. * Sends a POST request to the specified URL.

View File

@ -1,6 +1,6 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.21", "version": "0.0.22",
"description": "JavaScript SDK for Firecrawl API", "description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js", "main": "build/index.js",
"types": "types/index.d.ts", "types": "types/index.d.ts",

View File

@ -173,15 +173,17 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request. * @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks. * @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation. * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/ */
async crawlUrl( async crawlUrl(
url: string, url: string,
params: Params | null = null, params: Params | null = null,
waitUntilDone: boolean = true, waitUntilDone: boolean = true,
timeout: number = 2 timeout: number = 2,
idempotencyKey?: string
): Promise<CrawlResponse | any> { ): Promise<CrawlResponse | any> {
const headers = this.prepareHeaders(); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: Params = { url }; let jsonData: Params = { url };
if (params) { if (params) {
jsonData = { ...jsonData, ...params }; jsonData = { ...jsonData, ...params };
@ -240,11 +242,12 @@ export default class FirecrawlApp {
* Prepares the headers for an API request. * Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers. * @returns {AxiosRequestHeaders} The prepared headers.
*/ */
prepareHeaders(): AxiosRequestHeaders { prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
return { return {
"Content-Type": "application/json", 'Content-Type': 'application/json',
Authorization: `Bearer ${this.apiKey}`, 'Authorization': `Bearer ${this.apiKey}`,
} as AxiosRequestHeaders; ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
} as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
} }
/** /**

View File

@ -82,9 +82,10 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request. * @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks. * @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation. * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/ */
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise<CrawlResponse | any>; crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
/** /**
* Checks the status of a crawl job using the Firecrawl API. * Checks the status of a crawl job using the Firecrawl API.
* @param {string} jobId - The job ID of the crawl operation. * @param {string} jobId - The job ID of the crawl operation.
@ -95,7 +96,7 @@ export default class FirecrawlApp {
* Prepares the headers for an API request. * Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers. * @returns {AxiosRequestHeaders} The prepared headers.
*/ */
prepareHeaders(): AxiosRequestHeaders; prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
/** /**
* Sends a POST request to the specified URL. * Sends a POST request to the specified URL.
* @param {string} url - The URL to send the request to. * @param {string} url - The URL to send the request to.

View File

@ -11,6 +11,7 @@
"dependencies": { "dependencies": {
"@mendable/firecrawl-js": "^0.0.19", "@mendable/firecrawl-js": "^0.0.19",
"axios": "^1.6.8", "axios": "^1.6.8",
"uuid": "^9.0.1",
"ts-node": "^10.9.2", "ts-node": "^10.9.2",
"typescript": "^5.4.5", "typescript": "^5.4.5",
"zod": "^3.23.8" "zod": "^3.23.8"
@ -771,6 +772,18 @@
"peerDependencies": { "peerDependencies": {
"zod": "^3.23.3" "zod": "^3.23.3"
} }
},
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"bin": {
"uuid": "dist/bin/uuid"
}
} }
} }
} }

View File

@ -11,8 +11,9 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@mendable/firecrawl-js": "^0.0.19",
"axios": "^1.6.8", "axios": "^1.6.8",
"uuid": "^9.0.1",
"@mendable/firecrawl-js": "^0.0.19",
"ts-node": "^10.9.2", "ts-node": "^10.9.2",
"typescript": "^5.4.5", "typescript": "^5.4.5",
"zod": "^3.23.8" "zod": "^3.23.8"

View File

@ -1,4 +1,5 @@
from firecrawl import FirecrawlApp import uuid
from firecrawl.firecrawl import FirecrawlApp
app = FirecrawlApp(api_key="fc-YOUR_API_KEY") app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
@ -7,7 +8,8 @@ scrape_result = app.scrape_url('firecrawl.dev')
print(scrape_result['markdown']) print(scrape_result['markdown'])
# Crawl a website: # Crawl a website:
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) idempotency_key = str(uuid.uuid4()) # optional idempotency key
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
print(crawl_result) print(crawl_result)
# LLM Extraction: # LLM Extraction:

View File

@ -127,7 +127,7 @@ class FirecrawlApp:
else: else:
raise Exception(f'Failed to search. Status code: {response.status_code}') raise Exception(f'Failed to search. Status code: {response.status_code}')
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2): def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
""" """
Initiate a crawl job for the specified URL using the Firecrawl API. Initiate a crawl job for the specified URL using the Firecrawl API.
@ -136,6 +136,7 @@ class FirecrawlApp:
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request. params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
wait_until_done (bool): Whether to wait until the crawl job is completed. wait_until_done (bool): Whether to wait until the crawl job is completed.
timeout (int): Timeout between status checks when waiting for job completion. timeout (int): Timeout between status checks when waiting for job completion.
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
Returns: Returns:
Any: The crawl job ID or the crawl results if waiting until completion. Any: The crawl job ID or the crawl results if waiting until completion.
@ -143,7 +144,7 @@ class FirecrawlApp:
Raises: Raises:
Exception: If the crawl job initiation or monitoring fails. Exception: If the crawl job initiation or monitoring fails.
""" """
headers = self._prepare_headers() headers = self._prepare_headers(idempotency_key)
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
@ -177,16 +178,26 @@ class FirecrawlApp:
else: else:
self._handle_error(response, 'check crawl status') self._handle_error(response, 'check crawl status')
def _prepare_headers(self): def _prepare_headers(self, idempotency_key=None):
""" """
Prepare the headers for API requests. Prepare the headers for API requests.
Args:
idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
Returns: Returns:
Dict[str, str]: The headers including content type and authorization. Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
""" """
if idempotency_key:
return { return {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}' 'Authorization': f'Bearer {self.api_key}',
'x-idempotency-key': idempotency_key
}
return {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
} }
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5): def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):