Merge pull request #132 from mendableai/feat/idempotency-key
[Feat] Added idempotency key to crawl route
This commit is contained in:
commit
4ce28593b2
@ -1,6 +1,7 @@
|
||||
import request from "supertest";
|
||||
import { app } from "../../index";
|
||||
import dotenv from "dotenv";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@ -175,6 +176,30 @@ describe("E2E Tests for API Routes", () => {
|
||||
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
||||
);
|
||||
});
|
||||
it('should prevent duplicate requests using the same idempotency key', async () => {
  // A single freshly generated key is reused for both requests below.
  const sharedKey = uuidv4();

  // Sends one crawl request that carries the shared idempotency key.
  const postCrawlWithKey = () =>
    request(TEST_URL)
      .post('/v0/crawl')
      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
      .set("Content-Type", "application/json")
      .set("x-idempotency-key", sharedKey)
      .send({ url: 'https://mendable.ai' });

  // The first use of the key is expected to be accepted.
  const initialAttempt = await postCrawlWithKey();
  expect(initialAttempt.statusCode).toBe(200);

  // Replaying the same key is expected to be rejected as a conflict.
  const replayAttempt = await postCrawlWithKey();
  expect(replayAttempt.statusCode).toBe(409);
  expect(replayAttempt.body.error).toBe('Idempotency key already used');
});
|
||||
|
||||
it("should return a successful response with a valid API key and valid includes option", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
|
@ -7,6 +7,8 @@ import { RateLimiterMode } from "../../src/types";
|
||||
import { addWebScraperJob } from "../../src/services/queue-jobs";
|
||||
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
|
||||
import { logCrawl } from "../../src/services/logging/crawl_log";
|
||||
import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
|
||||
import { createIdempotencyKey } from "../../src/services/idempotency/create";
|
||||
|
||||
export async function crawlController(req: Request, res: Response) {
|
||||
try {
|
||||
@ -19,6 +21,19 @@ export async function crawlController(req: Request, res: Response) {
|
||||
return res.status(status).json({ error });
|
||||
}
|
||||
|
||||
// Idempotency guard: only runs when the client sent an `x-idempotency-key` header.
if (req.headers["x-idempotency-key"]) {
  // validateIdempotencyKey resolves to false for a malformed key or one that is already stored.
  const isIdempotencyValid = await validateIdempotencyKey(req);
  if (!isIdempotencyValid) {
    return res.status(409).json({ error: "Idempotency key already used" });
  }
  try {
    // createIdempotencyKey is async, so it must be awaited: without `await`
    // a failed insert rejects outside this try/catch, the 500 response below
    // is never sent, and the request carries on without the key being stored.
    await createIdempotencyKey(req);
  } catch (error) {
    console.error(error);
    return res.status(500).json({ error: error.message });
  }
}
|
||||
|
||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
||||
await checkTeamCredits(team_id, 1);
|
||||
if (!creditsCheckSuccess) {
|
||||
|
22
apps/api/src/services/idempotency/create.ts
Normal file
22
apps/api/src/services/idempotency/create.ts
Normal file
@ -0,0 +1,22 @@
|
||||
import { Request } from "express";
|
||||
import { supabase_service } from "../supabase";
|
||||
|
||||
/**
 * Stores the request's `x-idempotency-key` header value as a new row in the
 * `idempotency_keys` table.
 *
 * @param req - Incoming Express request; only its `x-idempotency-key` header is read.
 * @returns The idempotency key that was inserted.
 * @throws Error when the header is missing or empty; rethrows the error
 *   object reported by the insert when the insert fails.
 */
export async function createIdempotencyKey(
  req: Request,
): Promise<string> {
  const key = req.headers['x-idempotency-key'] as string;
  if (!key) {
    throw new Error("No idempotency key provided in the request headers.");
  }

  const insertResult = await supabase_service
    .from("idempotency_keys")
    .insert({ key });

  // Log the failure here and hand the same error object to the caller.
  if (insertResult.error) {
    console.error("Failed to create idempotency key:", insertResult.error);
    throw insertResult.error;
  }

  return key;
}
|
32
apps/api/src/services/idempotency/validate.ts
Normal file
32
apps/api/src/services/idempotency/validate.ts
Normal file
@ -0,0 +1,32 @@
|
||||
import { Request } from "express";
|
||||
import { supabase_service } from "../supabase";
|
||||
import { validate as isUuid } from 'uuid';
|
||||
|
||||
/**
 * Decides whether a request may proceed with respect to its
 * `x-idempotency-key` header.
 *
 * @param req - Incoming Express request; only its `x-idempotency-key` header is read.
 * @returns `true` when no key was sent, or when the key is a valid UUID and the
 *   lookup in `idempotency_keys` yields no rows; `false` when the key is not a
 *   valid UUID or a matching row already exists.
 */
export async function validateIdempotencyKey(
  req: Request,
): Promise<boolean> {
  const idempotencyKey = req.headers['x-idempotency-key'];
  if (!idempotencyKey) {
    // A missing key is not rejected for now.
    return true;
  }

  // Header values are typed `string | string[]`. Narrow to a plain string
  // before validating so only a single, well-formed UUID reaches the query.
  if (typeof idempotencyKey !== "string" || !isUuid(idempotencyKey)) {
    console.error("Invalid idempotency key provided in the request headers.");
    return false;
  }

  const { data, error } = await supabase_service
    .from("idempotency_keys")
    .select("key")
    .eq("key", idempotencyKey);

  if (error) {
    // NOTE(review): a failed lookup is only logged. If `data` is null/empty in
    // that case, the key is reported as unused (fail-open) - confirm intended.
    console.error(error);
  }

  // No rows (or no data at all) means the key has not been recorded.
  return !data || data.length === 0;
}
|
@ -1,3 +1,4 @@
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import FirecrawlApp from '@mendable/firecrawl-js';
|
||||
import { z } from "zod";
|
||||
|
||||
@ -8,7 +9,8 @@ const scrapeResult = await app.scrapeUrl('firecrawl.dev');
|
||||
console.log(scrapeResult.data.content)
|
||||
|
||||
// Crawl a website:
|
||||
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
|
||||
const idempotencyKey = uuidv4(); // optional
|
||||
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
|
||||
console.log(crawlResult)
|
||||
|
||||
const jobId = await crawlResult['jobId'];
|
||||
|
@ -110,11 +110,12 @@ export default class FirecrawlApp {
|
||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||
*/
|
||||
crawlUrl(url_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
|
||||
const headers = this.prepareHeaders();
|
||||
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
|
||||
const headers = this.prepareHeaders(idempotencyKey);
|
||||
let jsonData = { url };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
@ -172,11 +173,8 @@ export default class FirecrawlApp {
|
||||
* Prepares the headers for an API request.
|
||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
||||
*/
|
||||
prepareHeaders() {
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
prepareHeaders(idempotencyKey) {
|
||||
return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
|
||||
}
|
||||
/**
|
||||
* Sends a POST request to the specified URL.
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.21",
|
||||
"version": "0.0.22",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
|
@ -173,15 +173,17 @@ export default class FirecrawlApp {
|
||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||
*/
|
||||
async crawlUrl(
|
||||
url: string,
|
||||
params: Params | null = null,
|
||||
waitUntilDone: boolean = true,
|
||||
timeout: number = 2
|
||||
timeout: number = 2,
|
||||
idempotencyKey?: string
|
||||
): Promise<CrawlResponse | any> {
|
||||
const headers = this.prepareHeaders();
|
||||
const headers = this.prepareHeaders(idempotencyKey);
|
||||
let jsonData: Params = { url };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
@ -240,11 +242,12 @@ export default class FirecrawlApp {
|
||||
* Prepares the headers for an API request.
|
||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
||||
*/
|
||||
prepareHeaders(): AxiosRequestHeaders {
|
||||
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
|
||||
} as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
|
||||
}
|
||||
|
||||
/**
|
||||
|
5
apps/js-sdk/firecrawl/types/index.d.ts
vendored
5
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -82,9 +82,10 @@ export default class FirecrawlApp {
|
||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||
*/
|
||||
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise<CrawlResponse | any>;
|
||||
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
|
||||
/**
|
||||
* Checks the status of a crawl job using the Firecrawl API.
|
||||
* @param {string} jobId - The job ID of the crawl operation.
|
||||
@ -95,7 +96,7 @@ export default class FirecrawlApp {
|
||||
* Prepares the headers for an API request.
|
||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
||||
*/
|
||||
prepareHeaders(): AxiosRequestHeaders;
|
||||
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
||||
/**
|
||||
* Sends a POST request to the specified URL.
|
||||
* @param {string} url - The URL to send the request to.
|
||||
|
13
apps/js-sdk/package-lock.json
generated
13
apps/js-sdk/package-lock.json
generated
@ -11,6 +11,7 @@
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^0.0.19",
|
||||
"axios": "^1.6.8",
|
||||
"uuid": "^9.0.1",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.4.5",
|
||||
"zod": "^3.23.8"
|
||||
@ -771,6 +772,18 @@
|
||||
"peerDependencies": {
|
||||
"zod": "^3.23.3"
|
||||
}
|
||||
},
|
||||
"node_modules/uuid": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||
"funding": [
|
||||
"https://github.com/sponsors/broofa",
|
||||
"https://github.com/sponsors/ctavan"
|
||||
],
|
||||
"bin": {
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -11,8 +11,9 @@
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^0.0.19",
|
||||
"axios": "^1.6.8",
|
||||
"uuid": "^9.0.1",
|
||||
"@mendable/firecrawl-js": "^0.0.19",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.4.5",
|
||||
"zod": "^3.23.8"
|
||||
|
@ -1,4 +1,5 @@
|
||||
from firecrawl import FirecrawlApp
|
||||
import uuid
|
||||
from firecrawl.firecrawl import FirecrawlApp
|
||||
|
||||
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
||||
|
||||
@ -7,7 +8,8 @@ scrape_result = app.scrape_url('firecrawl.dev')
|
||||
print(scrape_result['markdown'])
|
||||
|
||||
# Crawl a website:
|
||||
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}})
|
||||
idempotency_key = str(uuid.uuid4()) # optional idempotency key
|
||||
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
|
||||
print(crawl_result)
|
||||
|
||||
# LLM Extraction:
|
||||
|
@ -127,7 +127,7 @@ class FirecrawlApp:
|
||||
else:
|
||||
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||
|
||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
|
||||
"""
|
||||
Initiate a crawl job for the specified URL using the Firecrawl API.
|
||||
|
||||
@ -136,6 +136,7 @@ class FirecrawlApp:
|
||||
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
|
||||
wait_until_done (bool): Whether to wait until the crawl job is completed.
|
||||
timeout (int): Timeout between status checks when waiting for job completion.
|
||||
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
||||
|
||||
Returns:
|
||||
Any: The crawl job ID or the crawl results if waiting until completion.
|
||||
@ -143,7 +144,7 @@ class FirecrawlApp:
|
||||
Raises:
|
||||
Exception: If the crawl job initiation or monitoring fails.
|
||||
"""
|
||||
headers = self._prepare_headers()
|
||||
headers = self._prepare_headers(idempotency_key)
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
@ -177,16 +178,26 @@ class FirecrawlApp:
|
||||
else:
|
||||
self._handle_error(response, 'check crawl status')
|
||||
|
||||
def _prepare_headers(self):
|
||||
def _prepare_headers(self, idempotency_key=None):
|
||||
"""
|
||||
Prepare the headers for API requests.
|
||||
|
||||
Args:
|
||||
idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: The headers including content type and authorization.
|
||||
Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
|
||||
"""
|
||||
if idempotency_key:
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}',
|
||||
'x-idempotency-key': idempotency_key
|
||||
}
|
||||
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
'Authorization': f'Bearer {self.api_key}',
|
||||
}
|
||||
|
||||
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
|
||||
|
Loading…
x
Reference in New Issue
Block a user