0

Merge branch 'main' into feat/improving-reative-paths

This commit is contained in:
Nicolas 2024-04-17 10:57:49 -07:00
commit 60245343c9
7 changed files with 23 additions and 31 deletions

BIN
.DS_Store vendored

Binary file not shown.

3
.gitignore vendored
View File

@ -1,7 +1,8 @@
.DS_Store
/node_modules/ /node_modules/
/dist/ /dist/
.env .env
*.csv *.csv
dump.rdb dump.rdb
/mongo-data /mongo-data
apps/js-sdk/node_modules/ apps/js-sdk/node_modules/

View File

@ -11,9 +11,9 @@ Crawl and convert any website into LLM-ready markdown. Build by [Mendable.ai](ht
## How to use it? ## How to use it?
We provide an easy-to-use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.com/playground). You can also self-host the backend if you'd like. We provide an easy-to-use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self-host the backend if you'd like.
- [x] [API](https://firecrawl.com/playground) - [x] [API](https://firecrawl.dev/playground)
- [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk) - [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk)
- [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/) - [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/)
- [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/stable/) - [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/stable/)
@ -25,7 +25,7 @@ Self-host. To self-host refer to guide [here](https://github.com/mendableai/fire
### API Key ### API Key
To use the API, you need to sign up on [Firecrawl](https://firecrawl.com) and get an API key. To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and get an API key.
### Crawling ### Crawling

BIN
apps/.DS_Store vendored

Binary file not shown.

BIN
apps/api/src/.DS_Store vendored

Binary file not shown.

View File

@ -45,14 +45,14 @@ app.get("/test", async (req, res) => {
res.send("Hello, world!"); res.send("Hello, world!");
}); });
async function authenticateUser(req, res, mode?: string): Promise<string> { async function authenticateUser(req, res, mode?: string): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> {
const authHeader = req.headers.authorization; const authHeader = req.headers.authorization;
if (!authHeader) { if (!authHeader) {
return res.status(401).json({ error: "Unauthorized" }); return { success: false, error: "Unauthorized", status: 401 };
} }
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>" const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
if (!token) { if (!token) {
return res.status(401).json({ error: "Unauthorized: Token missing" }); return { success: false, error: "Unauthorized: Token missing", status: 401 };
} }
try { try {
@ -64,13 +64,11 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
).consume(iptoken); ).consume(iptoken);
} catch (rateLimiterRes) { } catch (rateLimiterRes) {
console.error(rateLimiterRes); console.error(rateLimiterRes);
return res.status(429).json({ return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 };
error: "Rate limit exceeded. Too many requests, try again in 1 minute.",
});
} }
if (token === "this_is_just_a_preview_token" && mode === "scrape") { if (token === "this_is_just_a_preview_token" && mode === "scrape") {
return "preview"; return { success: true, team_id: "preview" };
} }
const normalizedApi = parseApi(token); const normalizedApi = parseApi(token);
@ -80,16 +78,19 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
.select("*") .select("*")
.eq("key", normalizedApi); .eq("key", normalizedApi);
if (error || !data || data.length === 0) { if (error || !data || data.length === 0) {
return res.status(401).json({ error: "Unauthorized: Invalid token" }); return { success: false, error: "Unauthorized: Invalid token", status: 401 };
} }
return data[0].team_id; return { success: true, team_id: data[0].team_id };
} }
app.post("/v0/scrape", async (req, res) => { app.post("/v0/scrape", async (req, res) => {
try { try {
// make sure to authenticate user first, Bearer <token> // make sure to authenticate user first, Bearer <token>
const team_id = await authenticateUser(req, res, "scrape"); const { success, team_id, error, status } = await authenticateUser(req, res, "crawl");
if (!success) {
return res.status(status).json({ error });
}
const crawlerOptions = req.body.crawlerOptions ?? {}; const crawlerOptions = req.body.crawlerOptions ?? {};
try { try {
@ -159,7 +160,10 @@ app.post("/v0/scrape", async (req, res) => {
app.post("/v0/crawl", async (req, res) => { app.post("/v0/crawl", async (req, res) => {
try { try {
const team_id = await authenticateUser(req, res); const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
if (!success) {
return res.status(status).json({ error });
}
const { success: creditsCheckSuccess, message: creditsCheckMessage } = const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1); await checkTeamCredits(team_id, 1);
@ -251,22 +255,9 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => {
app.get("/v0/crawl/status/:jobId", async (req, res) => { app.get("/v0/crawl/status/:jobId", async (req, res) => {
try { try {
const authHeader = req.headers.authorization; const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
if (!authHeader) { if (!success) {
return res.status(401).json({ error: "Unauthorized" }); return res.status(status).json({ error });
}
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
if (!token) {
return res.status(401).json({ error: "Unauthorized: Token missing" });
}
// make sure api key is valid, based on the api_keys table in supabase
const { data, error } = await supabase_service
.from("api_keys")
.select("*")
.eq("key", token);
if (error || !data || data.length === 0) {
return res.status(401).json({ error: "Unauthorized: Invalid token" });
} }
const job = await getWebScraperQueue().getJob(req.params.jobId); const job = await getWebScraperQueue().getJob(req.params.jobId);
if (!job) { if (!job) {

Binary file not shown.