Merge branch 'main' into feat/improving-reative-paths
This commit is contained in:
commit
60245343c9
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,7 +1,8 @@
|
|||||||
|
.DS_Store
|
||||||
/node_modules/
|
/node_modules/
|
||||||
/dist/
|
/dist/
|
||||||
.env
|
.env
|
||||||
*.csv
|
*.csv
|
||||||
dump.rdb
|
dump.rdb
|
||||||
/mongo-data
|
/mongo-data
|
||||||
apps/js-sdk/node_modules/
|
apps/js-sdk/node_modules/
|
||||||
|
@ -11,9 +11,9 @@ Crawl and convert any website into LLM-ready markdown. Build by [Mendable.ai](ht
|
|||||||
|
|
||||||
## How to use it?
|
## How to use it?
|
||||||
|
|
||||||
We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.com/playground). You can also self host the backend if you'd like.
|
We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self host the backend if you'd like.
|
||||||
|
|
||||||
- [x] [API](https://firecrawl.com/playground)
|
- [x] [API](https://firecrawl.dev/playground)
|
||||||
- [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk)
|
- [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk)
|
||||||
- [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/)
|
- [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/)
|
||||||
- [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/stable/)
|
- [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/stable/)
|
||||||
@ -25,7 +25,7 @@ Self-host. To self-host refer to guide [here](https://github.com/mendableai/fire
|
|||||||
|
|
||||||
### API Key
|
### API Key
|
||||||
|
|
||||||
To use the API, you need to sign up on [Firecrawl](https://firecrawl.com) and get an API key.
|
To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and get an API key.
|
||||||
|
|
||||||
### Crawling
|
### Crawling
|
||||||
|
|
||||||
|
BIN
apps/.DS_Store
vendored
BIN
apps/.DS_Store
vendored
Binary file not shown.
BIN
apps/api/src/.DS_Store
vendored
BIN
apps/api/src/.DS_Store
vendored
Binary file not shown.
@ -45,14 +45,14 @@ app.get("/test", async (req, res) => {
|
|||||||
res.send("Hello, world!");
|
res.send("Hello, world!");
|
||||||
});
|
});
|
||||||
|
|
||||||
async function authenticateUser(req, res, mode?: string): Promise<string> {
|
async function authenticateUser(req, res, mode?: string): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> {
|
||||||
const authHeader = req.headers.authorization;
|
const authHeader = req.headers.authorization;
|
||||||
if (!authHeader) {
|
if (!authHeader) {
|
||||||
return res.status(401).json({ error: "Unauthorized" });
|
return { success: false, error: "Unauthorized", status: 401 };
|
||||||
}
|
}
|
||||||
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
|
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
|
||||||
if (!token) {
|
if (!token) {
|
||||||
return res.status(401).json({ error: "Unauthorized: Token missing" });
|
return { success: false, error: "Unauthorized: Token missing", status: 401 };
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -64,13 +64,11 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
|
|||||||
).consume(iptoken);
|
).consume(iptoken);
|
||||||
} catch (rateLimiterRes) {
|
} catch (rateLimiterRes) {
|
||||||
console.error(rateLimiterRes);
|
console.error(rateLimiterRes);
|
||||||
return res.status(429).json({
|
return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 };
|
||||||
error: "Rate limit exceeded. Too many requests, try again in 1 minute.",
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (token === "this_is_just_a_preview_token" && mode === "scrape") {
|
if (token === "this_is_just_a_preview_token" && mode === "scrape") {
|
||||||
return "preview";
|
return { success: true, team_id: "preview" };
|
||||||
}
|
}
|
||||||
|
|
||||||
const normalizedApi = parseApi(token);
|
const normalizedApi = parseApi(token);
|
||||||
@ -80,16 +78,19 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
|
|||||||
.select("*")
|
.select("*")
|
||||||
.eq("key", normalizedApi);
|
.eq("key", normalizedApi);
|
||||||
if (error || !data || data.length === 0) {
|
if (error || !data || data.length === 0) {
|
||||||
return res.status(401).json({ error: "Unauthorized: Invalid token" });
|
return { success: false, error: "Unauthorized: Invalid token", status: 401 };
|
||||||
}
|
}
|
||||||
|
|
||||||
return data[0].team_id;
|
return { success: true, team_id: data[0].team_id };
|
||||||
}
|
}
|
||||||
|
|
||||||
app.post("/v0/scrape", async (req, res) => {
|
app.post("/v0/scrape", async (req, res) => {
|
||||||
try {
|
try {
|
||||||
// make sure to authenticate user first, Bearer <token>
|
// make sure to authenticate user first, Bearer <token>
|
||||||
const team_id = await authenticateUser(req, res, "scrape");
|
const { success, team_id, error, status } = await authenticateUser(req, res, "crawl");
|
||||||
|
if (!success) {
|
||||||
|
return res.status(status).json({ error });
|
||||||
|
}
|
||||||
const crawlerOptions = req.body.crawlerOptions ?? {};
|
const crawlerOptions = req.body.crawlerOptions ?? {};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -159,7 +160,10 @@ app.post("/v0/scrape", async (req, res) => {
|
|||||||
|
|
||||||
app.post("/v0/crawl", async (req, res) => {
|
app.post("/v0/crawl", async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const team_id = await authenticateUser(req, res);
|
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
|
||||||
|
if (!success) {
|
||||||
|
return res.status(status).json({ error });
|
||||||
|
}
|
||||||
|
|
||||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
||||||
await checkTeamCredits(team_id, 1);
|
await checkTeamCredits(team_id, 1);
|
||||||
@ -251,22 +255,9 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => {
|
|||||||
|
|
||||||
app.get("/v0/crawl/status/:jobId", async (req, res) => {
|
app.get("/v0/crawl/status/:jobId", async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const authHeader = req.headers.authorization;
|
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
|
||||||
if (!authHeader) {
|
if (!success) {
|
||||||
return res.status(401).json({ error: "Unauthorized" });
|
return res.status(status).json({ error });
|
||||||
}
|
|
||||||
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
|
|
||||||
if (!token) {
|
|
||||||
return res.status(401).json({ error: "Unauthorized: Token missing" });
|
|
||||||
}
|
|
||||||
|
|
||||||
// make sure api key is valid, based on the api_keys table in supabase
|
|
||||||
const { data, error } = await supabase_service
|
|
||||||
.from("api_keys")
|
|
||||||
.select("*")
|
|
||||||
.eq("key", token);
|
|
||||||
if (error || !data || data.length === 0) {
|
|
||||||
return res.status(401).json({ error: "Unauthorized: Invalid token" });
|
|
||||||
}
|
}
|
||||||
const job = await getWebScraperQueue().getJob(req.params.jobId);
|
const job = await getWebScraperQueue().getJob(req.params.jobId);
|
||||||
if (!job) {
|
if (!job) {
|
||||||
|
BIN
apps/playwright-service/.DS_Store
vendored
BIN
apps/playwright-service/.DS_Store
vendored
Binary file not shown.
Loading…
Reference in New Issue
Block a user