crawlee-proxyport
这个 Node.js TypeScript 包为 Crawlee 抓取框架提供了一种使用轮换代理的简单方法。
npmjs.com/package/crawlee-proxyport - npmjs.com 上的页面
github.com/proxyport/crawlee-proxyport - github 上的源代码
先决条件
使用本包需要一个 Proxy Port 的 API 密钥(对应下方示例代码中的 <API_KEY>)。
安装
$ npm i crawlee-proxyport
入门
import { CheerioCrawler, ProxyConfiguration } from 'crawlee';
import { ProxyProvider } from 'crawlee-proxyport';
// Example: crawl a list of URLs with CheerioCrawler, taking a proxy URL from
// ProxyProvider for each session.

// Replace the placeholder with your own API key.
const API_KEY = '<API_KEY>';

const startUrls = ['https://example.com'];

// NOTE(review): the explicit type annotation is kept from the original; `crawler`
// and `proxyProvider` reference each other, so it presumably helps type inference.
const crawler: CheerioCrawler = new CheerioCrawler({
  useSessionPool: true,
  persistCookiesPerSession: true,
  proxyConfiguration: new ProxyConfiguration({
    // `proxyProvider` is declared further down because its constructor takes
    // `crawler`. Wrapping the call in a closure defers the lookup until the
    // function is actually invoked, so it must not run before that declaration.
    newUrlFunction: (sId) => proxyProvider.newUrlFunction(sId),
  }),
  maxRequestRetries: 20,
  sessionPoolOptions: {
    sessionOptions: {
      // Tune this number for your workload; a value between 10 and 50 is recommended.
      maxUsageCount: 20,
    },
  },
  // Logs the <title> text of every page that was loaded.
  async requestHandler({ request, $, log }) {
    const title = $('title').text();
    log.info(`Title of ${request.loadedUrl} is '${title}'`);
  },
});

const proxyProvider = new ProxyProvider(API_KEY, crawler);

await crawler.run(startUrls);