I want to make GET request to scrape some data thru a proxy server that is randomly generated using the gimmeproxy.com free API.
I am able to get the proxy ip/port and am using
'https-proxy-agent' to setup the agent with the proxy data.
Whenever I try to call any website I always get
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>405 Method Not Allowed</title>
</head><body>
<h1>Method Not Allowed</h1>
<p>The requested method CONNECT is not allowed for the URL
/index.html.en.backup.</p>
</body></html>
Here is my node script:
const request = require('request'), HttpsProxyAgent = require('https-proxy-agent');
generateRandomProxy(function(proxy){
var agent = new HttpsProxyAgent({
proxyHost: proxy.proxyHost,
proxyPort: proxy.proxyPort
});
request({
uri: "http://example.com",
method: "GET",
agent: agent,
timeout: 5000,
}, function(error, response, body) {
console.log(body);
});
})
function generateRandomProxy(cb){
request.get(' https://gimmeproxy.com/api/getProxy?get=true&cookies=true&country=US',{json:true},function(err,res){
if(!err){cb({
proxyHost: res.body.ip,
proxyPort: res.body.port
})}
else{console.log('problem obtaining proxy')}
})
}
So my question: How can I route my request thru the proxy and then get a returned body that is valid?
As you see now I keep getting the 405 Method Not Allowed
Thank you for any assistance.
Edit: Just found some GimmeProxy wrapper for Node.js: gimmeproxy-request.
It claims to automatically re-route requests through another proxy when one fails.
With this module code would look like this:
const setup = require('gimmeproxy-request').setup;
const request = require('gimmeproxy-request').request;
setup({
api_key: 'your api key',
query: 'get=true&cookies=true&country=US&supportsHttps=true&maxCheckPeriod=1800&minSpeed=10', // additional gimmeproxy query parameters
retries: 5, // max retries before fail
test: (body, response) => body.indexOf('captcha') === -1 && response.statusCode === 200 // test function
});
request('https://example.com', {
timeout: 10000 // additional request parameters, see https://github.com/request/request
},
function(err, res, body) {
console.log('err', err)
console.log('res', res)
console.log('body', body)
process.exit()
});
I guess the issue is that you sometimes get not an https proxy from Gimmeproxy, while 'https-proxy-agent' expects https proxy only.
To fix it, use the proxy-agent package of the same author and pass curl field of GimmeProxy response. It will select correct proxy agent implementation.
The following code works for me:
const request = require('request'), ProxyAgent = require('proxy-agent');
generateRandomProxy(function(proxy){
console.log(proxy);
var agent = new ProxyAgent(proxy.curl);
request({
uri: "https://example.com",
method: "GET",
agent: agent,
timeout: 5000,
}, function(error, response, body) {
console.log(error);
console.log(body);
});
})
function generateRandomProxy(cb){
request.get('https://gimmeproxy.com/api/getProxy?get=true&cookies=true&country=US&supportsHttps=true&maxCheckPeriod=1800&minSpeed=10',{json:true},function(err,res){
if(!err){cb(res.body)}
else{console.log('problem obtaining proxy')}
})
}
Note: If you want to call https websites, you should query for proxies with https support using supportsHttps=true parameter. Also it makes sense to query for fresh proxies with maxCheckPeriod=1800 parameter. Setting minSpeed=10 also helps:
https://gimmeproxy.com/api/getProxy?get=true&cookies=true&country=US&supportsHttps=true&maxCheckPeriod=1800&minSpeed=10
Related
I am trying to make request to a third party api from express, i want express to serve as a proxy, and send the result to the front end. the problem is that the route on the frontend has qyeries and parameters, this is my code
app.get("/api/posts/:id", function (req, res) {
request(
"https://api.xxxx.com/yyyy/1897970/user",
{
headers: {
Authorization: API_KEY,
"Content-Type": "application/json",
},
},
function (error, response, body) {
const per_page = req.query.per_page;
const page = req.query.page;
const query = req.query.query;
const id=req.params.id;
if (!response.body) {
console.log(error);
} else {
res.send({
body,
id,
"per-page": per_page,
page: page,
query: query
});
}
}
);
});
On the front end, i have to make request to a route like
axios
.get(
`/api/posts/{id}/query?per_page=10&page=${title}`)
.then((res) => {
}).catch((err)=>{console.log (err) })
The problem is that it returns an error 404, it cannot get the data, please what am i doing wrong?
The URL in the get is not a full URL. change it to the full URL of your backend, if you are in localhost so localhost.
Add proxy to the package.json in react client to fetch the data from the backend
"proxy": "http://localhost:3000",
the proxy should be the port on which the backend is running.
I currently use request to make http requests in node.js. I had at some point encountered an issue where I was getting errors that indicated UNABLE_TO_GET_ISSUER_CERT_LOCALLY. To get around that it set rejectUnauthorized. My working code with request looks like this:
var url = 'someurl';
var options = {
url: url,
port: 443,
// proxy: process.env.HTTPS_PROXY, -- no need to do this as request honors env vars
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
'Accept-Language': 'en-us',
'Content-Language': 'en-us'
},
timeout: 0,
encoding: null,
rejectUnauthorized: false // added this to prevent the UNABLE_TO_GET_ISSUER_CERT_LOCALLY error
};
request(options, function (err, resp, body) {
if (err) reject(err);
else resolve(body.toString());
});
I thought I would try switching to the fetch api using async/await and am now trying to use node-fetch to do the same thing. However, when I do the same thing I am back to the UNABLE_TO_GET_ISSUER_CERT_LOCALLY errors. I read that I needed to use a proxy agent and tried using the proxy-agent module but I am still not having any luck.
Based off of the post https://github.com/TooTallNate/node-https-proxy-agent/issues/11 I thought the following would work:
var options = {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
'Accept-Language': 'en-us',
'Content-Language': 'en-us'
},
timeout: 0,
encoding: null
};
var proxyOptions = nodeurl.parse(process.env.HTTPS_PROXY);
proxyOptions.rejectUnauthorized = false;
options.agent = new ProxyAgent(proxyOptions);
const resp = await fetch('someurl', options);
return await resp.text();
but I still get the same error. So far the only way I've been able to get around this using node-fetch is to set NODE_TLS_REJECT_UNAUTHORIZED=0 in my environment which I don't really want to do. Can someone help show me how to get rejectUnauthorized to work with node-fetch (presumably using an agent but I don't honestly care how as long as it's specified as part of the request).
This is how I got this to work using rejectUnauthorized and the Fetch API in a Node.js app.
Keep in mind that using rejectUnauthorized is dangerous as it opens you up to potential security risks, as it circumvents a problematic certificate.
const fetch = require("node-fetch");
const https = require('https');
const httpsAgent = new https.Agent({
rejectUnauthorized: false,
});
async function getData() {
const resp = await fetch(
"https://myexampleapi.com/endpoint",
{
agent: httpsAgent,
},
)
const data = await resp.json()
return data
}
Use proxy
You should know that node-https-proxy-agent latest version have a problem and doesn't work with Fetch! You can use older version 3.x and down! And it will work! Otherwise Better you can use the node-tunnel module https://www.npmjs.com/package/tunnel! You can too use the wrapping module proxy-http-agent that is based on node-tunnel https://www.npmjs.com/package/proxy-http-agent! That provide automatic detection of protocol for the proxy! One method for all! And more options and affinity! And both of them support both http and https !
You can see the usage and see a good example of proxy building and setup in this module and repo (check the tests):
https://www.npmjs.com/package/net-proxy
https://github.com/Glitnirian/node-net-proxy#readme
ex:
import { ProxyServer } from 'net-proxy';
import { getProxyHttpAgent } from 'proxy-http-agent';
// ...
// __________ setting the proxy
const proxy = new ProxyServer({
port: proxyPort
});
proxy.server.on('data', (data: any) => { // accessing the server instance
console.log(data);
});
await proxy.awaitStartedListening(); // await server to start
// After server started
// ______________ making the call through the proxy to a server through http:
let proxyUrl = `http://localhost:${proxyPort}`; // Protocol from the proxy is automatically detected
let agent = getProxyHttpAgent({
proxy: proxyUrl,
endServerProtocol: 'http:' // the end server protocol (http://localhost:${localApiServerPort} for example)
});
const response = await fetch(`http://localhost:${localApiServerPort}`, {
method: 'GET',
agent
});
// ___________________ making a call through the proxy to a server through https:
agent = getProxyHttpAgent({
proxy: proxyUrl, // proxy as url string! We can use an object (as tunnel module require too)
rejectUnauthorized: false // <==== here it go
});
const response2 = await fetch(`https://localhost:${localApiHttpsServerPort}`, {
method: 'GET',
agent
});
You can see more examples and details in the doc here:
https://www.npmjs.com/package/proxy-http-agent
And you can too use directly node-tunnel! But the package is just a simple wrapper! That make it more simpler!
Add rejectUnauthorized
For the one that doesn't know well!
As per this thread
https://github.com/node-fetch/node-fetch/issues/15
We use the https.Agent to pass the rejectUnauthorized parameter!
const agent = new https.Agent({
key: fs.readFileSync(`${CERT_PATH}.key`),
cert: fs.readFileSync(`${CERT_PATH}.crt`),
rejectUnauthorized: false
})
A complete example
import https from "https";
const agent = new https.Agent({
rejectUnauthorized: false
});
fetch(myUrl, { agent });
For fetch you can too use an environment variable as follow
process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
This way it gonna be set globaly and not per each call! Which may be more appropriate if you are using a constant proxy! For all calls! As when sitting behind the company proxy!
why
By default node fetch! And most of the http requests clients! All use the security and insure a valid ssl Certificate when using https!
To disable this behavior we need to disable that check somehow!
Depending on the libs it may be different!
For fetch that's how it's done!
With http.request! (underlying)
const https = require('https');
const options = {
hostname: 'encrypted.google.com',
port: 443,
path: '/',
method: 'GET',
rejectUnauthorized: false /// <<<== here
};
const req = https.request(options, (res) => {
console.log('statusCode:', res.statusCode);
console.log('headers:', res.headers);
res.on('data', (d) => {
process.stdout.write(d);
});
});
req.on('error', (e) => {
console.error(e);
});
req.end();
check this:
https://nodejs.org/api/https.html#https_https_request_url_options_callback
Also it's part of tls.connect Options
Which you can check here
https://nodejs.org/api/tls.html#tls_tls_connect_options_callback
I'm trying attach an image using the bot emulator tool and sending this image off to the microsofts customvision api, the issue I'm having is that I get
{ Code: 'BadRequestImageFormat', Message: '' }
back from custom the custom vision api call.
I'm using the the request module from npm to handle the calls
// Receive messages from the user and respond by echoing each message back (prefixed with 'You said:')
var bot = new builder.UniversalBot(connector, function (session) {
session.send("Hello"); //session.message.text
// If there is an attachment
if (session.message.attachments.length > 0){
console.log(session.message.attachments[0])
request.post({
url: 'xxx',
encoding: null,
json: true,
headers: {
'Content-Type': 'application/octet-stream',
'Prediction-Key': 'xxx'
},
body: session.message.attachments[0]
}, function(error, response, body){
console.log(body);
});
}
});
I believe that I may be sending the wrong format through to custom vision however I have been unable to figure it out as of yet.
I replicated your issue and it looks like the problem is your 'Content-Type'. You're attempting to pass JSON in your request, but setting the content-type as octet-stream. See my modified code below:
var bot = new builder.UniversalBot(connector, function (session) {
session.send("Hello"); //session.message.text
// If there is an attachment
if (session.message.attachments.length > 0){
console.log(session.message.attachments[0])
request.post({
url: 'https://northeurope.api.cognitive.microsoft.com/vision/v1.0/analyze?visualFeatures',
encoding: null,
json: true,
headers: {
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': 'Your API Key...'
},
body: session.message.attachments[0]
},
function (err, response, body) {
if (err) return console.log(err)
console.log(body);
});
}
});
When I run this, I get the error InvalidImageUrl which is to be expected as it's looking for a content on localhost. You could get round this by exposing your localhost using Ngrok.
From my API(nodejs), I'm accessing a third-party API (using http) to download files.
The service returns a Base64 string, chopped into smaller pieces, to be able to handle larger files.
Is it possible to do multiple http-requests (loop ?) to the third-party service, send each piece in response, to the browser until there is no longer any response from the third-party service?
The reason i want to do this, is because I don't want to consume to much memory on the node server.
I will put the pieces back together in the browser.
Any suggestions on how to do this?
See my current code below.
var request = require('request');
router.post('/getfiledata', function(req, res) {
var fileid = req.body.fileid;
var token = req.headers.authorization;
getFileData(req, res, dbconfig, fileid, token, function(err, chunkOfFile) {
if (err) {
res.status(500).send({
status: 500,
message: err
});
return;
}
res.send(chunkOfFile);
});
});
function getFileData(req, res, dbconfig, fileid, token, next) {
var url ="http://*ip*/service/rest/getfiledata";
var reqbody = {
fileId: fileid
};
var options = {
url: url,
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': token
},
json: true,
body: reqbody
};
/*SOME LOOP HERE TO GET EACH CHUNK AND SEND TO BROWSER*/
request(options, function(err, resp, body) {
if (err) {
console.log(err);
next(err, undefined);
return;
} else {
next(undefined, body)
};
});
};
I think you need Socket.io to push chunks to the browser.
Server :
socket.send("chunk", chunkOfFile)
Client :
let fullString = ""
socket.on("chunk", chunkOfFile => fullString += chunkOfFile )
Something like that
The request library you are using allows for streaming of data from one source to another. Check out the documentation on github.
Here is an example from that page:
request
.get(source)
.on('response', function(response) {
console.log(response.statusCode) // 200
console.log(response.headers['content-type']) // 'image/png'
})
.pipe(request.put(destination))
You may choose to use the http module from Nodejs, as it implements the EventEmitter class too.
I ended up doing a recursive loop from the client. Sending http-requests to my API(node) until the response no longer returns any base64 data chunks.
Thank you guys!
I want to get all public github repositories of given user from github.
I tried to make it with GET request I read from here. When i try it with curl or in the browser everything is fine, but when I run this code is gives me http 403 status code
var request = require('request');
request.get("https://api.github.com/users/:user")
.on('response', function (response) {
console.log(response.statusCode);
console.log(JSON.stringify(response));
});
I tried using this github api library, but couldn't work around the authetication
var GithubApi = require("github");
var github = new GithubApi({});
github.authenticate({
type: "oauth",
token: "GIT_TOKEN"
});
var getUsersRepos = function (user) {
github.repos.getAll({
username: user
}, function (err, res) {
res.forEach(function (element) {
console.log(`${element.name} - language: ${element.language} - issues: ${element.open_issues} - url: ${element.url}`);
}, this);
});
}
module.exports = {
getRepos: getUsersRepos
};
But when I enter my token I can get only my user information.
Any ideas what I am doing wrong or some better idea will be appreciated
The Github API requires requests to have a valid User-Agent header:
If you provide an invalid User-Agent header, you will receive a 403
Forbidden response.
Github requests that you use your GitHub username, or the name of your application, for the User-Agent header value:
var request = require('request');
options = {
url: "https://api.github.com/users/:user",
headers: {
"User-Agent": "tabula" // Your Github ID or application name
}
}
request.get(options)
.on('response', function (response) {
console.log(response.statusCode);
console.log(JSON.stringify(response));
});