GetElementById alternative - javascript

I'm trying to code a simple script with curl, but the problem is I need to gather special key that is generated on every new POST request (onLoad()). The problem can be easiyle be passed by creating a new DOM element and getting value using GetElementById function from DOM but, in this case, there is no "id" declared in specific tag I want to return value from. There is only a name.
Example:
<input name="trans_id" value="Lk+Vz957skV845b7x2DX7iyR1FI=" type="hidden">
Bellow there is a pseudo-code I did today (last paragraph is where I need help):
<?php
// Author : me
// Date : 10.11.2013.
?>
<?php
// Declaring variables :)
$data_string = '';
$url = 'http://www.website.com';
$uagent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6';
// Declaring variables for e-pay
$url2 = 'http:/website2.com';
...
$exChar = '|';
$exStr = '';
$exStr = Explode($exChar, $_POST['ccep']);
$data = array (
"email" => '',
...
"submitFromInputForm" => 'Next',
);
foreach($data as $key=>$value) { $data_string .= $key.'='.$value.'&'; }
rtrim($data_string, '&');
$ch = curl_init ();
curl_setopt ($ch, CURLOPT_COOKIEJAR, 'cookies.txt');
curl_setopt ($ch, CURLOPT_COOKIEFILE, 'cookies.txt');
curl_setopt ($ch, CURLOPT_URL, $url);
curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt ($ch, CURLOPT_HEADER, 0);
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt ($ch, CURLOPT_USERAGENT, $uagent);
curl_setopt ($ch, CURLOPT_POST, count($data));
curl_setopt ($ch, CURLOPT_POSTFIELDS, $data_string);
$result = curl_exec ($ch);
print($result);
// ====================================================================
// Need help with this function
$check = strpos($result, 'Confirmation');
if ($check == True) {
$doc = new DOMDocument();
#$doc->loadHTML($result);
$id = $doc->getElementsByName('trans_id');
echo 'Value:' . $id;
}
// ====================================================================
curl_close($ch);
?>
But to be honest, I wasn't able to get result from that function because, well, it doesn't exsist. Google search only help me with droping results to use getElementsByTagName which after reading documentation from PHP official, does not resolve my problem.
A note at the end: I don't want to include any Javascripting, only pure PHP.
Thank you in advance,
regards.

You can use DOMXPath in order to access specific properties by xpath.
$domx = new DOMXPath($doc);
$trans_id = $domx->evaluate("//input[contains(#name, 'trans_id')]");
Eventually loop through the object if necessary
foreach ($trans_id as $id) {
echo "Value:" . $id->nodeValue;
}
On second thought, you need the value attribute, so you'd need to use getAttribute() in order to retrieve it. I just tested with the following code and it works as expected:
<?php
$result = '<input name="trans_id" value="Lk+Vz957skV845b7x2DX7iyR1FI=" type="hidden">';
$doc = new DOMDocument();
#$doc->loadHTML($result);
$domx = new DOMXPath($doc);
$trans_id = $domx->query('//input[#name="trans_id"]');
foreach ($trans_id as $id) {
echo "Value: " . $id->getAttribute('value');
}
prints:
Value: Lk+Vz957skV845b7x2DX7iyR1FI=

Related

PHP - Retrieve Cookie to Use with GET Request via OnClick link [duplicate]

I have some problem with PHP Curl and cookies authentication.
I have a file Connector.php which authenticates users on another server and returns the cookie of the current user.
The Problem is that I want to authenticate thousands of users with curl but it authenticates and saves COOKIES only for one user at a time.
The code for connector.php is this:
<?php
if(!count($_REQUEST)) {
die("No Access!");
}
//Core Url For Services
define ('ServiceCore', 'http://example.com/core/');
//Which Internal Service Should Be Called
$path = $_GET['service'];
//Service To Be Queried
$url = ServiceCore.$path;
//Open the Curl session
$session = curl_init($url);
// If it's a GET, put the GET data in the body
if ($_GET['service']) {
//Iterate Over GET Vars
$postvars = '';
foreach($_GET as $key=>$val) {
if($key!='service') {
$postvars.="$key=$val&";
}
}
curl_setopt ($session, CURLOPT_POST, true);
curl_setopt ($session, CURLOPT_POSTFIELDS, $postvars);
}
//Create And Save Cookies
$tmpfname = dirname(__FILE__).'/cookie.txt';
curl_setopt($session, CURLOPT_COOKIEJAR, $tmpfname);
curl_setopt($session, CURLOPT_COOKIEFILE, $tmpfname);
curl_setopt($session, CURLOPT_HEADER, false);
curl_setopt($session, CURLOPT_RETURNTRANSFER, true);
curl_setopt($session, CURLOPT_FOLLOWLOCATION, true);
// EXECUTE
$json = curl_exec($session);
echo $json;
curl_close($session);
?>
Here is the process of authentication:
User enters username and password: Connector.php?service=logon&user_name=user32&user_pass=123
Connector.php?service=logosessionInfo returns info about the user based on the cookies saved earlier with logon service.
The problem is that this code saves the cookie in one file for each user and can't handle multiple user authentications.
You can specify the cookie file with a curl opt. You could use a unique file for each user.
curl_setopt( $curl_handle, CURLOPT_COOKIESESSION, true );
curl_setopt( $curl_handle, CURLOPT_COOKIEJAR, uniquefilename );
curl_setopt( $curl_handle, CURLOPT_COOKIEFILE, uniquefilename );
The best way to handle it would be to stick your request logic into a curl function and just pass the unique file name in as a parameter.
function fetch( $url, $z=null ) {
$ch = curl_init();
$useragent = isset($z['useragent']) ? $z['useragent'] : 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2';
curl_setopt( $ch, CURLOPT_URL, $url );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $ch, CURLOPT_AUTOREFERER, true );
curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
curl_setopt( $ch, CURLOPT_POST, isset($z['post']) );
if( isset($z['post']) ) curl_setopt( $ch, CURLOPT_POSTFIELDS, $z['post'] );
if( isset($z['refer']) ) curl_setopt( $ch, CURLOPT_REFERER, $z['refer'] );
curl_setopt( $ch, CURLOPT_USERAGENT, $useragent );
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, ( isset($z['timeout']) ? $z['timeout'] : 5 ) );
curl_setopt( $ch, CURLOPT_COOKIEJAR, $z['cookiefile'] );
curl_setopt( $ch, CURLOPT_COOKIEFILE, $z['cookiefile'] );
$result = curl_exec( $ch );
curl_close( $ch );
return $result;
}
I use this for quick grabs. It takes the url and an array of options.
In working with a similar problem I created the following function after combining a lot of resources I ran into on the web, and adding my own cookie handling. Hopefully this is useful to someone else.
function get_web_page( $url, $cookiesIn = '' ){
$options = array(
CURLOPT_RETURNTRANSFER => true, // return web page
CURLOPT_HEADER => true, //return headers in addition to content
CURLOPT_FOLLOWLOCATION => true, // follow redirects
CURLOPT_ENCODING => "", // handle all encodings
CURLOPT_AUTOREFERER => true, // set referer on redirect
CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
CURLOPT_TIMEOUT => 120, // timeout on response
CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
CURLINFO_HEADER_OUT => true,
CURLOPT_SSL_VERIFYPEER => true, // Validate SSL Certificates
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_COOKIE => $cookiesIn
);
$ch = curl_init( $url );
curl_setopt_array( $ch, $options );
$rough_content = curl_exec( $ch );
$err = curl_errno( $ch );
$errmsg = curl_error( $ch );
$header = curl_getinfo( $ch );
curl_close( $ch );
$header_content = substr($rough_content, 0, $header['header_size']);
$body_content = trim(str_replace($header_content, '', $rough_content));
$pattern = "#Set-Cookie:\\s+(?<cookie>[^=]+=[^;]+)#m";
preg_match_all($pattern, $header_content, $matches);
$cookiesOut = implode("; ", $matches['cookie']);
$header['errno'] = $err;
$header['errmsg'] = $errmsg;
$header['headers'] = $header_content;
$header['content'] = $body_content;
$header['cookies'] = $cookiesOut;
return $header;
}
First create temporary cookie using tempnam() function:
$ckfile = tempnam ("/tmp", "CURLCOOKIE");
Then execute curl init witch saves the cookie as a temporary file:
$ch = curl_init ("http://uri.com/");
curl_setopt ($ch, CURLOPT_COOKIEJAR, $ckfile);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, true);
$output = curl_exec ($ch);
Or visit a page using the cookie stored in the temporary file:
$ch = curl_init ("http://somedomain.com/cookiepage.php");
curl_setopt ($ch, CURLOPT_COOKIEFILE, $ckfile);
curl_setopt ($ch, CURLOPT_RETURNTRANSFER, true);
$output = curl_exec ($ch);
This will initialize the cookie for the page:
curl_setopt ($ch, CURLOPT_COOKIEFILE, $ckfile);
Here you can find some useful info about cURL & cookies http://docstore.mik.ua/orelly/webprog/pcook/ch11_04.htm .
You can also use this well done method https://github.com/alixaxel/phunction/blob/master/phunction/Net.php#L89 like a function:
function CURL($url, $data = null, $method = 'GET', $cookie = null, $options = null, $retries = 3)
{
$result = false;
if ((extension_loaded('curl') === true) && (is_resource($curl = curl_init()) === true))
{
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_FAILONERROR, true);
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
if (preg_match('~^(?:DELETE|GET|HEAD|OPTIONS|POST|PUT)$~i', $method) > 0)
{
if (preg_match('~^(?:HEAD|OPTIONS)$~i', $method) > 0)
{
curl_setopt_array($curl, array(CURLOPT_HEADER => true, CURLOPT_NOBODY => true));
}
else if (preg_match('~^(?:POST|PUT)$~i', $method) > 0)
{
if (is_array($data) === true)
{
foreach (preg_grep('~^#~', $data) as $key => $value)
{
$data[$key] = sprintf('#%s', rtrim(str_replace('\\', '/', realpath(ltrim($value, '#'))), '/') . (is_dir(ltrim($value, '#')) ? '/' : ''));
}
if (count($data) != count($data, COUNT_RECURSIVE))
{
$data = http_build_query($data, '', '&');
}
}
curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
}
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, strtoupper($method));
if (isset($cookie) === true)
{
curl_setopt_array($curl, array_fill_keys(array(CURLOPT_COOKIEJAR, CURLOPT_COOKIEFILE), strval($cookie)));
}
if ((intval(ini_get('safe_mode')) == 0) && (ini_set('open_basedir', null) !== false))
{
curl_setopt_array($curl, array(CURLOPT_MAXREDIRS => 5, CURLOPT_FOLLOWLOCATION => true));
}
if (is_array($options) === true)
{
curl_setopt_array($curl, $options);
}
for ($i = 1; $i <= $retries; ++$i)
{
$result = curl_exec($curl);
if (($i == $retries) || ($result !== false))
{
break;
}
usleep(pow(2, $i - 2) * 1000000);
}
}
curl_close($curl);
}
return $result;
}
And pass this as $cookie parameter:
$cookie_jar = tempnam('/tmp','cookie');
You can define different cookies for every user with CURLOPT_COOKIEFILE and CURLOPT_COOKIEJAR. Make different file for every user so each one would have it's own cookie-based session on remote server.
Solutions which are described above, even with unique CookieFile names, can cause a lot of problems on scale.
We had to serve a lot of authentications with this solution and our server went down because of high file read write actions.
The solution for this was to use Apache Reverse Proxy and omit CURL requests at all.
Details how to use Proxy on Apache can be found here:
https://httpd.apache.org/docs/2.4/howto/reverse_proxy.html

Login to site with PHP cURL

I'm trying login to a remote site, by having curl to the login form.
I want to redirect to another subdomain and get content.
The code I have doesn't seem to work and only tries to show the main page of the site.
<?php
$username = 'user';
$password = 'pass';
$loginUrl = 'https://site_url';
//init curl
$ch = curl_init();
//Set the URL to work with
curl_setopt($ch, CURLOPT_URL, $loginUrl);
// ENABLE HTTP POST
curl_setopt($ch, CURLOPT_POST, 1);
//Set the post parameters
curl_setopt($ch, CURLOPT_POSTFIELDS, 'user='.$username.'&password='.$password);
//Handle cookies for the login
curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
curl_setopt ($ch, CURLOPT_COOKIEFILE, 'cookie.txt');
//Setting CURLOPT_RETURNTRANSFER variable to 1 will force cURL
//not to print out the results of its query.
//Instead, it will return the results as a string return value
//from curl_exec() instead of the usual true/false.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
//execute the request (the login)
$store = curl_exec($ch);
//the login is now done and you can continue to get the
//protected content.
//set the URL to the protected file
curl_setopt($ch, CURLOPT_URL, 'https://site_url/statistics');
//execute the request
$content = curl_exec($ch);
curl_close($ch);
//save the data to disk
file_put_contents('~/file.txt', $content);
?>
Does it have to be via cURL?
Have you considered leveraging a library like Guzzle?
They have good documentation that covers scenarios as you have described.
https://docs.guzzlephp.org/en/stable/quickstart.html#post-form-requests
Something like the below but could be laid out better
use GuzzleHttp\Client;
use GuzzleHttp\Exception\BadResponseException;
$client = new Client(['base_uri' => 'https://url_site/', 'cookies' => true]);
// Login
try {
$client->request(
'POST',
'login/',
[
'form_params' => [
'username' => $username,
'password' => $password,
],
]
);
} catch (BadResponseException $e) {
echo "Error Logging On for User {$username}";
exit;
}
// Navigate
$response = $client->request('GET', '/statistics');
$content = $response->getBody()->getContents();
file_put_contents('~/file.txt', $content);

Parse from javascript var with SimpleHTMLDom

I have this code that outputs me source page of source URL with curl!
$url = 'http://source-page.com';
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // add this one, it seems to spawn redirect 301 header
curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13'); // spoof
$output = curl_exec($ch);
curl_close($ch);
$html = str_get_html($output);
In $output i have this:
var flashvars = {
"image_url":"http://path-to-image.com",
"video_title":"This is video title",
"videoUrl":"http://this-is-path-to-mp4.com"
}
I want to echo videoUrl and I have tried with this:
$videoUrl = $html->find('flashvars[0].videoUrl');
echo $videoUrl
And is giving me empty results. What is a good code for doing that?
Someone else suggessted regex + json_decode and then deleted it.
Here's what I would do:
$output = <<<EOF
var flashvars = {
"image_url":"http://path-to-image.com",
"video_title":"This is video title",
"videoUrl":"http://this-is-path-to-mp4.com"
}
EOF;
$str = preg_match('/var flashvars = (\{.*?\})/s', $output, $m);
$data = json_decode($m[1], true);
echo $data['videoUrl'];

XmlHttpRequest - post JSON - json string too many quotes (i think) [duplicate]

I looked around a lot before posting this question so my apologies if it is on another post and this is only my second quesiton on here so apologies if I don't format this question correctly.
I have a really simple web service that I have created that needs to take post values and return a JSON encoded array. That all worked fine until I was told I would need to post the form data with a content-type of application/json. Since then I cannot return any values from the web service and it is definitely something to do with how I am filtering their post values.
Basically in my local setup I have created a test page that does the following -
$curl = curl_init();
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_HTTPHEADER, array(
'Content-Type: application/json',
'Content-Length: ' . strlen($data))
);
curl_setopt($curl, CURLOPT_URL, 'http://webservice.local/'); // Set the url path we want to call
$result = curl_exec($curl);
//see the results
$json=json_decode($result,true);
curl_close($curl);
print_r($json);
On the webservice I have this (I have stripped out some of the functions) -
<?php
header('Content-type: application/json');
/* connect to the db */
$link = mysql_connect('localhost','root','root') or die('Cannot connect to the DB');
mysql_select_db('webservice',$link) or die('Cannot select the DB');
if(isset($_POST['action']) && $_POST['action'] == 'login') {
$statusCode = array('statusCode'=>1, 'statusDescription'=>'Login Process - Fail');
$posts[] = array('status'=>$statusCode);
header('Content-type: application/json');
echo json_encode($posts);
/* disconnect from the db */
}
#mysql_close($link);
?>
Basically I know that it is due to the $_POST values not being set but I can't find what I need to put instead of the $_POST. I tried
json_decode($_POST), file_get_contents("php://input") and a number of other ways but I was shooting in the dark a bit.
Any help would be greatly appreciated.
Thanks, Steve
Thanks Michael for the help, that was a definite step forward I now have at least got a repsonse when I echo the post....even if it is null
updated CURL -
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array('Content-Type: application/json'));
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($curl, CURLOPT_URL, 'http://webservice.local/');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($data));
updated php on the page that the data is posted to -
$inputJSON = file_get_contents('php://input');
$input= json_decode( $inputJSON, TRUE ); //convert JSON into array
print_r(json_encode($input));
As I say at least I see a response now wheras prior it was returning a blank page
You have empty $_POST. If your web-server wants see data in json-format you need to read the raw input and then parse it with JSON decode.
You need something like that:
$json = file_get_contents('php://input');
$obj = json_decode($json);
Also you have wrong code for testing JSON-communication...
CURLOPT_POSTFIELDS tells curl to encode your parameters as application/x-www-form-urlencoded. You need JSON-string here.
UPDATE
Your php code for test page should be like that:
$data_string = json_encode($data);
$ch = curl_init('http://webservice.local/');
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($ch, CURLOPT_POSTFIELDS, $data_string);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
'Content-Type: application/json',
'Content-Length: ' . strlen($data_string))
);
$result = curl_exec($ch);
$result = json_decode($result);
var_dump($result);
Also on your web-service page you should remove one of the lines header('Content-type: application/json');. It must be called only once.
Hello this is a snippet from an old project of mine that uses curl to get ip information from some free ip databases services which reply in json format. I think it might help you.
$ip_srv = array("http://freegeoip.net/json/$this->ip","http://smart-ip.net/geoip-json/$this->ip");
getUserLocation($ip_srv);
Function:
function getUserLocation($services) {
$ctx = stream_context_create(array('http' => array('timeout' => 15))); // 15 seconds timeout
for ($i = 0; $i < count($services); $i++) {
// Configuring curl options
$options = array (
CURLOPT_RETURNTRANSFER => true, // return web page
//CURLOPT_HEADER => false, // don't return headers
CURLOPT_HTTPHEADER => array('Content-type: application/json'),
CURLOPT_FOLLOWLOCATION => true, // follow redirects
CURLOPT_ENCODING => "", // handle compressed
CURLOPT_USERAGENT => "test", // who am i
CURLOPT_AUTOREFERER => true, // set referer on redirect
CURLOPT_CONNECTTIMEOUT => 5, // timeout on connect
CURLOPT_TIMEOUT => 5, // timeout on response
CURLOPT_MAXREDIRS => 10 // stop after 10 redirects
);
// Initializing curl
$ch = curl_init($services[$i]);
curl_setopt_array ( $ch, $options );
$content = curl_exec ( $ch );
$err = curl_errno ( $ch );
$errmsg = curl_error ( $ch );
$header = curl_getinfo ( $ch );
$httpCode = curl_getinfo ( $ch, CURLINFO_HTTP_CODE );
curl_close ( $ch );
//echo 'service: ' . $services[$i] . '</br>';
//echo 'err: '.$err.'</br>';
//echo 'errmsg: '.$errmsg.'</br>';
//echo 'httpCode: '.$httpCode.'</br>';
//print_r($header);
//print_r(json_decode($content, true));
if ($err == 0 && $httpCode == 200 && $header['download_content_length'] > 0) {
return json_decode($content, true);
}
}
}
you can put your json in a parameter and send it instead of put only your json in header:
$post_string= 'json_param=' . json_encode($data);
//open connection
$ch = curl_init();
//set the url, number of POST vars, POST data
curl_setopt($ch,CURLOPT_POST, 1);
curl_setopt($ch,CURLOPT_POSTFIELDS, $post_string);
curl_setopt($curl, CURLOPT_URL, 'http://webservice.local/'); // Set the url path we want to call
//execute post
$result = curl_exec($curl);
//see the results
$json=json_decode($result,true);
curl_close($curl);
print_r($json);
on the service side you can get your json string as a parameter:
$json_string = $_POST['json_param'];
$obj = json_decode($json_string);
then you can use your converted data as object.

Scraping data in dynamic sites

I'm trying to scrape data from our local government. What I want is address from kids adoption offices. Here, in Brazil, all adoptions go through the government. So I have the URL of one office, there are 2 or 3 thousands more. But if I can manage to get one, the others will be easy.
I made many attempts, bellow I show three.
The problem could be related to a Javascript (Ajax maybe) that refresh the page.
Note: I am not a PHP developer.
First attempt
echo '<html><head></head><body>';
echo '<h1>Scraper PHP GET 1</h1>';
echo ini_get("allow_url_fopen");
echo ini_get("allow_url_fopen");
// I used this url for test
//$url = 'http://www.portaldaadocao.com.br';
//This is the URL that I really want
$url = 'http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?transacao=CONSULTA&vara=2673';
$html = file_get_contents($url);
var_dump($html);
echo '</body></html>';
// Output
// 11
// Warning:
file_get_contents(http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?
transacao=CONSULTA&vara=2673) [function.file-get-contents]: failed to open stream: HTTP
request failed! HTTP/1.1 404 Not Found in /home/rsl/www/sc01_get.php on line 14
// bool(false)
Second attempt
echo '<html><head></head><body>';
echo '<h1>Scraper PHP CURL 3</h1>';
// I used this url for test
//$url = 'http://www.portaldaadocao.com.br';
//This is the URL that I really want
$url = 'http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?transacao=CONSULTA&vara=2673';
$curl = curl_init($url);
#curl_setopt($curl, CURLOPT_POSTFIELDS, "foo");
#curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
#curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");;
$html=#curl_exec($curl);
if (!$html) {
echo "<br />cURL error number:" .curl_errno($curl);
echo "<br />cURL error:" . curl_error($curl);
exit;
}
else{
echo '<br>begin HTML[';
echo $html;
echo '<br>]end html ';
}
echo '</body></html>';
// Output
// 1
third attempt
function curl($url){
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.6 (KHTML, like Gecko) Chrome/16.0.897.0 Safari/535.6');
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookie.txt");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookie.txt");
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_REFERER, "http://www.windowsphone.com");
$data = curl_exec($ch);
curl_close($ch);
return $data;
}
echo '<html><head></head><body>';
echo '<h1>Scraper PHP CURL 5</h1>';
// I used this url for test
//$url = 'http://www.portaldaadocao.com.br';
//This is the URL that I really want
$url = 'http://www.cnj.jus.br/cna/Controle/ConsultaPublicaBuscaControle.php?transacao=CONSULTA&vara=2673';
$curl = curl_init($url);
#curl_setopt($curl, CURLOPT_POSTFIELDS, "foo");
#curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
#curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");;
$html=#curl($curl);
if (!$html) {
echo "<br />cURL error number:" .curl_errno($curl);
echo "<br />cURL error:" . curl_error($curl);
exit;
}
else{
echo '<br>begin HTML[';
echo $html;
echo '<br>]end html ';
}
echo '</body></html>';
// Output
// cURL error number:0
// cURL error:

Categories

Resources