00001 <?php
00002
00003 lt_include(PLOG_CLASS_PATH."class/data/validator/rules/rule.class.php");
00004 lt_include(PLOG_CLASS_PATH."class/net/dns.class.php");
00005 lt_include(PLOG_CLASS_PATH."class/net/http/httpvars.class.php");
00006
// Result codes used by UrlFormatRule::_ValURL().

// The URL passed every check.
define( "EW_OK", 1 );

// The value was empty after trimming. _ValURL() reports this code, so it must
// exist; the guard avoids a "constant already defined" notice in case another
// file of the project declares it as well. The number is only chosen so that
// it does not collide with the other EW_* codes below.
if( !defined( "EW_ERR_URL_EMPTY_STRING" ))
    define( "EW_ERR_URL_EMPTY_STRING", 200 );

// A part of the URL is present but malformed (or present although forbidden).
define( "EW_ERR_URL_INVALID_PROTOCOL", 10 );
define( "EW_ERR_URL_INVALID_USER", 20 );
define( "EW_ERR_URL_INVALID_PASSWORD", 30 );
define( "EW_ERR_URL_INVALID_SERVER", 40 );
define( "EW_ERR_URL_INVALID_TLD", 50 );
define( "EW_ERR_URL_INVALID_PORT", 60 );
define( "EW_ERR_URL_INVALID_RESOURCE", 70 );
define( "EW_ERR_URL_INVALID_QUERYSTRING", 80 );
define( "EW_ERR_URL_INVALID_ANCHOR", 100 );

// A part of the URL is required by the rule's options but absent.
define( "EW_ERR_URL_MISSING_PROTOCOL", 110 );
define( "EW_ERR_URL_MISSING_USER", 120 );
define( "EW_ERR_URL_MISSING_PASSWORD", 130 );
define( "EW_ERR_URL_MISSING_SERVER", 140 );
define( "EW_ERR_URL_MISSING_TLD", 150 );
define( "EW_ERR_URL_MISSING_PORT", 160 );
define( "EW_ERR_URL_MISSING_RESOURCE", 170 );
define( "EW_ERR_URL_MISSING_QUERYSTRING", 180 );
define( "EW_ERR_URL_MISSING_ANCHOR", 190 );
00028
00029
00030
/**
 * Validation rule that accepts a value only if it parses as a well-formed URL.
 *
 * The actual work is done by _ValURL(), which splits the value into its parts
 * (protocol, user, password, server, port, resource, query string, anchor),
 * checks every part and reports each problem found as an EW_ERR_URL_* code.
 */
class UrlFormatRule extends Rule
{
    /**
     * Options handed to _ValURL() on every call to validate().
     */
    var $_options;

    /**
     * Creates the rule.
     *
     * @param options Array of options for _ValURL(); see that method for the
     * recognised keys. Defaults are used for every key that is not given.
     */
    function UrlFormatRule( $options = Array())
    {
        $this->Rule();
        $this->_options = $options;
    }

    /**
     * Checks whether the given value is a valid URL under the rule's options.
     *
     * @param value The string to check.
     * @return true if _ValURL() found no error at all, false otherwise.
     */
    function validate( $value )
    {
        $result = $this->_ValURL( $value, $this->_options );
        return( $result['Result'] === EW_OK );
    }

    /**
     * Parses, validates and normalises a URL.
     *
     * Recognised keys of $options (all optional):
     *  - 'Require':          map part name => bool; a required part that is
     *                        absent yields the matching EW_ERR_URL_MISSING_* code.
     *                        By default 'Protocol' and 'Server' are required.
     *  - 'Forbid':           map part name => bool; a forbidden part that is
     *                        present yields the matching EW_ERR_URL_INVALID_* code.
     *  - 'AllowedProtocols': list of acceptable protocols; an empty list
     *                        disables the check.
     *  - 'Protocols':        list of protocol names recognised at the start of
     *                        the value when the protocol is split off.
     *  - 'AllowBracks':      if true, '[' and ']' are accepted in the query string.
     *  - 'AssumeProtocol':   protocol to put in front of the returned value when
     *                        the input had none (false to disable).
     * Part names are: Protocol, User, Password, Server, TLD, Port, Resource,
     * QueryString and Anchor.
     *
     * @param value   The URL to check.
     * @param options See above.
     * @return Array with the keys 'Result' (EW_OK, or an array of error
     * codes), 'Value' (the URL rebuilt from its cleaned-up parts) and
     * 'URLParts' (the individual parts, keyed by part name).
     */
    function _ValURL($value, $options = array())
    {
        $value = trim((string) $value);

        if (!is_array($options))
            $options = array();

        // the parts the URL is split into
        $url = array(
            'Protocol'    => '',
            'User'        => '',
            'Password'    => '',
            'Server'      => '',
            'Port'        => '',
            'Resource'    => '',
            'TLD'         => '',
            'QueryString' => '',
            'Anchor'      => '');

        // note: like any falsy string, the value "0" is treated as empty here
        if (!$value)
        {
            // Guard against the constant being undefined, so that an empty
            // value always produces an error result instead of a fatal error.
            $emptyCode = defined('EW_ERR_URL_EMPTY_STRING') ? EW_ERR_URL_EMPTY_STRING : 'EW_ERR_URL_EMPTY_STRING';
            return array('Result' => array($emptyCode), 'Value' => '', 'URLParts' => $url);
        }

        if (!isset($options['Require']))
            $options['Require'] = array();
        if (!isset($options['Forbid']))
            $options['Forbid'] = array();

        // general defaults, overridden by whatever the caller passed
        $options = array_merge(array(
            'AllowedProtocols' =>
                array('http', 'https', 'ftp', 'mailto',
                      'file', 'news', 'gopher', 'telnet',
                      'nntp'),
            'AllowBracks'    => false,
            'Protocols'      => array('http', 'https', 'ftp', 'mailto', 'file', 'news', 'gopher', 'telnet', 'nntp'),
            'AssumeProtocol' => false,
            ), $options);

        // by default only protocol and server are required ...
        $options['Require'] = array_merge(array(
            'Protocol'    => true,
            'User'        => false,
            'Password'    => false,
            'Server'      => true,
            'TLD'         => false,
            'Port'        => false,
            'Resource'    => false,
            'QueryString' => false,
            'Anchor'      => false,
            ), $options['Require']);

        // ... and nothing is forbidden
        $options['Forbid'] = array_merge(array(
            'Protocol'    => false,
            'User'        => false,
            'Password'    => false,
            'Server'      => false,
            'TLD'         => false,
            'Port'        => false,
            'Resource'    => false,
            'QueryString' => false,
            'Anchor'      => false,
            ), $options['Forbid']);

        // error code reported when a part is malformed or forbidden
        $errCodeInvalid = array(
            'Protocol'    => EW_ERR_URL_INVALID_PROTOCOL,
            'User'        => EW_ERR_URL_INVALID_USER,
            'Password'    => EW_ERR_URL_INVALID_PASSWORD,
            'Server'      => EW_ERR_URL_INVALID_SERVER,
            'TLD'         => EW_ERR_URL_INVALID_TLD,
            'Port'        => EW_ERR_URL_INVALID_PORT,
            'Resource'    => EW_ERR_URL_INVALID_RESOURCE,
            'QueryString' => EW_ERR_URL_INVALID_QUERYSTRING,
            'Anchor'      => EW_ERR_URL_INVALID_ANCHOR);

        // error code reported when a required part is absent
        $errCodeMissing = array(
            'Protocol'    => EW_ERR_URL_MISSING_PROTOCOL,
            'User'        => EW_ERR_URL_MISSING_USER,
            'Password'    => EW_ERR_URL_MISSING_PASSWORD,
            'Server'      => EW_ERR_URL_MISSING_SERVER,
            'TLD'         => EW_ERR_URL_MISSING_TLD,
            'Port'        => EW_ERR_URL_MISSING_PORT,
            'Resource'    => EW_ERR_URL_MISSING_RESOURCE,
            'QueryString' => EW_ERR_URL_MISSING_QUERYSTRING,
            'Anchor'      => EW_ERR_URL_MISSING_ANCHOR);

        // Read the needed options explicitly instead of extract()ing the whole
        // array, so that an unexpected option key can never overwrite a local.
        $protocols = $options['Protocols'];
        $require   = $options['Require'];
        $forbid    = $options['Forbid'];

        // errors are keyed by their own code so that each code appears once
        $errArr     = array();
        $tmpValue   = $value;
        $lcValue    = strtolower($value);
        // stays false when there is no server part at all
        $serverIsIP = false;

        //
        // 1. split the value into its parts
        //

        // look for one of the known protocols at the start of the value
        foreach ($protocols as $protocol)
        {
            if (strpos($lcValue, "$protocol:") !== 0)
                continue;

            $tmp = explode(':', $tmpValue, 2);
            // the match above is case-insensitive, so normalise the stored
            // protocol as well; otherwise "HTTP://..." is rejected further down
            $url['Protocol'] = strtolower($tmp[0]);
            $tmpValue = $tmp[1];

            if ($url['Protocol'] == 'mailto' || $url['Protocol'] == 'news')
            {
                // these two are followed directly by their target, no "//"
                if (preg_match('/%[^a-f0-9]/i', $tmpValue) || preg_match("/^[^a-z0-9;&=+$,_.!*'()%~-]/i", $tmpValue))
                {
                    $errArr[EW_ERR_URL_INVALID_PROTOCOL] = EW_ERR_URL_INVALID_PROTOCOL;
                }
            }
            elseif (strpos($tmpValue, '//') === 0)
            {
                $tmpValue = (string) substr($tmpValue, 2);
            }
            else
            {
                // every other protocol must be followed by "//"
                $errArr[EW_ERR_URL_INVALID_PROTOCOL] = EW_ERR_URL_INVALID_PROTOCOL;
            }

            // the protocol has been consumed, nothing else can match
            break;
        }

        // no known protocol: take whatever precedes the separator, if any
        if (!$url['Protocol'])
        {
            $lcRest = strtolower($tmpValue);
            if (strpos($lcRest, 'mailto:') === 0 || strpos($lcRest, 'news:') === 0)
                $separator = ':';
            else
                $separator = '://';

            $tmp = explode($separator, $tmpValue, 2);
            if (count($tmp) == 2)
            {
                $url['Protocol'] = strtolower($tmp[0]);
                $tmpValue = $tmp[1];
            }
        }

        // The anchor starts at the first '#', the query string at the first
        // '?' before it. Both splits are limited to two pieces so that nothing
        // after a second delimiter is silently dropped from the validation.
        $tmp = explode('#', $tmpValue, 2);
        if (count($tmp) > 1)
        {
            $tmpValue = $tmp[0];
            $url['Anchor'] = $tmp[1];
        }

        $tmp = explode('?', $tmpValue, 2);
        if (count($tmp) > 1)
        {
            $tmpValue = $tmp[0];
            $url['QueryString'] = $tmp[1];
        }

        // everything up to the first '/' is "user:password@server:port"
        $tmp = explode('/', $tmpValue, 2);
        $authority = $tmp[0];
        if (count($tmp) > 1)
            $url['Resource'] = $tmp[1];

        // Split off the credentials before lower-casing, so that only the
        // host name is normalised and user/password keep their case.
        $tmp = explode('@', $authority, 2);
        if (count($tmp) > 1)
        {
            $url['User'] = $tmp[0];
            $url['Server'] = strtolower($tmp[1]);

            if ($url['User'])
            {
                $tmp = explode(':', $url['User'], 2);
                if (count($tmp) > 1)
                {
                    $url['User'] = $tmp[0];
                    $url['Password'] = $tmp[1];
                }
            }
        }
        else
        {
            $url['Server'] = strtolower($authority);
        }

        $tmp = explode(':', $url['Server'], 2);
        if (count($tmp) > 1 && $tmp[0])
        {
            $url['Server'] = $tmp[0];
            $url['Port'] = $tmp[1];
        }

        // NOTE(review): a protocol-less value whose user part is "mail" or
        // "news" has that word moved into the protocol slot; "mail" is not in
        // the default AllowedProtocols, so this looks like it was meant to be
        // "mailto" - confirm before changing.
        if (!$url['Protocol'] && !$url['Password'] && in_array(strtolower($url['User']), array('mail', 'news')))
        {
            $url['Protocol'] = strtolower($url['User']);
            $url['User'] = '';
        }

        // "mailto:someone" without '@': what was parsed as server is the user
        if ($url['Protocol'] == 'mailto' && $url['Server'] && !$url['User'])
        {
            $url['User'] = $url['Server'];
            $url['Server'] = '';
        }

        //
        // 2. validate the individual parts
        //

        if ($url['Protocol'])
        {
            // '-' is placed last in the class so that it is a literal and not
            // the middle of a "+-." range (which would also let ',' through)
            $tmp = preg_replace("/[^a-z0-9+.-]/", '', $url['Protocol']);

            if ($tmp != $url['Protocol'])
            {
                $errArr[EW_ERR_URL_INVALID_PROTOCOL] = EW_ERR_URL_INVALID_PROTOCOL;
            }

            if (count($options['AllowedProtocols']))
                if (!in_array($url['Protocol'], $options['AllowedProtocols']))
                    $errArr[EW_ERR_URL_INVALID_PROTOCOL] = EW_ERR_URL_INVALID_PROTOCOL;
        }

        if ($url['User'])
        {
            // a '%' must start a hex escape; otherwise only these characters
            if (preg_match('/%[^a-f0-9]/i', $url['User']) || preg_match("/[^a-z0-9;&=+$,_.!~*'()%-]/i", $url['User']))
            {
                $errArr[EW_ERR_URL_INVALID_USER] = EW_ERR_URL_INVALID_USER;
                $url['User'] = urlencode(urldecode($url['User']));
            }
        }

        if ($url['Password'])
        {
            if (preg_match('/%[^a-f0-9]/i', $url['Password']) || preg_match("/[^a-z0-9;&=+$,_.!~*'()%-]/i", $url['Password']))
            {
                $errArr[EW_ERR_URL_INVALID_PASSWORD] = EW_ERR_URL_INVALID_PASSWORD;
            }
            $url['Password'] = urlencode(urldecode($url['Password']));
        }

        if ($url['Server'])
        {
            if (!preg_match('/[^.0-9]/', $url['Server']))
            {
                // only digits and dots: treat the server as an IPv4 address
                $serverIsIP = true;

                $ipPart = explode('.', $url['Server']);

                if (count($ipPart) != 4 || in_array('', $ipPart, true))
                {
                    // not a dotted quad (too few/many parts or an empty part)
                    $errArr[EW_ERR_URL_INVALID_SERVER] = EW_ERR_URL_INVALID_SERVER;
                }
                elseif ($ipPart[0] > 224 || $ipPart[0] == 0)
                {
                    $errArr[EW_ERR_URL_INVALID_SERVER] = EW_ERR_URL_INVALID_SERVER;
                }
                else
                {
                    for ($i = 1; $i < 4; $i ++)
                    {
                        $ipPart[$i] = (integer) $ipPart[$i];
                        if ($ipPart[$i] > 255)
                            $errArr[EW_ERR_URL_INVALID_SERVER] = EW_ERR_URL_INVALID_SERVER;
                    }
                }

                $url['Server'] = join('.', $ipPart);
            }
            else
            {
                // host name: check it label by label
                $labels      = explode('.', $url['Server']);
                $lastIndex   = count($labels) - 1;
                $cleanLabels = array();

                foreach ($labels as $i => $label)
                {
                    // a label may only consist of letters, digits and '-' ...
                    $clean = preg_replace('/[^a-z0-9-]/', '', $label);

                    // ... must start with a letter or digit (the last label of
                    // a multi-label name, i.e. the TLD, with a letter) ...
                    if ($i && $i == $lastIndex)
                        $clean = preg_replace('/^[^a-z]/', '', $clean);
                    else
                        $clean = preg_replace('/^[^a-z0-9]/', '', $clean);

                    // ... and must end with a letter or digit
                    $clean = preg_replace('/[^a-z0-9]$/', '', $clean);

                    // strict comparison: " 1" and "1" must not count as equal
                    if ($label === '' || $clean !== $label)
                        $errArr[EW_ERR_URL_INVALID_SERVER] = EW_ERR_URL_INVALID_SERVER;

                    // labels that end up empty are dropped from the result
                    if ($clean !== '')
                        $cleanLabels[] = $clean;
                }

                if (count($cleanLabels) < 2)
                {
                    if ($require['TLD'])
                    {
                        $errArr[EW_ERR_URL_MISSING_TLD] = EW_ERR_URL_MISSING_TLD;
                    }
                }
                else
                {
                    $url['TLD'] = $cleanLabels[count($cleanLabels) - 1];
                }

                $url['Server'] = join('.', $cleanLabels);
            }
        }

        if ($url['Port'] !== '')
        {
            if (!preg_match('/^[0-9]+$/D', $url['Port']))
            {
                // anything but plain digits ("-1", "8e1", "80x") is no port
                $errArr[EW_ERR_URL_INVALID_PORT] = EW_ERR_URL_INVALID_PORT;
                $url['Port'] = '';
            }
            else
            {
                $url['Port'] = (integer) $url['Port'];
                if ($url['Port'] < 1 || $url['Port'] > 65535)
                    $errArr[EW_ERR_URL_INVALID_PORT] = EW_ERR_URL_INVALID_PORT;
            }
        }

        if ($url['Resource'])
        {
            $resourceParts = explode('/', $url['Resource']);

            // a single trailing or leading slash is tolerated
            if ($resourceParts[count($resourceParts) - 1] === '')
                array_pop($resourceParts);

            if (isset($resourceParts[0]) && $resourceParts[0] === '')
                unset($resourceParts[0]);

            foreach ($resourceParts as $key => $part)
            {
                if ($part === '')
                {
                    // empty path segment ("a//b")
                    $errArr[EW_ERR_URL_INVALID_RESOURCE] = EW_ERR_URL_INVALID_RESOURCE;
                    unset($resourceParts[$key]);
                }
                elseif (preg_match('/%[^a-f0-9]/i', $part) || preg_match("/[^@a-z0-9_.!~*'()$+&,%:=;?-]/i", $part))
                {
                    $errArr[EW_ERR_URL_INVALID_RESOURCE] = EW_ERR_URL_INVALID_RESOURCE;
                    $resourceParts[$key] = urlencode(urldecode($part));
                }
            }
            $url['Resource'] = join('/', $resourceParts);
        }

        if ($url['QueryString'])
        {
            // negated character class of what may appear in a query string
            $queryChars = $options['AllowBracks'] ?
                "^a-z0-9_.!~*'()%;\/?:@&=+$,\[\]-" :
                "^a-z0-9_.!~*'()%;\/?:@&=+$,-";

            if (preg_match('/%[^a-f0-9]/i', $url['QueryString']) || preg_match("/[$queryChars]+/i", $url['QueryString']))
            {
                $errArr[EW_ERR_URL_INVALID_QUERYSTRING] = EW_ERR_URL_INVALID_QUERYSTRING;
            }
        }

        if ($url['Anchor'])
        {
            // '-' is placed last in the class so that it is a literal; written
            // as "0-9-_" it formed a range that rejected '-' itself and let
            // characters such as '<', '>' and '\' through
            if (preg_match('/%[^a-f0-9][a-f0-9]?/i', $url['Anchor']) ||
                preg_match("/[^a-z0-9_.!~*'()%;\/?:@&=+$,-]/i", $url['Anchor']))
            {
                $errArr[EW_ERR_URL_INVALID_ANCHOR] = EW_ERR_URL_INVALID_ANCHOR;
            }
        }

        //
        // 3. apply the Require / Forbid options
        //

        foreach ($url as $partName => $notused)
        {
            // an IP address has no TLD, so neither option applies to it
            if ($partName == 'TLD' && $serverIsIP)
                continue;

            if ($require[$partName] && !$url[$partName])
                $errArr[$errCodeMissing[$partName]] = $errCodeMissing[$partName];

            if ($forbid[$partName] && $url[$partName])
                $errArr[$errCodeInvalid[$partName]] = $errCodeInvalid[$partName];
        }

        //
        // 4. rebuild the URL from its cleaned-up parts
        //

        // done after the Require check on purpose: a missing protocol is still
        // reported even though the returned value gets the assumed one
        if ($options['AssumeProtocol'] && !$url['Protocol'] && ($url['Server'] || (!$url['Server'] && !$url['Resource'])))
            $url['Protocol'] = $options['AssumeProtocol'];

        $value = $url['Protocol'];

        if ($url['Protocol'])
        {
            // same two protocols that are parsed without "//" above
            if ($url['Protocol'] == 'mailto' || $url['Protocol'] == 'news')
                $value.= ':';
            else
                $value.= '://';
        }

        if ($url['User'])
        {
            if ($url['Password'])
                $value.= "{$url['User']}:{$url['Password']}";
            else
                $value.= "{$url['User']}";

            if ($url['Server'])
                $value.= '@';
        }

        $value.= $url['Server'];

        if ($url['Port'])
            $value.= ":{$url['Port']}";

        if ($url['Server'] && $url['Resource'])
            $value.= "/";

        $value.= $url['Resource'];

        if ($url['QueryString'])
            $value.= "?{$url['QueryString']}";

        if ($url['Anchor'])
            $value.= "#{$url['Anchor']}";

        return array('Result' => count($errArr) ? $errArr : EW_OK, 'Value' => $value, 'URLParts' => $url);
    }

}
00555 ?>