2012年10月24日 星期三
2012年10月20日 星期六
PHP去除指定的html標籤
創建 test.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>無標題文件</title>
</head>
<body>
aaa
<strong>strong</strong>
bbb
<em>em</em>
ccc
<script type="text/javascript" src="/script1.js"></script>
<script type="text/javascript" src="/script2.js">/* inside script, same line */</script>
ccc
<script type="text/JavaScript">
/* inside script, change line */
</script>
ddd
</body>
</html>
PHP 程式碼
<?php
// Demo driver: load the sample page, strip every <script> element from it
// and print what is left.
$filename = 'test.html';

// file_get_contents() reads the whole file in one call and, unlike
// fread($handle, filesize(...)), copes with an empty file. A failed read is
// reported explicitly instead of being passed on as a bogus handle.
$Data = file_get_contents($filename);
if ($Data === false) {
    throw new RuntimeException("Unable to read {$filename}");
}

// The third argument is 0, so the text between <script> and </script> is
// dropped together with the tags (passing 1 would keep that text).
$newData = strip_only_tags($Data, 'script', 0);

echo $newData;
/**
 * Remove every occurrence of one HTML tag from a string.
 *
 * Tag names are matched case-insensitively and only as whole names, so
 * stripping "em" leaves <embed> alone.
 *
 * For the void tags listed in $nonPair (img, br) each tag is simply deleted.
 * For every other tag the opening and closing tags are deleted as a pair; what
 * happens to the text between them depends on $stripContent.
 *
 * An opening tag that has no closing tag after it is deleted on its own and
 * the text following it is left untouched.
 *
 * @param string   $source       HTML text to process.
 * @param string   $tag          Tag name without angle brackets, e.g. "script".
 * @param bool|int $stripContent NOTE: the flag works the opposite way to what
 *                               its name suggests, and that behaviour is kept
 *                               for backward compatibility. When it loosely
 *                               equals 1 (1, true, "1") the text between the
 *                               tags is KEPT; for any other value (the
 *                               default) that text is removed with the tags.
 *
 * @return string The source with the requested tag removed.
 */
function strip_only_tags($source, $tag, $stripContent=FALSE){
    // Escape the tag name so it is always treated literally inside the patterns.
    $quotedTag = preg_quote($tag, '#');

    // Tags that never have a closing counterpart.
    $nonPair = array('img', 'br');
    if (in_array(strtolower($tag), $nonPair, true)) {
        // [^>]* stops at the end of the current tag; a greedy .* would run on
        // to the last ">" of the line and delete unrelated markup.
        $stripped = preg_replace('#<' . $quotedTag . '(?=[\s/>])[^>]*>#i', '', $source);
        return $stripped === null ? $source : $stripped;
    }

    // The look-ahead requires whitespace, "/" or ">" right after the name so
    // that a longer tag name sharing the same prefix does not match.
    $openPattern  = '#<' . $quotedTag . '(?=[\s/>])[^>]*>#i';
    // Anchored directly to "</" so the FIRST closing tag after the opening tag
    // is used, even when several elements sit on the same line.
    $closePattern = '#</\s*' . $quotedTag . '(?=[\s>])[^>]*>#i';

    // Preserve the historical meaning of the flag (see the docblock).
    $keepContent = ($stripContent == 1);

    $newData = '';
    $offset  = 0; // start of the part of $source not yet copied or skipped

    while (preg_match($openPattern, $source, $open, PREG_OFFSET_CAPTURE, $offset) === 1) {
        $openStart    = $open[0][1];
        $contentStart = $openStart + strlen($open[0][0]);

        // Copy everything that precedes the opening tag.
        $newData .= substr($source, $offset, $openStart - $offset);

        if (preg_match($closePattern, $source, $close, PREG_OFFSET_CAPTURE, $contentStart) !== 1) {
            // No closing tag: drop only the opening tag and keep scanning.
            $offset = $contentStart;
            continue;
        }

        $closeStart = $close[0][1];
        if ($keepContent) {
            $newData .= substr($source, $contentStart, $closeStart - $contentStart);
        }

        // Resume right after the closing tag.
        $offset = $closeStart + strlen($close[0][0]);
    }

    // Whatever remains contains no further opening tag.
    $newData .= substr($source, $offset);

    return $newData;
}
?>
結果
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>無標題文件</title>
</head>
<body>
aaa
<strong>strong</strong>
bbb
<em>em</em>
ccc
ccc
ddd
</body>
</html>
訂閱:
文章 (Atom)