2012年10月20日 星期六

PHP去除指定的html標籤





創建 test.html

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>無標題文件</title>
</head>

<body>
aaa
<strong>strong</strong>
bbb
<em>em</em>
ccc
<script type="text/javascript" src="/script1.js"></script>
<script type="text/javascript" src="/script2.js">/* inside script, same line */</script>
ccc
<script type="text/JavaScript">
/* inside script, change line */
</script>
ddd
</body>
</html>


PHP 程式碼

<?php
// Load the sample page in a single call. file_get_contents() reports failure
// by returning false (checked below) and, unlike fread() with filesize(),
// has no problem with an empty file.
$filename = "test.html";
$Data = file_get_contents($filename);
if ($Data === false) {
    throw new RuntimeException('Unable to read ' . $filename);
}

// The third argument is 0, which makes strip_only_tags() drop each <script>
// element together with everything between its opening and closing tags.
$newData = strip_only_tags($Data, 'script', 0);
echo $newData;

/**
 * Remove every occurrence of a single HTML tag from a string.
 *
 * Matching is regex based, not a real HTML parser: an opening tag is paired
 * with the first closing tag that follows it, so nested elements of the same
 * name and attribute values containing ">" are not handled.
 *
 * @param string $source       HTML text to process.
 * @param string $tag          Tag name without angle brackets, e.g. 'script'.
 * @param mixed  $stripContent NOTE: the meaning is the reverse of the name and is
 *                             kept that way for backward compatibility. When the
 *                             value loosely equals 1 (1, true, '1') only the tags
 *                             are removed and the text between them is kept; for
 *                             any other value (the default) the tags AND the text
 *                             between them are removed.
 * @return string The source with the requested tag removed.
 */
function strip_only_tags($source, $tag, $stripContent=FALSE){
    // An empty tag name would turn the patterns below into "match any tag".
    if ((string) $tag === '') {
        return $source;
    }

    // Escape the name so regex metacharacters in it cannot alter the pattern,
    // and require whitespace, "/" or ">" right after it so that, for example,
    // tag "b" does not also match "<body>".
    $name = preg_quote($tag, '#');
    $openPattern = '#<' . $name . '(?=[\s/>])[^>]*>#i';
    $closePattern = '#</' . $name . '(?=[\s/>])[^>]*>#i';

    // Tags that never have a closing counterpart: just delete the tag itself.
    $nonPair = array('img', 'br');
    if (in_array(strtolower($tag), $nonPair, true)) {
        return preg_replace($openPattern, '', $source);
    }

    $keepContent = ($stripContent == 1);
    $newData = '';
    $offset = 0; // everything before this position has already been handled

    // Find the next opening tag at or after $offset.
    while (preg_match($openPattern, $source, $open, PREG_OFFSET_CAPTURE, $offset) === 1) {
        $openStart = $open[0][1];
        $contentStart = $openStart + strlen($open[0][0]);

        // Copy the text in front of the opening tag, then skip the tag.
        $newData .= substr($source, $offset, $openStart - $offset);
        $offset = $contentStart;

        // Find the first matching closing tag after the opening tag.
        if (preg_match($closePattern, $source, $close, PREG_OFFSET_CAPTURE, $contentStart) !== 1) {
            // No closing tag: only the opening tag is dropped and the text
            // after it is kept.
            continue;
        }

        $closeStart = $close[0][1];
        if ($keepContent) {
            $newData .= substr($source, $contentStart, $closeStart - $contentStart);
        }
        $offset = $closeStart + strlen($close[0][0]);
    }

    // Whatever follows the last handled tag is copied unchanged.
    return $newData . substr($source, $offset);
}
?>


結果（第三個參數為 0 時，script 標籤與其內容會一併被移除）

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>無標題文件</title>
</head>

<body>
aaa
<strong>strong</strong>
bbb
<em>em</em>
ccc


ccc

ddd
</body>
</html>

網站多國語言架構之比較