清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
一段较为理想的 PHP 代码,用于去除 HTML 标签(包括多余的空白、CSS 样式和 JS 脚本)。
// Strips markup from $descclear in place. Line breaks and tabs are removed,
// remaining whitespace runs are collapsed to one space, comments and
// declarations are dropped, a set of embedded/active elements is removed
// together with its content, and a fixed list of tags is stripped while the
// text between them is kept.
//
// NOTE(review): this is a blacklist filter, not an XSS sanitizer. The
// "javascript" -> "Javascript" and "on...=" -> "On...=" rewrites only change
// letter case, and tags outside the lists below (for example <p>) pass through
// with their attributes. Escape the result for its output context (e.g. with
// htmlspecialchars()) before echoing it into a page.

// Final pass: strip these tags but keep the text between them. Each alternative
// is matched as a prefix of the tag name, so the one-letter entries (a, b, i, u)
// also remove tags such as <br>, <input> or <ul>.
// $pat stays a plain variable in the enclosing scope, as in the original snippet.
$pat = '/<(\/?)(script|i?frame|style|html|body|li|i|map|title|img|link|span|u|font|table|tr|b|marquee|td|strong|div|a|meta|\?|\%)([^>]*?)>/isU';

// The filter is wrapped in a closure so its temporaries do not leak into the
// enclosing scope and so it can be applied to other strings as well.
$descclearFilter = static function ($html) use ($pat) {
    // Drop CR, LF and TAB outright; nothing is inserted in their place.
    // A separate "\r\n" pass is unnecessary because both characters are
    // already gone at that point.
    $html = str_replace(["\r", "\n", "\t"], '', $html);

    // Ordered [pattern, replacement] pairs; they are applied top to bottom.
    $rules = [
        ['/\s+/', ' '],           // collapse remaining whitespace runs
        ['/<[ ]+/si', '<'],       // "<  tag" -> "<tag" so the tag rules below can match
        ['/<\!--.*?-->/si', ''],  // HTML comments
        ['/<(\!.*?)>/si', ''],    // DOCTYPE and any other <!...> declaration
    ];

    // Document-level tags: remove the tag itself, keep what is between the tags.
    // The name is matched as a prefix, so e.g. "head" also covers <header>.
    foreach (['html', 'head', 'meta', 'body', 'link', 'form'] as $tag) {
        $rules[] = ['/<(\/?' . $tag . '.*?)>/si', ''];
    }

    // Upper-cases every occurrence of "cookie", in text as well as in markup.
    $rules[] = ['/cookie/si', 'COOKIE'];

    // Elements removed together with their content. The second rule of each
    // pair cleans up unbalanced opening or closing tags that are left over.
    // The leftover rule for "object" used to be spelled "objec"; it now uses
    // the full tag name like its siblings.
    foreach (['applet', 'style', 'title', 'object', 'noframes', 'i?frame', 'script'] as $tag) {
        $rules[] = ['/<(' . $tag . '.*?)>(.*?)<(\/' . $tag . '.*?)>/si', ''];
        $rules[] = ['/<(\/?' . $tag . '.*?)>/si', ''];
    }

    // Case-only rewrites of script URL schemes and on<event>= attributes.
    $rules[] = ['/javascript/si', 'Javascript'];
    $rules[] = ['/vbscript/si', 'Vbscript'];
    $rules[] = ['/on([a-z]+)\s*=/si', 'On\\1='];

    // NOTE(review): the original snippet followed this with
    // preg_replace("/&#/si", "&#", ...), which replaces "&#" with itself. It is
    // dropped here as a no-op. If the replacement text was garbled and numeric
    // character references are meant to be defused, add the intended rule here.

    $rules[] = [$pat, ''];

    foreach ($rules as $rule) {
        $step = preg_replace($rule[0], $rule[1], $html);
        if ($step === null) {
            // preg_replace() returns null when PCRE fails (for example when the
            // backtrack limit is hit on a very large input). Fail closed with an
            // empty string instead of feeding null into the remaining rules.
            return '';
        }
        $html = $step;
    }

    return $html;
};

$descclear = $descclearFilter($descclear);