[nycphp-talk] Converting the pesky MS Word quotes and other characters
Daniel Convissor
danielc at analysisandsolutions.com
Wed Oct 29 23:55:26 EDT 2008
<?php
/**
 * Replaces "fancy" punctuation (as produced by MS Word and similar tools)
 * with plain ASCII equivalents.
 *
 * Handled characters, in both their UTF-8 and Windows-1252 encodings:
 * en dash (-), em dash (--), curly single quotes ('), curly double
 * quotes ("), ellipsis (...), bullet (*) and the 1/4, 1/2, 3/4 fractions.
 * Tabs are converted to a single space.
 *
 * The UTF-8 byte sequences are always replaced.  The single-byte
 * Windows-1252 forms are only replaced when the input is NOT valid UTF-8:
 * in valid UTF-8 those byte values (0x85, 0x91-0x97, 0xBC-0xBE) can only
 * be continuation bytes of some other character, so replacing them would
 * corrupt text such as "u-umlaut" (0xC3 0xBC) into 0xC3 followed by "1/4".
 *
 * This uses a preg_replace() approach with \x escapes, rather than
 * str_replace() with literal characters, because (per the original
 * author) Chrome (the Mozilla/Firefox application layer, not Google's
 * browser) translates the characters.
 *
 * If you need to see what characters are coming in, uncomment
 * the debug call to analyze_string_polaris() that exists at the top
 * of this function.
 *
 * @param string|array $in  the text to clean; an array is processed
 *                          element by element with its keys preserved
 *
 * @return string|array  the cleaned text, in the same shape as $in
 *
 * @see analyze_string_polaris()
 */
function filter_fancy_characters_polaris($in) {
    static $utf8_search, $utf8_replace, $cp1252_search, $cp1252_replace;

    // echo analyze_string_polaris($in);

    if (!isset($utf8_search)) {
        // Multi-byte UTF-8 sequences (plus the tab).  These must be
        // handled BEFORE the single-byte patterns below, because their
        // trailing bytes (0x93, 0x94, 0xBC, 0xBD, 0xBE) would otherwise
        // be matched on their own.
        $utf8_map = array(
            '/\xE2\x80\x93/' => '-',    // en dash
            '/\xE2\x80\x94/' => '--',   // em dash
            '/\xE2\x80\x98/' => "'",    // left single quote
            '/\xE2\x80\x99/' => "'",    // right single quote
            '/\xE2\x80\x9C/' => '"',    // left double quote
            '/\xE2\x80\x9D/' => '"',    // right double quote
            '/\xE2\x80\xA6/' => '...',  // ellipsis
            '/\xE2\x80\xA2/' => '*',    // bullet
            '/\xC2\xBC/'     => '1/4',
            '/\xC2\xBD/'     => '1/2',
            '/\xC2\xBE/'     => '3/4',
            '/\x09/'         => ' ',    // tab
        );

        // Single-byte Windows-1252 forms of the same characters.
        $cp1252_map = array(
            '/\x96/' => '-',    // en dash
            '/\x97/' => '--',   // em dash
            '/\x91/' => "'",    // left single quote
            '/\x92/' => "'",    // right single quote
            '/\x93/' => '"',    // left double quote
            '/\x94/' => '"',    // right double quote
            '/\x85/' => '...',  // ellipsis
            '/\x95/' => '*',    // bullet
            '/\xBC/' => '1/4',
            '/\xBD/' => '1/2',
            '/\xBE/' => '3/4',
        );

        $utf8_search    = array_keys($utf8_map);
        $utf8_replace   = array_values($utf8_map);
        $cp1252_search  = array_keys($cp1252_map);
        $cp1252_replace = array_values($cp1252_map);
    }

    // preg_replace() accepted an array subject, so keep doing so.
    if (is_array($in)) {
        return array_map(__FUNCTION__, $in);
    }

    $in = (string) $in;

    // An empty pattern with the "u" modifier matches only if the whole
    // subject is well-formed UTF-8; otherwise preg_match() returns false.
    $is_utf8 = (preg_match('//u', $in) === 1);

    $out = preg_replace($utf8_search, $utf8_replace, $in);

    if ($is_utf8) {
        // Any remaining high bytes belong to legitimate multi-byte
        // characters; leave them alone.
        return $out;
    }

    // Not valid UTF-8: treat stray high bytes as Windows-1252.
    return preg_replace($cp1252_search, $cp1252_replace, $out);
}
/**
 * Produces a byte-by-byte dump of a string for debugging odd user input.
 *
 * For every byte in the input, one record is emitted containing the raw
 * byte ("Chr"), its hexadecimal value ("Hex"), its octal value ("Oct")
 * and its decimal ordinal ("Ord"), followed by a separator line.
 * The string is walked per byte, so a multi-byte character shows up as
 * several records.
 *
 * @param string $in  the string to inspect
 *
 * @return string  the concatenated records; empty if $in is empty
 *
 * @see filter_fancy_characters_polaris()
 */
function analyze_string_polaris($in) {
    $records = array();
    $total   = strlen($in);
    $pos     = 0;

    while ($pos < $total) {
        $byte = $in[$pos];
        $code = ord($byte);

        $records[] = sprintf(
            " Chr:%s Hex:%s Oct:%s Ord:%d\n-------\n",
            $byte,
            dechex($code),
            decoct($code),
            $code
        );

        $pos++;
    }

    return implode('', $records);
}
?>
--
T H E A N A L Y S I S A N D S O L U T I O N S C O M P A N Y
data intensive web and database programming
http://www.AnalysisAndSolutions.com/
4015 7th Ave #4, Brooklyn NY 11232 v: 718-854-0335 f: 718-854-0409
More information about the talk
mailing list