CREATE TABLE `test` (
`id` int(10) NOT NULL AUTO_INCREMENT,
`name` varchar(100) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
insert into `test` (name) values ('x'), (unhex('c2a3'));
SELECT
name,
length(`name`) as ln,
char_length(`name`) as cl,
length(CONVERT(`name` USING utf8)) as u_ln,
char_length(CONVERT(`name` USING utf8)) as u_cl,
hex(name) as hx,
CONVERT(`name` USING utf8) as u_nm,
hex(CONVERT(`name` USING utf8)) as h_nm
FROM test;
After running this query you will notice that the length is the same for latin characters when it counted using length and still the same after converting to utf8. However, for utf8 data you will notice that the length changes after being converted to utf8. This makes it easy to identify where you have utf8 data.
With this understanding, you can add a WHERE clause to only show the rows where there are differences like this:
SELECT
name,
length(`name`) as ln,
char_length(`name`) as cl,
length(CONVERT(`name` USING utf8)) as u_ln,
char_length(CONVERT(`name` USING utf8)) as u_cl,
hex(name) as hx,
CONVERT(`name` USING utf8) as u_nm,
hex(CONVERT(`name` USING utf8)) as h_nm
FROM test
WHERE length(`name`) <> length(CONVERT(`name` USING utf8));
SELECT
name,
length(`name`) as ln,
char_length(`name`) as cl,
length(CONVERT(`name` USING utf8)) as u_ln,
char_length(CONVERT(`name` USING utf8)) as u_cl,
hex(name) as hx,
CONVERT(`name` USING utf8) as u_nm,
hex(CONVERT(`name` USING utf8)) as h_nm
FROM test
WHERE length(`name`) <> length(CONVERT(`name` USING utf8));