User:Topbanana/Reports/This page contains a link that might be mis-punctuated/SQL
Jump to navigation
Jump to search
The SQL below should be run on a Link Analysis Database. It creates a number of extra tables to aid in the analysis of 'close matches' for red links.
-- Create tables and copy bad link targets and article titles
CREATE TABLE bad_squish (
from_id mediumint(7) unsigned NOT NULL,
orig_title varchar(255) binary NOT NULL,
squish_title varchar(255) binary NOT NULL,
PRIMARY KEY( from_id, orig_title )
) ENGINE=MyISAM;
CREATE TABLE art_squish (
orig_title varchar(255) binary NOT NULL,
squish_title varchar(255) binary NOT NULL,
PRIMARY KEY( orig_title )
) ENGINE=MyISAM;
INSERT INTO art_squish
SELECT art_title, art_title
FROM art;
INSERT INTO bad_squish
SELECT from_id, link, link
FROM bad_links;
DELETE FROM art_squish
WHERE Length( orig_title ) <=3;
DELETE FROM bad_squish
WHERE Length( orig_title ) <=3;
-- Now progressively mangle the link targets and article titles, removing matches
-- as we go.
-- Remove punctuation and double spaces
UPDATE bad_squish SET squish_title = replace( squish_title, '(', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '(', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, ')', '' );
UPDATE art_squish SET squish_title = replace( squish_title, ')', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '[', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '[', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, ']', '' );
UPDATE art_squish SET squish_title = replace( squish_title, ']', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, ',', '' );
UPDATE art_squish SET squish_title = replace( squish_title, ',', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '!', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '!', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '"', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '"', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '%', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '%', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '&', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '&', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '*', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '*', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '-', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '-', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '+', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '+', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '=', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '=', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '{', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '{', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '}', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '}', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, ':', '' );
UPDATE art_squish SET squish_title = replace( squish_title, ':', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, ';', '' );
UPDATE art_squish SET squish_title = replace( squish_title, ';', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '@', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '@', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '#', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '#', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '<', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '<', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '>', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '>', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '.', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '.', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '/', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '/', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '\\', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '\\', '' );
UPDATE bad_squish SET squish_title = replace( squish_title, '''', '' );
UPDATE art_squish SET squish_title = replace( squish_title, '''', '' );
ALTER TABLE bad_squish ADD INDEX ( squish_title );
ALTER TABLE art_squish ADD INDEX ( squish_title );
SELECT concat( '*[[',art_title,']] links to [[', bad_squish.orig_title, ']], try ', group_concat( concat( '[[', art_squish.orig_title, ']]' )) )
INTO OUTFILE 'c:\punc.txt'
FROM art_squish, bad_squish, art
WHERE art.art_id = bad_squish.from_id
AND bad_squish.squish_title = art_squish.squish_title
GROUP BY bad_squish.from_id, bad_squish.orig_title
ORDER BY art_title, bad_squish.orig_title;