Wiki143:WikiProject Red Link Recovery/Link matching script/Repeated letters

From Wikipedia, the free encyclopedia
Revision as of 23:37, 17 November 2014 by imported>YiFeiBot (Bot: Migrating interwiki links, now provided by Wikidata on d:q15885048)
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

(This deliberately repeats the same SQL statements twice, to ensure that runs of 3 or 4 identical letters in a row are handled properly.)

This crushing method often produces unacceptably high numbers of false positives. Additional work must be done to filter the results. Positive hits are more likely in titles with more than 2 words.

-- Discard every suggestion that contains fewer than two underscores.
-- The underscore count is the number of characters lost when every '_'
-- is stripped out of the suggestion.
delete
from suggestions
where length( replace( suggestion, '_', '' ) ) > length( suggestion ) - 2;

-- NB - do not crush xx's and ii's, as these appear in roman numerals
--    - also skip ee's and oo's - these are just too common in English

-- First pass over crushed_art.title: collapse each doubled letter into a
-- single letter. The letters e, i, o and x are intentionally left alone.
-- The replace() calls are nested so that they apply in alphabetical order:
-- 'aa' is innermost (applied first) and 'zz' is outermost (applied last).
update crushed_art
set title =
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(
        title
    , 'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'), 'ff', 'f'), 'gg', 'g')
    , 'hh', 'h'), 'jj', 'j'), 'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n')
    , 'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'), 'tt', 't'), 'uu', 'u')
    , 'vv', 'v'), 'ww', 'w'), 'yy', 'y'), 'zz', 'z');

-- First pass over crushed_links.link: collapse each doubled letter into a
-- single letter. The letters e, i, o and x are intentionally left alone.
-- The replace() calls are nested so that they apply in alphabetical order:
-- 'aa' is innermost (applied first) and 'zz' is outermost (applied last).
update crushed_links
set link =
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(
        link
    , 'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'), 'ff', 'f'), 'gg', 'g')
    , 'hh', 'h'), 'jj', 'j'), 'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n')
    , 'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'), 'tt', 't'), 'uu', 'u')
    , 'vv', 'v'), 'ww', 'w'), 'yy', 'y'), 'zz', 'z');

-- Repeat pass over crushed_art.title. A single pass only halves a run of
-- identical letters ('aaa' and 'aaaa' both become 'aa'), so the same
-- replacements are applied again to reduce runs of 3 or 4 letters to one.
-- The letters e, i, o and x are intentionally left alone. Nesting order
-- matches alphabetical order: 'aa' innermost (first), 'zz' outermost (last).
update crushed_art
set title =
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(
        title
    , 'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'), 'ff', 'f'), 'gg', 'g')
    , 'hh', 'h'), 'jj', 'j'), 'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n')
    , 'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'), 'tt', 't'), 'uu', 'u')
    , 'vv', 'v'), 'ww', 'w'), 'yy', 'y'), 'zz', 'z');

-- Repeat pass over crushed_links.link. A single pass only halves a run of
-- identical letters ('aaa' and 'aaaa' both become 'aa'), so the same
-- replacements are applied again to reduce runs of 3 or 4 letters to one.
-- The letters e, i, o and x are intentionally left alone. Nesting order
-- matches alphabetical order: 'aa' innermost (first), 'zz' outermost (last).
update crushed_links
set link =
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(
        link
    , 'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'), 'ff', 'f'), 'gg', 'g')
    , 'hh', 'h'), 'jj', 'j'), 'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n')
    , 'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'), 'tt', 't'), 'uu', 'u')
    , 'vv', 'v'), 'ww', 'w'), 'yy', 'y'), 'zz', 'z');