Manual:findAnomalies.php
Note: The findAnomalies.php script is no longer included in recent releases. See source code below if you want to use it.
findAnomalies.php is a maintenance script that searches for missing revisions and archive rows. The plmra mode is useful for situations in which, for a bunch of pages, you get an error of "The revision #0 of the page named '[page name]' does not exist. This is usually caused by following an outdated history link to a page that has been deleted. Details can be found in the deletion log." Running the script in plmra mode lists all the pages that have that error.
Adding a --fix option for plmr would be a simple matter of finding the most recent revision for a page and setting page.page_latest to that. More simply, one could just use attachLatest.php.
Usage
| Option | Required | Description | |
|---|---|---|---|
| mode | Required |
plmra
Finds pages whose page.page_latest points to a revision ID that's missing from revision.rev_id AND archive.ar_rev_id.
plmr
Finds pages whose page.page_latest points to a revision ID that's missing from revision.rev_id. (The script does not read the archive table in this mode, so the revision may or may not be in archive.ar_rev_id.)
rmra
Finds revisions missing from both revision.rev_id and archive.ar_rev_id.
rmr
Finds revisions missing from revision.rev_id.
| |
| ranges | Optional | For rmr or rmra mode, list adjacent rev_ids using ranges (e.g. 500-600) rather than in the comma-delimited 500, 501, 502 format. | |
Sample output
rmra mode
[edit ]$ php findAnomalies.php --mode=rmra --ranges Reading the revision table... Reading the archive table... Reading the page table... Here are the revision IDs of the revisions missing from both the revision and archive tables: 557 1 row; 1 anomalies found so far 19195 1 row; 2 anomalies found so far 20515 1 row; 3 anomalies found so far 20517 1 row; 4 anomalies found so far 20519 1 row; 5 anomalies found so far 20530 1 row; 6 anomalies found so far 23454 1 row; 7 anomalies found so far 23473 1 row; 8 anomalies found so far 23475 1 row; 9 anomalies found so far 27001-27030 30 rows; 39 anomalies found so far 27061-27226 166 rows; 205 anomalies found so far 27228-27252 25 rows; 230 anomalies found so far 27254-27562 309 rows; 539 anomalies found so far 28828 1 row; 540 anomalies found so far 29303 1 row; 541 anomalies found so far 29305 1 row; 542 anomalies found so far 31735-31736 2 rows; 544 anomalies found so far 32414 1 row; 545 anomalies found so far 32545 1 row; 546 anomalies found so far 32554 1 row; 547 anomalies found so far 32702 1 row; 548 anomalies found so far 32721 1 row; 549 anomalies found so far 32840 1 row; 550 anomalies found so far 32951 1 row; 551 anomalies found so far 32976 1 row; 552 anomalies found so far 33007 1 row; 553 anomalies found so far 33044 1 row; 554 anomalies found so far 33079 1 row; 555 anomalies found so far 33170 1 row; 556 anomalies found so far 33200 1 row; 557 anomalies found so far 33263 1 row; 558 anomalies found so far 33332 1 row; 559 anomalies found so far 33381 1 row; 560 anomalies found so far 33774 1 row; 561 anomalies found so far 561 anomalies found
plmra mode
[edit ]$ php findAnomalies.php --mode=plmra Reading the revision table... Reading the archive table... Reading the page table... Here are the pages that have no revision nor archived revision for their page_latest: Cambodia Cambridge,_Massachusetts Campaign_finance_law Campus_libertarian Canada [... etc., etc. ...] Drug_test Cannibalism Genital_integrity French_économistes Frédéric_Bastiat 178 anomalies found
Code
<?php
/**
 * findAnomalies.php
 * By Leucosticte
 * Version 1.0.1
 *
 * Searches for:
 * Mode plmra: pages whose page.page_latest points to a revision ID
 * that's missing from revision.rev_id AND archive.ar_rev_id.
 * Mode plmr: pages whose page.page_latest points to a revision ID
 * that's missing from revision.rev_id (the archive table is not
 * consulted in this mode).
 * Mode rmra: revision IDs missing from both revision.rev_id and
 * archive.ar_rev_id.
 * Mode rmr: revision IDs missing from revision.rev_id.
 *
 * In the rmra and rmr modes, every ID from 1 up to and including the
 * highest page.page_latest value is checked.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script to find database anomalies.
 *
 * @ingroup Maintenance
 */
class FindAnomalies extends Maintenance {
	/**
	 * Registers the script description and its two command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Find database anomalies" );
		$this->addOption(
			"mode",
			"Mode of operation (plmra, plmr, rmra, or rmr)",
			true,
			true
		);
		$this->addOption(
			"ranges",
			"For rmr or rmra mode, list adjacent rev_ids using ranges "
				. "(e.g. 500-600) rather than in the comma-delimited 500, 501, 502 format",
			false
		);
	}

	/**
	 * Reads the revision, archive (plmra/rmra only) and page tables, then
	 * prints the anomalies selected by --mode followed by a total.
	 *
	 * Exits with status 1 when --mode is not one of the accepted values.
	 */
	public function execute() {
		// defined() takes the constant's NAME as a string. Passing the bare
		// constant would test its value instead (and fails outright when
		// DB_SLAVE does not exist).
		if ( !defined( 'DB_SLAVE' ) ) {
			define( 'DB_SLAVE', DB_REPLICA );
		}

		$acceptableModes = array( 'plmra', 'plmr', 'rmra', 'rmr' );
		$mode = $this->getOption( 'mode' );
		$ranges = $this->getOption( 'ranges' );
		if ( !in_array( $mode, $acceptableModes, true ) ) {
			$this->output(
				"Usage: php findAnomalies.php --mode=<plmra, plmr, rmra, or rmr> [--ranges]\n"
			);
			// Non-zero status so calling scripts can detect the bad invocation.
			exit( 1 );
		}

		// Only the "...a" modes compare against the archive table.
		$checkArchive = ( $mode == 'plmra' || $mode == 'rmra' );

		$dbr = wfGetDB( DB_SLAVE );

		// Existing IDs are kept as array keys so lookups are a cheap isset().
		$this->output( "Reading the revision table...\n" );
		$revisionResult = $dbr->select( 'revision', array( 'rev_id' ), array( '1=1' ) );
		$revision = array();
		foreach ( $revisionResult as $row ) {
			$revision[$row->rev_id] = true;
		}

		$archive = array();
		if ( $checkArchive ) {
			$this->output( "Reading the archive table...\n" );
			$archiveResult = $dbr->select( 'archive', array( 'ar_rev_id' ), array( '1=1' ) );
			foreach ( $archiveResult as $row ) {
				$archive[$row->ar_rev_id] = true;
			}
		}

		$this->output( "Reading the page table...\n" );
		$pageResult = $dbr->select(
			'page',
			array( 'page_namespace', 'page_title', 'page_latest' ),
			array( '1=1' )
		);
		// Kept as a plain list: several pages can share one page_latest value
		// (e.g. 0), and keying by page_latest would keep only one of them.
		$pages = array();
		$highestRevision = 0;
		foreach ( $pageResult as $row ) {
			$pages[] = array(
				'page_namespace' => $row->page_namespace,
				'page_title' => $row->page_title,
				'page_latest' => $row->page_latest,
			);
			if ( $row->page_latest > $highestRevision ) {
				$highestRevision = $row->page_latest;
			}
		}

		$modeMessages = array(
			'plmra' => "Here are the pages that have no revision nor archived revision for their "
				. "page_latest:\n",
			'plmr' => "Here are the pages that have no revision for their page_latest:\n",
			'rmra' => "Here are the revision IDs of the revisions missing from both the revision and "
				. "archive tables:\n",
			'rmr' => "Here are the revision IDs of the revisions missing from the revision table:\n"
		);
		$this->output( "\n" . $modeMessages[$mode] );

		$namespaces = MWNamespace::getCanonicalNamespaces();
		$foundAny = 0;

		if ( $mode == 'plmra' || $mode == 'plmr' ) {
			foreach ( $pages as $thisPage ) {
				$latest = $thisPage['page_latest'];
				if ( isset( $revision[$latest] ) ) {
					continue;
				}
				if ( $checkArchive && isset( $archive[$latest] ) ) {
					continue;
				}
				$foundAny++;
				$title = $thisPage['page_title'];
				// !empty() skips both the unnamed main namespace and namespace
				// IDs that are absent from the canonical list.
				if ( !empty( $namespaces[$thisPage['page_namespace']] ) ) {
					$title = $namespaces[$thisPage['page_namespace']] . ":" . $title;
				}
				$this->output( $title . "\n" );
			}
		}

		if ( $mode == 'rmra' || $mode == 'rmr' ) {
			// 0 means "no range currently open".
			$rangeBegins = 0;
			// Set to a number that will never be adjacent to an actual rev_id or ar_rev_id
			$lastFound = -1;
			$needSeparator = false;
			// <= so that the highest page_latest itself is checked too.
			for ( $id = 1; $id <= $highestRevision; $id++ ) {
				$missing = !isset( $revision[$id] )
					&& ( !$checkArchive || !isset( $archive[$id] ) );
				if ( $missing ) {
					$foundAny++;
					if ( $ranges ) {
						if ( !$rangeBegins ) {
							$rangeBegins = $id;
						}
						$lastFound = $id;
					} else {
						// Separator goes between IDs: nothing before the first,
						// nothing after the last.
						$this->output( ( $needSeparator ? ", " : "" ) . $id );
						$needSeparator = true;
					}
				} elseif ( $rangeBegins ) {
					// An existing ID closes the open range.
					$this->outputRange( $rangeBegins, $lastFound, $foundAny );
					$rangeBegins = 0;
				}
			}
			// Flush a range that runs up to the last ID checked.
			if ( $ranges && $rangeBegins ) {
				$this->outputRange( $rangeBegins, $lastFound, $foundAny );
			}
			if ( !$ranges ) {
				$this->output( "\n" );
			}
		}

		if ( !$foundAny ) {
			$this->output( "No anomalies found!\n" );
		} else {
			$this->output( "\n$foundAny anomalies found\n" );
		}
	}

	/**
	 * Prints one line of --ranges output for a run of adjacent missing IDs.
	 *
	 * @param int $first First missing ID of the run
	 * @param int $last Last missing ID of the run
	 * @param int $foundSoFar Running total of anomalies, including this run
	 */
	private function outputRange( $first, $last, $foundSoFar ) {
		if ( $first == $last ) {
			$this->output( $last . "\t\t1 row; $foundSoFar anomalies found so far\n" );
			return;
		}
		$rows = $last - $first + 1;
		// One padding space for each digit the row count falls short of five,
		// so the running totals line up.
		$padding = str_repeat( " ", max( 0, 5 - strlen( (string)$rows ) ) );
		$this->output(
			$first . "-" . $last . "\t" . $rows . " rows; " . $padding
				. "$foundSoFar anomalies found so far\n"
		);
	}
}

$maintClass = 'FindAnomalies';
if ( defined( 'RUN_MAINTENANCE_IF_MAIN' ) ) {
	require_once RUN_MAINTENANCE_IF_MAIN;
} else {
	require_once DO_MAINTENANCE; # Make this work on versions before 1.17
}