Jump to content
MediaWiki

Manual:findAnomalies.php

From mediawiki.org

Note: The findAnomalies.php script is no longer included in recent releases. See source code below if you want to use it.

findAnomalies.php is a maintenance script that searches for missing revisions and archive rows. The plmra mode is useful for situations in which, for a bunch of pages, you get the error "The revision #0 of the page named '[page name]' does not exist. This is usually caused by following an outdated history link to a page that has been deleted. Details can be found in the deletion log." Running the script in this mode lists all the pages that have that error.

Adding a --fix option for plmr would be a simple matter of finding the most recent revision for a page and setting page.page_latest to that. More simply, one could just use attachLatest.php.

Usage

[edit ]
Option Required Description
mode Required
plmra Finds pages whose page.page_latest points to a revision ID that's missing from revision.rev_id AND archive.ar_rev_id.
plmr Finds pages whose page.page_latest points to a revision ID that's missing from revision.rev_id but is in archive.ar_rev_id.
rmra Finds revisions missing from both revision.rev_id and archive.ar_rev_id.
rmr Finds revisions missing from revision.rev_id.
ranges Optional For rmr or rmra mode, list adjacent rev_ids using ranges (e.g. 500-600) rather than in the comma-delimited 500, 501, 502 format.

Sample output

[edit ]

rmra mode

[edit ]
$ php findAnomalies.php --mode=rmra --ranges
Reading the revision table...
Reading the archive table...
Reading the page table...
Here are the revision IDs of the revisions missing from both the revision and archive tables:
557		1 row; 1 anomalies found so far
19195		1 row; 2 anomalies found so far
20515		1 row; 3 anomalies found so far
20517		1 row; 4 anomalies found so far
20519		1 row; 5 anomalies found so far
20530		1 row; 6 anomalies found so far
23454		1 row; 7 anomalies found so far
23473		1 row; 8 anomalies found so far
23475		1 row; 9 anomalies found so far
27001-27030	30 rows; 39 anomalies found so far
27061-27226	166 rows; 205 anomalies found so far
27228-27252	25 rows; 230 anomalies found so far
27254-27562	309 rows; 539 anomalies found so far
28828		1 row; 540 anomalies found so far
29303		1 row; 541 anomalies found so far
29305		1 row; 542 anomalies found so far
31735-31736	2 rows; 544 anomalies found so far
32414		1 row; 545 anomalies found so far
32545		1 row; 546 anomalies found so far
32554		1 row; 547 anomalies found so far
32702		1 row; 548 anomalies found so far
32721		1 row; 549 anomalies found so far
32840		1 row; 550 anomalies found so far
32951		1 row; 551 anomalies found so far
32976		1 row; 552 anomalies found so far
33007		1 row; 553 anomalies found so far
33044		1 row; 554 anomalies found so far
33079		1 row; 555 anomalies found so far
33170		1 row; 556 anomalies found so far
33200		1 row; 557 anomalies found so far
33263		1 row; 558 anomalies found so far
33332		1 row; 559 anomalies found so far
33381		1 row; 560 anomalies found so far
33774		1 row; 561 anomalies found so far
561 anomalies found

plmra mode

[edit ]
$ php findAnomalies.php --mode=plmra
Reading the revision table...
Reading the archive table...
Reading the page table...
Here are the pages that have no revision nor archived revision for their page_latest:
Cambodia
Cambridge,_Massachusetts
Campaign_finance_law
Campus_libertarian
Canada
[... etc., etc. ...]
Drug_test
Cannibalism
Genital_integrity
French_économistes
Frédéric_Bastiat
178 anomalies found

Code

[edit ]
<?php
/**
* findAnomalies.php
* By Leucosticte
* Version 1.0.1
*
* Searches for:
* Mode plmra: pages whose page.page_latest points to a revision ID
* that's missing from revision.rev_id AND archive.ar_rev_id.
* Mode plmr: pages whose page.page_latest points to a revision ID
* that's missing from revision.rev_id but is in archive.ar_rev_id.
* Mode rmra: revisions missing from both revision.rev_id and
* archive.ar_rev_id.
* Mode rmr: revisions missing from revision.rev_id.
* 
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
*/
require_once __DIR__ . '/Maintenance.php';
/**
* Maintenance script to find database anomalies.
*
* @ingroup Maintenance
*/
/**
 * Maintenance script to find database anomalies.
 *
 * Modes, as implemented by execute():
 * - plmra: pages whose page_latest is in neither revision.rev_id nor
 *   archive.ar_rev_id.
 * - plmr: pages whose page_latest is not in revision.rev_id (the archive
 *   table is not read in this mode).
 * - rmra: IDs from 1 up to the highest page_latest that are in neither
 *   revision.rev_id nor archive.ar_rev_id.
 * - rmr: IDs from 1 up to the highest page_latest that are not in
 *   revision.rev_id.
 */
class FindAnomalies extends Maintenance {
	/**
	 * Registers the script description and its two options: the required
	 * "mode" option (takes a value) and the optional "ranges" flag.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Find database anomalies" );
		$this->addOption( "mode", "Mode of operation (plmra, plmr, rmra, or rmr)", true,
			true );
		$this->addOption( "ranges", "For rmr or rmra mode, list adjacent rev_ids using ranges "
			. "(e.g. 500-600) rather than in the comma-delimited 500, 501, 502 format",
			false );
	}

	/**
	 * Loads the revision, (optionally) archive and page tables into memory,
	 * prints the anomalies for the selected mode and finishes with a total.
	 *
	 * Exits with status 1 after printing a usage line when the mode is not
	 * one of plmra, plmr, rmra or rmr.
	 */
	public function execute() {
		// defined() takes the constant's NAME as a string. Passing the bare
		// constant evaluates it first, which fails when it does not exist.
		if ( !defined( 'DB_SLAVE' ) ) {
			define( 'DB_SLAVE', DB_REPLICA );
		}

		$acceptableModes = array( 'plmra', 'plmr', 'rmra', 'rmr' );
		$mode = $this->getOption( 'mode' );
		$ranges = $this->getOption( 'ranges' );
		if ( !in_array( $mode, $acceptableModes, true ) ) {
			$this->output( "Usage: php findAnomalies.php --mode=<plmra, plmr, rmra, or rmr> "
				. "[--ranges]\n" );
			// Non-zero status so callers can tell the run was rejected.
			exit( 1 );
		}

		$dbr = wfGetDB( DB_SLAVE );

		// Set of every rev_id present in the revision table (ID => true).
		$this->output( "Reading the revision table...\n" );
		$revisionResult = $dbr->select( 'revision', array( 'rev_id' ), array( '1=1' ) );
		$revision = array();
		foreach ( $revisionResult as $row ) {
			$revision[$row->rev_id] = true;
		}

		// Set of every ar_rev_id present in the archive table. Only the
		// "...a" modes consult it; it stays empty otherwise.
		$archive = array();
		if ( $mode == 'plmra' || $mode == 'rmra' ) {
			$this->output( "Reading the archive table...\n" );
			$archiveResult = $dbr->select( 'archive', array( 'ar_rev_id' ), array( '1=1' ) );
			foreach ( $archiveResult as $row ) {
				$archive[$row->ar_rev_id] = true;
			}
		}

		// Pages are kept as a plain list rather than keyed by page_latest:
		// several pages can share the same page_latest (e.g. 0) and each of
		// them has to be reported.
		$this->output( "Reading the page table...\n" );
		$pageResult = $dbr->select( 'page', array( 'page_namespace', 'page_title',
			'page_latest' ), array( '1=1' ) );
		$pages = array();
		$highestRevision = 0;
		foreach ( $pageResult as $row ) {
			$latest = (int)$row->page_latest;
			$pages[] = array(
				'page_namespace' => $row->page_namespace,
				'page_title' => $row->page_title,
				'page_latest' => $latest,
			);
			if ( $latest > $highestRevision ) {
				$highestRevision = $latest;
			}
		}

		$modeMessages = array(
			'plmra' => "Here are the pages that have no revision nor archived revision for their "
				. "page_latest:\n",
			'plmr' => "Here are the pages that have no revision for their page_latest:\n",
			'rmra' => "Here are the revision IDs of the revisions missing from both the revision and "
				. "archive tables:\n",
			'rmr' => "Here are the revision IDs of the revisions missing from the revision table:\n"
		);
		$this->output( "\n" . $modeMessages[$mode] );

		$foundAny = 0;

		if ( $mode == 'plmra' || $mode == 'plmr' ) {
			$namespaces = MWNamespace::getCanonicalNamespaces();
			foreach ( $pages as $thisPage ) {
				$latest = $thisPage['page_latest'];
				// plmr only requires the revision row to be missing; plmra
				// additionally requires the archive row to be missing.
				$missing = !isset( $revision[$latest] )
					&& ( $mode == 'plmr' || !isset( $archive[$latest] ) );
				if ( !$missing ) {
					continue;
				}
				$foundAny++;
				$ns = $thisPage['page_namespace'];
				// Skip the prefix for an empty name and for namespace IDs
				// that are not in the canonical list.
				if ( !empty( $namespaces[$ns] ) ) {
					$this->output( $namespaces[$ns] . ":" );
				}
				$this->output( $thisPage['page_title'] . "\n" );
			}
		}

		if ( $mode == 'rmra' || $mode == 'rmr' ) {
			// First ID of the run of missing IDs currently being collected
			// (0 = no open run) and the last missing ID seen in that run.
			$rangeBegins = 0;
			$lastFound = 0;
			// True once at least one ID has been printed in comma mode.
			$needComma = false;

			// Inclusive upper bound: the highest page_latest may itself be
			// missing and must be checked too.
			for ( $id = 1; $id <= $highestRevision; $id++ ) {
				$missing = !isset( $revision[$id] )
					&& ( $mode == 'rmr' || !isset( $archive[$id] ) );
				if ( $missing ) {
					$foundAny++;
					if ( $ranges ) {
						if ( !$rangeBegins ) {
							$rangeBegins = $id;
						}
						$lastFound = $id;
					} else {
						// Separator goes before every ID except the first,
						// so no ID is dropped and no trailing ", " is left.
						$this->output( ( $needComma ? ", " : "" ) . $id );
						$needComma = true;
					}
				} elseif ( $rangeBegins ) {
					// An existing ID closes the open run of missing ones.
					$this->outputRange( $rangeBegins, $lastFound, $foundAny );
					$rangeBegins = 0;
				}
			}

			// Flush a run that extends up to the last ID checked.
			if ( $rangeBegins ) {
				$this->outputRange( $rangeBegins, $lastFound, $foundAny );
			}
			if ( !$ranges ) {
				$this->output( "\n" );
			}
		}

		if ( !$foundAny ) {
			$this->output( "No anomalies found!\n" );
		} else {
			$this->output( "\n$foundAny anomalies found\n" );
		}
	}

	/**
	 * Prints one line describing a run of consecutive missing IDs.
	 *
	 * @param int $first First missing ID of the run
	 * @param int $last Last missing ID of the run
	 * @param int $foundSoFar Running total of anomalies, including this run
	 */
	private function outputRange( $first, $last, $foundSoFar ) {
		if ( $first == $last ) {
			$this->output( $last . "\t\t1 row; $foundSoFar anomalies "
				. "found so far\n" );
			return;
		}

		$rows = $last - $first + 1;
		// One padding space per threshold the row count stays below, so the
		// running totals line up for counts of different widths.
		$padding = '';
		foreach ( array( 10, 100, 1000, 10000 ) as $threshold ) {
			if ( $rows < $threshold ) {
				$padding .= ' ';
			}
		}
		$this->output( $first . "-" . $last . "\t" . $rows . " rows; " . $padding
			. "$foundSoFar anomalies found so far\n" );
	}
}
// Name of the Maintenance subclass declared in this file.
$maintClass = 'FindAnomalies';
// Prefer RUN_MAINTENANCE_IF_MAIN when it is defined; otherwise fall back to
// DO_MAINTENANCE so the script also works on versions before 1.17.
require_once ( defined( 'RUN_MAINTENANCE_IF_MAIN' ) ? RUN_MAINTENANCE_IF_MAIN : DO_MAINTENANCE );

AltStyle によって変換されたページ (->オリジナル) /