Jump to content
Wikipedia The Free Encyclopedia

User:Tom.Bot/Task6 code

From Wikipedia, the free encyclopedia

Source

[edit ]
/// <summary>
/// Adds <c>{{Authority control}}</c> to an article that has a person/scientist infobox,
/// does not already carry the template, and whose Wikidata item holds at least one of
/// the identifier properties listed in <c>acPropertyList</c>.
/// </summary>
/// <param name="ArticleText">Current wikitext of the page.</param>
/// <param name="ArticleTitle">Page title; also used to look up the page's Wikidata item.</param>
/// <param name="wikiNamespace">Namespace number of the page (not used by this method).</param>
/// <param name="Summary">
/// Receives the edit summary on success, or a short reason when the page is skipped.
/// </param>
/// <param name="Skip">
/// Set to <c>true</c> when no edit should be made (presumably honoured by the calling
/// bot framework - confirm against the host application).
/// </param>
/// <returns>The (possibly modified) wikitext.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    // global switches //////////////////////////////////////////////////////////

    bool tomBot = true;
    bool saveSkipSummaries = false;
    bool skipIfBlacklisted = true;
    bool manuallyCheckPagesWithoutAGoodInfobox = false; // usually it's an {{infobox person}} or {{infobox scientist}}
    bool manuallyPlaceAuthorityAtEndOfPage = false;     // aid for pages w/o a {{DEFAULTSORT}} nor cats; manual use only
    bool liveDebug = false;
    Skip = false;


    // global-use vars //////////////////////////////////////////////////////////

    Summary = "";


    // preliminary exceptions/error checking ////////////////////////////////////

    // Sandbox mode is auto-detected from the title; it bypasses every network call below.
    bool sandboxDebug = ArticleTitle == "User:Tom.Reding/sandbox";

    List<string> blackList = new List<string>(new string[] {
        ""
    });
    if (skipIfBlacklisted && blackList.Contains(ArticleTitle))
    {
        Summary = "Blacklisted article";
        Skip = true;
    }

    // check for appropriate (bio) infoboxes (now done via PetScan for all templates in [[Category:People and person infobox templates]], per BRFA)

    string peopleTemplatesRegex = @"\{\{\s*(?:[Ii]nfobox[ _]+actor[ _]+voice|[Ii]nfobox[ _]+Actor|[Ii]nfobox[ _]+actor|[Ii]nfobox[ _]+Actress|[Ii]nfobox[ _]+actress|[Ii]nfobox[ _]+adult[ _]+biography|[Ii]nfobox[ _]+adult[ _]+female|[Ii]nfobox[ _]+adult[ _]+male|[Ii]nfobox[ _]+Biography|[Ii]nfobox[ _]+biography|[Ii]nfobox[ _]+bio|[Ii]nfobox[ _]+Celebrity|[Ii]nfobox[ _]+director|[Ii]nfobox[ _]+entertainer|[Ii]nfobox[ _]+Fashion[ _]+Designer|[Ii]nfobox[ _]+fashion[ _]+designer|[Ii]nfobox[ _]+film[ _]+actor|[Ii]nfobox[ _]+film[ _]+director|[Ii]nfobox[ _]+human[ _]+being|[Ii]nfobox[ _]+human|[Ii]nfobox[ _]+Indian[ _]+Businessmen|[Ii]nfobox[ _]+Journalist|[Ii]nfobox[ _]+journalist|[Ii]nfobox[ _]+people|[Ii]nfobox[ _]+performer|[Ii]nfobox[ _]+person/measurements|[Ii]nfobox[ _]+person[ _]+ii|[Ii]nfobox[ _]+person|[Ii]nfobox[ _]+Person|[Ii]nfobox[ _]+photographer|[Ii]nfobox[ _]+Real[ _]+Person|[Ii]nfobox[ _]+trade[ _]+unionist|[Ii]nfobox[ _]+victim|[Pp]ersonbox)(?=\s*(?:\||\<\!\-\-))";

    string scientistTemplatesRegex = @"\{\{\s*(?:[Ii]nfobox[ _]+Academic|[Ii]nfobox[ _]+chemist|[Ii]nfobox[ _]+historian|[Ii]nfobox[ _]+mathematician|[Ii]nfobox[ _]+Professor|[Ii]nfobox[ _]+scientist|[Ii]nfobox[ _]+Scientist)(?=\s*(?:\||\<\!\-\-))";

    bool hasPersonInfobox = Regex.IsMatch(ArticleText, peopleTemplatesRegex, RegexOptions.IgnoreCase);
    bool hasScientistInfobox = Regex.IsMatch(ArticleText, scientistTemplatesRegex, RegexOptions.IgnoreCase);
    bool noBioTemplates = !hasPersonInfobox && !hasScientistInfobox;
    if (!Skip && noBioTemplates && !manuallyCheckPagesWithoutAGoodInfobox)
    {
        // With manuallyCheckPagesWithoutAGoodInfobox on, the page is allowed through for manual review.
        Summary += @"No bio templates found. ";
        Skip = true;
    }

    // check for {{Authority control
    if (!Skip)
    {
        string authorityAliasesRegex = @"\{\{\s*(?:[Aa]uthoritycontrol|[Aa]uthority[ _]+controll|[Aa]uthority[ _]+control|[Aa]uthority[ _]+Control|[Aa]utorité|[Ee]xternal[ _]+identifiers|[Nn]ormdaten)"; // 0 grps
        bool hasAuthority = Regex.IsMatch(ArticleText, authorityAliasesRegex, RegexOptions.IgnoreCase);
        if (hasAuthority)
        {
            Summary += @"{{Authority control}} exists. ";
            Skip = true;
        }
    }

    // get wikibase_item via WP API
    // ex: https://en.wikipedia.org//w/api.php?action=query&format=json&prop=pageprops&titles=Panthera%20leo&redirects=0&formatversion=2&ppprop=wikibase_item
    // Uri.EscapeDataString percent-encodes the UTF-8 bytes of every reserved or non-ASCII
    // character, so titles containing e.g. an en dash, '%', '#' or accented letters are
    // transmitted correctly (a hand-written Replace chain cannot cover them all).
    string articleTitleUrl = System.Uri.EscapeDataString(ArticleTitle);
    string url1 = @"https://en.wikipedia.org//w/api.php?action=query&format=json&prop=pageprops&titles=" +
                  articleTitleUrl + @"&redirects=0&formatversion=2&ppprop=wikibase_item";
    string html1 = "";
    if (!Skip && !sandboxDebug)
    {
        try
        {
            html1 = Tools.GetHTML(url1);
        }
        catch
        {
            // Any failure of the request is reported through Summary instead of propagating.
            Summary = "GetHTML1 failed. ArticleTitle_URL = " + articleTitleUrl + " . ";
            if (!liveDebug) Skip = true;
        }
    }


    // html1 error checks ///////////////////////////////////////////////////////

    // Groups[1] is "" when the response holds no wikibase_item (or no request was made).
    string qid = Regex.Match(html1, @"wikibase_item"":""([^""]+)").Groups[1].Value;
    if (string.IsNullOrEmpty(qid) && !Skip && !sandboxDebug)
    {
        Summary = @"QID retrieval failed. ";
        Skip = true;
    }

    if (!Regex.IsMatch(qid, @"^Q\d+$") && !Skip && !sandboxDebug) // case sensitive, jtbs
    {
        Summary = @"Unexpected QID format. ";
        Skip = true;
    }


    // determine # of WD properties used ////////////////////////////////////////

    List<string> acPropertyList = new List<string>(new string[] {
        // from Module:Authority control's local conf = { ... } table:
        "P864",
        "P2558",
        "P3293",
        "P1015",
        "P2092",
        "P950",
        "P268",
        "P428",
        "P651",
        "P271",
        "P2456",
        "P227",
        "P902",
        "P213",
        "P347",
        "P1248",
        "P244",
        "P886",
        "P640",
        "P434",
        "P549",
        "P1225",
        "P1223",
        "P1222",
        "P1048",
        "P349",
        "P691",
        "P409",
        "P496",
        "P2750",
        "P1053",
        "P650",
        "P350",
        "P947",
        "P396",
        "P906",
        "P781",
        "P3430",
        "P269",
        "P1362",
        "P245",
        "P1157",
        "P214"
    });

    // get Wikidata
    // ex: https://www.wikidata.org//w/api.php?action=wbgetclaims&format=json&entity=Q184201
    string url2 = @"https://www.wikidata.org//w/api.php?action=wbgetclaims&format=json&entity=" + qid;
    string html2 = "";
    if (!Skip && !sandboxDebug)
    {
        try
        {
            html2 = Tools.GetHTML(url2);
        }
        catch
        {
            // Any failure of the request is reported through Summary instead of propagating.
            Summary = "GetHTML2 failed. URL2 = " + url2 + " . ";
            if (!liveDebug) Skip = true;
        }
    }

    // scrape Wikidata
    // example text surrounding a populated property from
    // https://www.wikidata.org/w/api.php?action=wbgetclaims&entity=Q184201 :
    // "P227": [
    //   {
    //     "mainsnak": {
    //       "snaktype": "value",
    //       "property": "P227",
    //       "hash": "275a0595679f80411271280f2ee7344a94dfbeb6",
    //       "datavalue": {
    //         "value": "4776869-1",
    //         "type": "string"
    //       },
    //       "datatype": "external-id"
    //     },
    int iProps = 0;
    if (!Skip && !sandboxDebug)
    {
        foreach (string p in acPropertyList)
        {
            // A property counts only when its snak carries a non-empty string "value".
            string propertyRegex = @"""property"":\s*""" + p + @""",[^\{\}]*""datavalue"":\s*\{\s*""value"":\s*""[^""]+""";
            if (Regex.IsMatch(html2, propertyRegex)) iProps++;
        }

        if (iProps == 0)
        {
            Summary = @"0 IDs on Wikidata. ";
            Skip = true;
        }
    }


    // main /////////////////////////////////////////////////////////////////////

    if (!Skip)
    {
        // Sandbox runs never query Wikidata, so pretend one identifier was found.
        if (sandboxDebug) iProps = 1;

        // std {{DEFAULTSORT
        string defaultSortRegex = @"\{\{\s*(?:DEFAULTSORT|[Dd]efaultSort|[Dd]efaultsort|DEFAULT[ _]+SORT|[Dd]efault[ _]+sort|[Ss]ORTIERUNG:Lasorling|SORTIERUNG)(?=[:\|\}])";
        ArticleText = Regex.Replace(ArticleText, defaultSortRegex, @"{{DEFAULTSORT", RegexOptions.IgnoreCase);

        // Move {{-stub}} tag closer to end of page, otherwise GenFixes adds an extra line before {{Authority control}} that can't be fixed w/o a reparse.
        // Leading "\s*" replaced with "\n" fix cases like "{{reflist}}{{blah-stub}}" on the same line.
        // Group 1 = the stub tag, group 2 = the category link; they are emitted in swapped order.
        string moveStubAfterCatRegex = @"\s*(\{\{[^\{\}]*[ -]stub\s*\}\})\s*(\[\[\s*Category[^\[\]]+\]\])";
        ArticleText = Regex.Replace(ArticleText, moveStubAfterCatRegex, "\n" + @"$2" + "\n" + @"$1", RegexOptions.IgnoreCase);

        string authorityComplete = @"{{Authority control}}";
        // Group 1 = everything from the start of the page up to (not including) the line
        // break(s) that precede the first {{DEFAULTSORT or [[Category line.
        string addBeforeCatsRegex = @"(^[\d\D]+?)(?=[\r\n]+[ \t]*(?:\{\{DEFAULTSORT|\[\[\s*Category))"; // better results than adding after last cat

        string plural = (iProps > 1) ? "s" : "";
        string successSummary = @"+{{[[Template:Authority control|Authority control]]}}";
        if (tomBot) successSummary = @"[[Wikipedia:Bots/Requests for approval/Tom.Bot 6|Task 6]]: " + successSummary;
        if (iProps > 0) successSummary += " (" + iProps + @" source" + plural + @" from Wikidata)";
        successSummary += ", [[WP:GenFixes]] on,";

        bool noCat = !Regex.IsMatch(ArticleText, addBeforeCatsRegex, RegexOptions.IgnoreCase);
        if (!noCat)
        {
            // Re-emit the leading text ($1) and put the template on its own line after it.
            ArticleText = Regex.Replace(ArticleText, addBeforeCatsRegex, @"$1" + "\n" + authorityComplete, RegexOptions.IgnoreCase);
            Summary = successSummary;
        }
        else if (manuallyPlaceAuthorityAtEndOfPage)
        {
            ArticleText += "\n" + authorityComplete;
            Summary = successSummary + " (uncategorized page) ";
        }
        else
        {
            Summary += @"No cats/DEFAULTSORT to anchor {{Authority control}} to. Batch manually/code later. ";
            Skip = true;
        }
    }


    // exception tracking ///////////////////////////////////////////////////////

    if (Skip && saveSkipSummaries && !sandboxDebug)
    {
        string message = ArticleTitle + "\t" + Summary + "\n";
        string fileName = @"Module output - Add {{Authority control}} (skip summaries).txt";
        string directory = @"F:\"; // desktop
        string fullPath = directory + fileName;
        const bool append = true;
        Tools.WriteTextFileAbsolutePath(message, fullPath, append);
    }

    // Debug modes always let the (possibly unmodified) text through so the result can be inspected.
    if (liveDebug || sandboxDebug) Skip = false;

    return ArticleText;
}

AltStyle によって変換されたページ (->オリジナル) /