User:Tom.Bot/Task6 code
Appearance
From Wikipedia, the free encyclopedia
Source
/// <summary>
/// Custom-module entry point: decides whether a biography page should receive an
/// {{Authority control}} template and, if so, inserts it ahead of the page's
/// {{DEFAULTSORT}}/category block.
/// </summary>
/// <remarks>
/// Processing order:
/// 1. skip blacklisted titles;
/// 2. skip pages without a person/scientist infobox;
/// 3. skip pages that already carry {{Authority control}} (or an alias);
/// 4. look up the page's Wikidata item ID (QID) through the Wikipedia API;
/// 5. count how many authority-control properties have a value on that item;
/// 6. insert the template and build the summary text.
/// The title "User:Tom.Reding/sandbox" turns on sandbox mode, which bypasses both
/// network lookups and never skips.
/// </remarks>
/// <param name="ArticleText">Current wikitext of the page.</param>
/// <param name="ArticleTitle">Full page title.</param>
/// <param name="wikiNamespace">Namespace number of the page (not used by this module).</param>
/// <param name="Summary">Receives the success summary, or the reason the page was skipped.</param>
/// <param name="Skip">Receives true when the page should be left untouched.</param>
/// <returns>The (possibly modified) wikitext.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    // global switches //////////////////////////////////////////////////////////
    bool TomBot = true;
    bool SaveSkipSummaries = false;
    bool SkipIfBlacklisted = true;
    bool ManuallyCheckPagesWithoutAGoodInfobox = false; // usually it's an {{infobox person}} or {{infobox scientist}}
    bool ManuallyPlaceAuthorityAtEndOfPage = false;     // aid for pages w/o a {{DEFAULTSORT}} nor cats; manual use only
    bool LiveDebug = false;
    bool SandboxDebug = false;                          // auto-detected from the title below
    Skip = false;

    // global-use vars //////////////////////////////////////////////////////////
    Summary = "";

    // preliminary exceptions/error checking ////////////////////////////////////
    if (ArticleTitle == "User:Tom.Reding/sandbox")
    {
        SandboxDebug = true;
    }

    List<string> BlackList = new List<string>(new string[] { "" });
    if (!Skip && BlackList.Contains(ArticleTitle))
    {
        if (SkipIfBlacklisted)
        {
            Summary = "Blacklisted article";
            Skip = true;
        }
    }

    // check for appropriate (bio) infoboxes (now done via PetScan for all templates in
    // [[Category:People and person infobox templates]], per BRFA).
    // The pattern is split over several literals for readability only; the concatenated
    // value is a single alternation followed by a lookahead for "|" or "<!--".
    string PeopleTemplates_Regex =
        @"\{\{\s*(?:[Ii]nfobox[ _]+actor[ _]+voice|[Ii]nfobox[ _]+Actor|[Ii]nfobox[ _]+actor" +
        @"|[Ii]nfobox[ _]+Actress|[Ii]nfobox[ _]+actress|[Ii]nfobox[ _]+adult[ _]+biography" +
        @"|[Ii]nfobox[ _]+adult[ _]+female|[Ii]nfobox[ _]+adult[ _]+male|[Ii]nfobox[ _]+Biography" +
        @"|[Ii]nfobox[ _]+biography|[Ii]nfobox[ _]+bio|[Ii]nfobox[ _]+Celebrity|[Ii]nfobox[ _]+director" +
        @"|[Ii]nfobox[ _]+entertainer|[Ii]nfobox[ _]+Fashion[ _]+Designer|[Ii]nfobox[ _]+fashion[ _]+designer" +
        @"|[Ii]nfobox[ _]+film[ _]+actor|[Ii]nfobox[ _]+film[ _]+director|[Ii]nfobox[ _]+human[ _]+being" +
        @"|[Ii]nfobox[ _]+human|[Ii]nfobox[ _]+Indian[ _]+Businessmen|[Ii]nfobox[ _]+Journalist" +
        @"|[Ii]nfobox[ _]+journalist|[Ii]nfobox[ _]+people|[Ii]nfobox[ _]+performer" +
        @"|[Ii]nfobox[ _]+person/measurements|[Ii]nfobox[ _]+person[ _]+ii|[Ii]nfobox[ _]+person" +
        @"|[Ii]nfobox[ _]+Person|[Ii]nfobox[ _]+photographer|[Ii]nfobox[ _]+Real[ _]+Person" +
        @"|[Ii]nfobox[ _]+trade[ _]+unionist|[Ii]nfobox[ _]+victim|[Pp]ersonbox)(?=\s*(?:\||\<\!\-\-))";
    string ScientistTemplates_Regex =
        @"\{\{\s*(?:[Ii]nfobox[ _]+Academic|[Ii]nfobox[ _]+chemist|[Ii]nfobox[ _]+historian" +
        @"|[Ii]nfobox[ _]+mathematician|[Ii]nfobox[ _]+Professor|[Ii]nfobox[ _]+scientist" +
        @"|[Ii]nfobox[ _]+Scientist)(?=\s*(?:\||\<\!\-\-))";

    bool Bio1 = Regex.IsMatch(ArticleText, PeopleTemplates_Regex, RegexOptions.IgnoreCase);
    bool Bio2 = Regex.IsMatch(ArticleText, ScientistTemplates_Regex, RegexOptions.IgnoreCase);
    bool NoBioTemplates = (Bio1 == false && Bio2 == false);
    if (!Skip && NoBioTemplates)
    {
        if (ManuallyCheckPagesWithoutAGoodInfobox)
        {
            // OK to proceed (manually)
        }
        else
        {
            Summary += @"No bio templates found. ";
            Skip = true;
        }
    }

    // check for {{Authority control
    if (!Skip)
    {
        string AuthorityAliases_Regex = @"\{\{\s*(?:[Aa]uthoritycontrol|[Aa]uthority[ _]+controll|[Aa]uthority[ _]+control|[Aa]uthority[ _]+Control|[Aa]utorité|[Ee]xternal[ _]+identifiers|[Nn]ormdaten)"; // 0 grps
        bool HasAuthority = Regex.IsMatch(ArticleText, AuthorityAliases_Regex, RegexOptions.IgnoreCase);
        if (HasAuthority)
        {
            Summary += @"{{Authority control}} exists. ";
            Skip = true;
        }
    }

    // get wikibase_item via WP API
    // ex: https://en.wikipedia.org//w/api.php?action=query&format=json&prop=pageprops&titles=Panthera%20leo&redirects=0&formatversion=2&ppprop=wikibase_item
    // Percent-encode the title as UTF-8. This replaces a hand-written chain of
    // string.Replace calls that covered only a few ASCII characters and encoded the
    // en dash as "%96" (a Windows-1252 byte) rather than its UTF-8 form "%E2%80%93".
    string ArticleTitle_URL = System.Uri.EscapeDataString(ArticleTitle);
    string URL1 = @"https://en.wikipedia.org//w/api.php?action=query&format=json&prop=pageprops&titles=" +
                  ArticleTitle_URL + @"&redirects=0&formatversion=2&ppprop=wikibase_item";
    string HTML1 = "";
    if (!Skip && !SandboxDebug)
    {
        try
        {
            HTML1 = Tools.GetHTML(URL1);
        }
        catch
        {
            // Best effort: any fetch failure is reported through Summary instead of being thrown.
            Summary = "GetHTML1 failed. ArticleTitle_URL = " + ArticleTitle_URL + " . ";
            if (!LiveDebug)
            {
                Skip = true;
            }
        }
    }

    // html1 error checks ///////////////////////////////////////////////////////
    // Groups[1].Value is "" when the response has no wikibase_item entry.
    string QID = Regex.Match(HTML1, @"wikibase_item"":""([^""]+)").Groups[1].Value;
    if (string.IsNullOrEmpty(QID) && !Skip && !SandboxDebug)
    {
        Summary = @"QID retrieval failed. ";
        Skip = true;
    }
    if (!Regex.IsMatch(QID, @"^Q\d+$") && !Skip && !SandboxDebug) // case sensitive, jtbs
    {
        Summary = @"Unexpected QID format. ";
        Skip = true;
    }

    // determine # of WD properties used ////////////////////////////////////////
    List<string> ACPropertyList = new List<string>(new string[] {
        // from Module:Authority control's local conf = { ... } table:
        "P864", "P2558", "P3293", "P1015", "P2092", "P950", "P268", "P428", "P651", "P271",
        "P2456", "P227", "P902", "P213", "P347", "P1248", "P244", "P886", "P640", "P434",
        "P549", "P1225", "P1223", "P1222", "P1048", "P349", "P691", "P409", "P496", "P2750",
        "P1053", "P650", "P350", "P947", "P396", "P906", "P781", "P3430", "P269", "P1362",
        "P245", "P1157", "P214"
    });

    // get Wikidata
    // ex: https://www.wikidata.org//w/api.php?action=wbgetclaims&format=json&entity=Q184201
    string URL2 = @"https://www.wikidata.org//w/api.php?action=wbgetclaims&format=json&entity=" + QID;
    string HTML2 = "";
    if (!Skip && !SandboxDebug)
    {
        try
        {
            HTML2 = Tools.GetHTML(URL2);
        }
        catch
        {
            // Best effort, same as the first lookup.
            Summary = "GetHTML2 failed. URL2 = " + URL2 + " . ";
            if (!LiveDebug)
            {
                Skip = true;
            }
        }
    }

    // scrape Wikidata
    // example text surrounding a populated property from
    // https://www.wikidata.org/w/api.php?action=wbgetclaims&entity=Q184201 :
    //    "P227": [
    //        {
    //            "mainsnak": {
    //                "snaktype": "value",
    //                "property": "P227",
    //                "hash": "275a0595679f80411271280f2ee7344a94dfbeb6",
    //                "datavalue": {
    //                    "value": "4776869-1",
    //                    "type": "string"
    //                },
    //                "datatype": "external-id"
    //            },
    int iProps = 0;
    if (!Skip && !SandboxDebug)
    {
        foreach (string p in ACPropertyList)
        {
            // A property counts only when its snak carries a non-empty string datavalue.
            string p_regex = @"""property"":\s*""" + p + @""",[^\{\}]*""datavalue"":\s*\{\s*""value"":\s*""[^""]+""";
            bool Found = Regex.IsMatch(HTML2, p_regex);
            if (Found)
            {
                iProps++;
            }
        }
        if (iProps == 0)
        {
            Summary = @"0 IDs on Wikidata. ";
            Skip = true;
        }
    }

    // main /////////////////////////////////////////////////////////////////////
    if (!Skip)
    {
        if (SandboxDebug)
        {
            // No lookups were made in sandbox mode; use placeholder values.
            iProps = 1;
            QID = "1";
        }

        // std {{DEFAULTSORT
        string DF_Regex = @"\{\{\s*(?:DEFAULTSORT|[Dd]efaultSort|[Dd]efaultsort|DEFAULT[ _]+SORT|[Dd]efault[ _]+sort|[Ss]ORTIERUNG:Lasorling|SORTIERUNG)(?=[:\|\}])";
        ArticleText = Regex.Replace(ArticleText, DF_Regex, @"{{DEFAULTSORT", RegexOptions.IgnoreCase);

        // Move {{-stub}} tag closer to end of page, otherwise GenFixes adds an extra line
        // before {{Authority control}} that can't be fixed w/o a reparse.
        // Leading "\s*" replaced with "\n" fix cases like "{{reflist}}{{blah-stub}}" on the same line.
        // "$2" is the captured category link and "$1" the captured stub template, so the
        // replacement swaps their order.
        string MoveStubAfterCat_Regex = @"\s*(\{\{[^\{\}]*[ -]stub\s*\}\})\s*(\[\[\s*Category[^\[\]]+\]\])";
        ArticleText = Regex.Replace(ArticleText, MoveStubAfterCat_Regex, "\n" + @"$2" + "\n" + @"$1", RegexOptions.IgnoreCase);

        string AuthorityComplete = @"{{Authority control}}";
        // Captures everything from the start of the page up to the line break(s) that
        // precede the first {{DEFAULTSORT or [[Category.
        string AddBeforeCats_Regex = @"(^[\d\D]+?)(?=[\r\n]+[ ]*(?:\{\{DEFAULTSORT|\[\[\s*Category))"; // better results than adding after last cat
        string Plural = (iProps > 1) ? "s" : "";
        string SuccessSummary = @"+{{[[Template:Authority control|Authority control]]}}";
        if (TomBot)
        {
            SuccessSummary = @"[[Wikipedia:Bots/Requests for approval/Tom.Bot 6|Task 6]]: " + SuccessSummary;
        }
        if (iProps > 0)
        {
            SuccessSummary += " (" + iProps + @" source" + Plural + @" from Wikidata)";
        }
        SuccessSummary += ", [[WP:GenFixes]] on,";

        bool NoCat = !Regex.IsMatch(ArticleText, AddBeforeCats_Regex, RegexOptions.IgnoreCase);
        if (NoCat)
        {
            if (ManuallyPlaceAuthorityAtEndOfPage)
            {
                ArticleText += "\n" + AuthorityComplete;
                Summary = SuccessSummary + " (uncategorized page) ";
            }
            else
            {
                Summary += @"No cats/DEFAULTSORT to anchor {{Authority control}} to. Batch manually/code later. ";
                Skip = true;
            }
        }
        else
        {
            // "$1" re-emits the captured leading text; the template goes on a new line after it.
            ArticleText = Regex.Replace(ArticleText, AddBeforeCats_Regex, @"$1" + "\n" + AuthorityComplete, RegexOptions.IgnoreCase);
            Summary = SuccessSummary;
        }
    }

    // exception tracking ///////////////////////////////////////////////////////
    if (Skip && SaveSkipSummaries && !SandboxDebug)
    {
        string Message = ArticleTitle + "\t" + Summary + "\n";
        string SkipLogFileName = @"Module output - Add {{Authority control}} (skip summaries).txt";
        string SkipLogDirectory = @"F:\"; // desktop
        string FullPath = SkipLogDirectory + SkipLogFileName;
        const bool APPEND = true;
        Tools.WriteTextFileAbsolutePath(Message, FullPath, APPEND);
    }

    // Debug modes never skip, so the result can always be inspected.
    if (LiveDebug || SandboxDebug)
    {
        Skip = false;
    }

    return ArticleText;
}