|
| 1 | +#!/usr/bin/env perl |
| 2 | +# Get the title, description, journal name and citation count from Google Scholar for a particular author |
| 3 | + |
| 4 | +# https://metacpan.org/pod/Mojo::UserAgent |
| 5 | +# https://metacpan.org/pod/Mojo::DOM |
| 6 | +# https://metacpan.org/pod/Mojo::Collection |
| 7 | + |
| 8 | +use strict; |
| 9 | +use warnings; |
| 10 | +use Carp qw( croak ); |
| 11 | +use Mojo::UserAgent; |
| 12 | +use Data::Dumper; |
| 13 | + |
| 14 | +# The output sometimes contains Unicode characters. |
| 15 | +# Hence, set UTF-8 encoding layers on STDIN/STDOUT/STDERR (and as the default for open) so they print without "Wide character" warnings |
| 16 | +use open ':std', ':encoding(UTF-8)'; |
| 17 | + |
# Extract the journal/source part of a result byline.
#
# The byline is expected to look like "<authors> - <journal, year> - <site>";
# everything after the first separator is returned, trimmed.
# Returns the empty string when the byline is undefined or has no separator.
sub _journal_from_byline {
    my ($byline) = @_;
    return '' unless defined $byline;

    # Prefer a hyphen with whitespace (or a no-break space) on both sides, so
    # hyphenated author names such as "J-P Dupont" are not cut in half.
    my (undef, $journal) = split /[\s\x{A0}]+-[\s\x{A0}]+/, $byline, 2;

    # Fall back to the first bare hyphen for bylines without spaced separators.
    (undef, $journal) = split /-/, $byline, 2 unless defined $journal;

    # No separator at all: report "no journal" instead of propagating undef,
    # which would trigger "uninitialized value" warnings further down.
    return '' unless defined $journal;

    $journal =~ s/\A[\s\x{A0}]+//;
    $journal =~ s/[\s\x{A0}]+\z//;
    return $journal;
}

# Fetch $url with the user agent $ua and collect one record per search result.
#
# Parameters:
#   $ua  - user agent object; must provide get($url)->result (Mojo::UserAgent).
#   $url - address of the result page to fetch (string or URL object).
#
# Returns an array reference of hash references, one per 'div.gs_ri' element.
# Each hash holds whichever of the keys Title, Abstract, Journal and Citation
# had non-empty text; absent fields are left out of the hash entirely.
#
# Croaks with the response message when the response is not a success.
sub crawl_results {
    my ($ua, $url) = @_;

    my $response = $ua->get($url)->result;
    croak $response->message unless $response->is_success;

    my @publications;

    # https://docs.mojolicious.org/Mojo/DOM#find
    for my $div ($response->dom->find('div.gs_ri')->each) {

        # 'all_text' is used instead of 'text' because 'text' skips the content
        # of child elements, dropping any words wrapped in nested tags
        # (e.g. <b> highlights or <a> author links).
        my $title    = $div->find('h3.gs_rt a')->map('all_text')->join("\n");
        my $abstract = $div->find('div.gs_rs')->map('all_text')->join("\n");
        my $journal  = $div->find('div.gs_a')->map('all_text')
            ->map(\&_journal_from_byline)->join("\n");

        # https://docs.mojolicious.org/Mojo/Collection
        my $citation = $div->find('div.gs_fl a')
            ->grep(sub { $_->text =~ /Cited by/ })->map('text')->join("\n");

        my %candidate = (
            Title    => $title,
            Abstract => $abstract,
            Journal  => $journal,
            Citation => $citation,
        );

        my %article;
        for my $field (keys %candidate) {

            # join() may hand back a string-like object; store a plain string.
            my $value = "$candidate{$field}";
            $article{$field} = $value if $value ne '';
        }
        push @publications, \%article;
    }
    return \@publications;
}
| 63 | + |
# Script entry point: build a Google Scholar search URL for one hard-coded
# author, fetch it once via crawl_results() and print the parsed records with
# Data::Dumper. Takes no arguments; any error raised by crawl_results()
# propagates and ends the script.
sub main {
    my $base_url = "https://scholar.google.com/scholar";

    # Query parameters understood by the search endpoint (several may be combined):
    # {
    #   "as_ylo"         => <lowest year of the year range>,
    #   "as_yhi"         => <highest year of the year range>,
    #   "as_vis"         => <0|1; 1 excludes citations, 0 includes them>,
    #   "as_sdt"         => <0|1; 1 excludes patents, 0 includes them>,
    #   "scisbd"         => <0|1; 1 sorts the results by date>,
    #   "as_publication" => <journal/source name>,
    #   "hl"             => <language of the result/output, "en" means English>,
    #   "as_q"           => "<title of the article to search> author:<name>"
    # }
    # Here: all articles for a particular author, with English output.
    my $url = Mojo::URL->new($base_url)
        ->query({"as_q" => "author:\"kshama Rai\"", "hl" => "en"});

    my $ua = Mojo::UserAgent->new;

    # Send a browser-like User-Agent instead of the library default.
    # NOTE(review): presumably needed because the default agent is rejected — confirm.
    $ua->transactor->name('Mozilla/5.0');

    my $output = crawl_results($ua, $url);
    print Dumper($output);
}

main();
| 90 | + |
| 91 | +__END__ |
| 92 | + |
| 93 | +Output - |
| 94 | + |
| 95 | +[ |
| 96 | + { |
| 97 | + 'Title' => 'Role of supplemental UV-B in changing the level of ozone toxicity in two cultivars of sunflower: growth, seed yield and oil quality', |
| 98 | + 'Journal' => 'Ecotoxicology, 2019 - Springer', |
| 99 | + 'Abstract' => "Abstract Ultraviolet-B radiation (UV-B) is inherent part of solar spectrum and tropospheric ozone (O 3) is a potent secondary air pollutant. Therefore the present study was conducted to evaluate the responses of Helianthus annuus L. cvs DRSF 108 and Sungold (sunflower)\x{a0}\x{2026}", |
| 100 | + 'Citation' => 'Cited by 5' |
| 101 | + }, |
| 102 | + { |
| 103 | + 'Title' => 'Effects of UV-B radiation on morphological, physiological and biochemical aspects of plants: an overview', |
| 104 | + 'Journal' => 'J Sci Res, 2017 - bhu.ac.in', |
| 105 | + 'Citation' => 'Cited by 10', |
| 106 | + 'Abstract' => "Origin of life was never be thought without considering the role of UV radiation but once the \x{201c}boon\x{201d}, is |
| 107 | +slowly becoming \x{201c}curse\x{201d} for life. Plants are exposed to many factors but the problem of enhanced UV-B is created by the anthropogenic activities resulted in ozone layer\x{a0}\x{2026}" |
| 108 | + }, |
| 109 | + { |
| 110 | + 'Abstract' => "In the present study sensitivity of a medicinal plant Eclipta alba L.(Hassk)(False daisy) was assessed under intermittent (IT) and continuous (CT) doses of elevated ultraviolet-B (eUV-B). Eclipta alba is rich in medicinally important phytochemical constituents, used against\x{a0}\x{2026}", |
| 111 | + 'Journal' => 'Physiology and Molecular Biology of Plants, 2020 - Springer', |
| 112 | + 'Title' => "Effect on essential oil components and wedelolactone content of a medicinal plant Eclipta alba due to modifications in the growth and morphology under different\x{a0}\x{2026}" |
| 113 | + }, |
| 114 | + { |
| 115 | + 'Citation' => 'Cited by 1', |
| 116 | + 'Abstract' => "Climate change is associated to how weather patterns change over decades or longer due to natural and human influences. Since the industrial revolution, humans have contributed to climate change through the emission of greenhouse gases and aerosols as well as changes\x{a0}\x{2026}", |
| 117 | + 'Title' => 'Climate Change and Secondary Metabolism in Plants: Resilience to Disruption', |
| 118 | + 'Journal' => "Climate Change and Agricultural\x{a0}\x{2026}, 2019 - Elsevier" |
| 119 | + }, |
| 120 | + { |
| 121 | + 'Title' => 'HOST PATHOGEN INTERACTIONS BETWEEN DROSOPHILA MELANOGASTER AND BEAUVERIA BASSIANA _ A Thesis Presented to the', |
| 122 | + 'Journal' => '2019 - search.proquest.com', |
| 123 | + 'Abstract' => "Drosophila melanogaster is an established model organism for immunity as their immune system is similar to insect disease vectors and pests and also shares similarities with that of the mammalian innate immune system. Our study uses the entomopathogenic fungus\x{a0}\x{2026}" |
| 124 | + }, |
| 125 | + { |
| 126 | + 'Abstract' => "Page 1. i \x{201c}LOW WEIGHT GAIN AS A PREDICTOR FOR DEVELOPMENT OF RETINOPATHY |
| 127 | +OF PREMATURITY\x{201d} By Dr. KSHAMA RAI MBBS Dissertation Submitted to the Rajiv Gandhi |
| 128 | +University of Health Sciences, Karnataka, Bangalore In partial fulfilment of the requirements\x{a0}\x{2026}", |
| 129 | + 'Title' => 'Low weight gain as a predictor for development of retinopathy of prematurity', |
| 130 | + 'Journal' => '2018 - 112.133.228.240' |
| 131 | + }, |
| 132 | + { |
| 133 | + 'Journal' => 'gyanvihar.org', |
| 134 | + 'Title' => 'Use of High Resolution Remote Sensing Data and GIS Techniques for Monitoring Of \'U\'Shaped Wetland At GB Nagar District, |
| 135 | +Uttar Pradesh', |
| 136 | + 'Abstract' => "In developing countries of the world, the ever increasing population and to fulfill its need for housing and other economic activities almost urban fringe are getting encroached and our surrounding environment and natural wetlands, water bodies and other biological cycles are\x{a0}\x{2026}" |
| 137 | + } |
| 138 | +] |
0 commit comments