How can one verify pdf using Selenium + Java?
Until recently, I was verifying pdf files using Firefox (making sure that the pdf file wasn't empty). This worked fine, but it's not multi-platform.
I now need to verify pdf using chrome, edge, safari, etc... So doing something platform specific isn't really an option anymore.
Here is what I was doing so far using Firefox:
Click on the pdf link
Wait for the new window to appear
Switch to the new window
Verify the presence of class "page" and of id "viewer"
Close the new window and go back to the old one
However, the class "page" and the id "viewer" are specific to how Firefox displays pdf and this same code doesn't work with chrome.
So, how can I verify a pdf using Selenium + Java?
1 Answer 1
After a lot of thinking and research, here is the solution I finally came up with:
Download the pdf
Verify the pdf using Java
Verifying the pdf using Java was the easy part (since it was, by default, multi-platform). Here is the website that helped me do so: https://www.mkyong.com/java/pdfbox-how-to-read-pdf-file-in-java/
Downloading the pdf in a multi-platform way was more complicated, especially since I couldn't use the class "Robot" because my driver was executing remotely (I didn't actually try this solution, but I read that it wouldn't work for my specific case, so I didn't take any chances and searched for another way to do what I wanted).
I finally found this answer, which led me to the resolution of my problem.
So, after all this, here is what (part of) my code looks like:
pom.xml:
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<!-- Apache PDFBox: provides the org.apache.pdfbox classes (PDDocument, PDFTextStripper) that the Java code imports to read the downloaded pdf. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.17</version>
</dependency>
java:
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Locale;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
/**
 * Browser-independent check of the pdf links on the current page.
 *
 * <p>Each link is clicked, the window it opens is focused, and the document behind the
 * window's URL is downloaded and inspected with {@link ClassWhereIPutStaticStuff} instead of
 * relying on the DOM of a particular browser's pdf viewer.
 */
public class MyClass {
    /** Relative path the pdf under test is downloaded to; overwritten for every link. */
    private static final String DOWNLOADED_PDF = "target" + File.separator + "my_pdf.pdf";

    private WebDriver driver;

    /** Handle of the window that holds the links; set by {@link #switchToNewWindow()}. */
    private String parentHandle;

    //My entry point
    /**
     * Clicks every anchor whose {@code href} contains "pdf" and verifies the document that
     * opens in the new window.
     *
     * <p>The new window is closed and focus is returned to the original window even when the
     * verification fails, so a failing pdf does not leave a stray window behind.
     */
    public void iVerifyThatPdfLinksAreProperlyDisplayed() {
        List<WebElement> pdfLinks = driver.findElements(By.xpath("//a[contains(@href,'pdf')]"));
        for (WebElement pdfLink : pdfLinks) {
            pdfLink.click();
            switchToNewWindow();
            try {
                verifyPdf();
            } finally {
                // Always restore the window state; otherwise the next
                // numberOfWindowsToBe(2) wait would see one window too many.
                goBackToOldWindow();
            }
        }
    }

    /**
     * Remembers the current window as the parent, waits up to 5 seconds until exactly two
     * windows exist, and moves the driver's focus to the one that is not the parent.
     */
    private void switchToNewWindow() {
        // NOTE(review): relies on WebDriver still being focused on the original window after
        // the click, i.e. the driver does not follow the newly opened window by itself.
        parentHandle = driver.getWindowHandle(); // get the current window handle
        WebDriverWait wait = new WebDriverWait(driver, 5);
        wait.until(ExpectedConditions.numberOfWindowsToBe(2));
        for (String winHandle : driver.getWindowHandles()) {
            if (!winHandle.equals(parentHandle)) {
                driver.switchTo().window(winHandle); // that's the newly opened window
                break; // only one other window is expected, no need to keep looking
            }
        }
    }

    /**
     * Closes the currently focused window unless it is the parent, then focuses the parent.
     */
    private void goBackToOldWindow() {
        if (!driver.getWindowHandle().equals(parentHandle)) {
            driver.close(); // close newly opened window when done with it
        }
        driver.switchTo().window(parentHandle); // switch back to the original window
    }

    /**
     * Waits up to 10 seconds for the focused window to leave "about:blank", then downloads the
     * document at its URL and checks that it is a readable, non-empty pdf.
     *
     * @throws AssertionError if the pdf cannot be downloaded or read (the original exception is
     *     attached as the cause), or if one of the content assertions fails
     */
    private void verifyPdf() {
        WebDriverWait wait = new WebDriverWait(driver, 10);
        wait.until(ExpectedConditions.not(ExpectedConditions.urlContains("about:blank")));
        String pdfUrl = driver.getCurrentUrl();
        System.out.println(pdfUrl);
        try {
            ClassWhereIPutStaticStuff.download(pdfUrl, DOWNLOADED_PDF);
            // An empty expected text means "any content is fine as long as there is some".
            ClassWhereIPutStaticStuff.readPDF(DOWNLOADED_PDF, "");
        } catch (Exception e) {
            // Fail the test but keep the cause and the URL in the report.
            throw new AssertionError("Could not download or read the pdf at " + pdfUrl, e);
        }
    }
}
////////////////////////////////////////
/**
 * Static helpers for fetching a pdf to disk and asserting on its text content.
 */
public class ClassWhereIPutStaticStuff {
    /** The extracted text must be longer than this many characters to count as non-trivial. */
    private static final int MIN_TEXT_LENGTH = 10;

    /**
     * Copies the resource at {@code url} to {@code fileName}, replacing an existing file.
     *
     * <p>Missing parent directories of {@code fileName} are created first, so the download does
     * not fail merely because e.g. {@code target} does not exist yet.
     *
     * <p>NOTE(review): the stream is opened from this JVM, not through the browser, so
     * presumably cookies or authentication held by a WebDriver session are not sent. Confirm
     * the pdf URL is reachable without them.
     *
     * @param url absolute URL of the document to fetch
     * @param fileName path of the file to write
     * @throws Exception if the URL is malformed, cannot be opened, or the file cannot be written
     */
    public static void download(String url, String fileName) throws Exception {
        Path target = Paths.get(fileName);
        Path parent = target.getParent();
        if (parent != null) {
            Files.createDirectories(parent); // no-op when the directory already exists
        }
        try (InputStream in = URI.create(url).toURL().openStream()) {
            Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Extracts the text of the pdf at {@code fileName}, prints it line by line, and asserts
     * that it is longer than {@value #MIN_TEXT_LENGTH} characters and contains
     * {@code textWeWant}, ignoring case.
     *
     * <p>Fails with "file is encrypted!" when the document is encrypted.
     *
     * @param fileName path of the pdf file to read
     * @param textWeWant text that must occur in the pdf; pass {@code ""} to accept any content
     * @throws IOException if the file cannot be loaded or its text cannot be extracted
     */
    public static void readPDF(String fileName, String textWeWant) throws IOException {
        try (PDDocument document = PDDocument.load(new File(fileName))) {
            if (!document.isEncrypted()) {
                String pdfFileInText = new PDFTextStripper().getText(document);
                // Check for null before the text is used for anything else.
                assertNotNull(pdfFileInText);
                // split into lines (on \n or \r\n) for readable console output
                for (String line : pdfFileInText.split("\\r?\\n")) {
                    System.out.println(line);
                }
                // Locale.ROOT keeps the case-insensitive match independent of the machine's
                // default locale (e.g. the Turkish dotless i).
                String normalizedText = pdfFileInText.toLowerCase(Locale.ROOT);
                assertFalse(normalizedText.isEmpty());
                assertTrue(normalizedText.length() > MIN_TEXT_LENGTH);
                assertTrue(normalizedText.contains(textWeWant.toLowerCase(Locale.ROOT)));
            } else {
                fail("file is encrypted!");
            }
        }
    }
}