diff --git a/3-0-java-core/3-6-3-crazy-regex/README.MD b/3-0-java-core/3-6-3-crazy-regex/README.MD new file mode 100644 index 000000000..34afaf353 --- /dev/null +++ b/3-0-java-core/3-6-3-crazy-regex/README.MD @@ -0,0 +1,67 @@ +# Crazy Regex + +### Pre-conditions ❗ +You're supposed to know how to work with regex and be able to build Patterns and Matchers + +### Objectives +* **build Patterns to extract** necessary parts from text ✅ +* **manipulate** extracted text with **Matcher** object ✅ + +### Regular expressions - a sequence of characters that defines a search pattern for text + +--- + +There are 2 pieces of the puzzle: +* Literal characters - I want to match literally the character I specified (like 'a') +* Meta characters - I want to match any character of this kind (more generic/abstract thing) + +Single char + +* \\d -> 0-9 +* \\D -> negation of \\d +* \\w -> A-Za-z0-9_ +* \\W -> negation of \\w +* \\s -> whitespace (space, tab, newline) +* \\S -> negation of \\s +* . -> anything but newline +* \\. -> literal dot + + +Quantifiers - specify how many times in a row the preceding character must match +* \* -> zero or more times +* \+ -> one or more times +* ? -> zero or one time +* {min,max} -> some range +* {n} -> precise quantity + + +Position +* ^ -> beginning +* $ -> end +* \\b -> word boundary + +--- + +Character class -> is the thing that appears in between []. For example [abc] -> match 'a' or 'b' or 'c'. +Another example [-.] -> match dash or period. Here . is not a meta character anymore, while - and ^ are special characters inside [] +* [0-5] -> match all digits from 0 to 5. [^0-5] -> match anything that is NOT 0-5 +BUT ^ works as a meta character only when it is in the first position; otherwise it is a literal, like in [a^bc] + +--- + +Capturing Groups - whenever you do a regex search, the whole match is captured as group 0.
+* \\d{3}-\\d{3}-\\d{4} -> 212-555-1234 = GROUP 0 + +Parentheses can capture a subgroup: +\\d{3}-(\\d{3})-(\\d{4}) where 212-555-1234 = GROUP 0, 555 = GROUP 1, 1234 = GROUP 2 + +We can refer to these groups by $1 ($ is used when we want to replace) and \1 (within the regex itself; referring to a capture group this way +is called a back reference) + +--- + +#### 🆕 First time here? – [See Introduction](https://github.com/bobocode-projects/java-fundamentals-course/tree/main/0-0-intro#introduction) +#### ➡️ Have any feedback? – [Please fill the form ](https://forms.gle/jhXEAzG4TB81S43CA) + + + diff --git a/3-0-java-core/3-6-3-crazy-regex/pom.xml b/3-0-java-core/3-6-3-crazy-regex/pom.xml new file mode 100644 index 000000000..53d0fa6b7 --- /dev/null +++ b/3-0-java-core/3-6-3-crazy-regex/pom.xml @@ -0,0 +1,19 @@ + + + + 3-0-java-core + com.bobocode + 1.0-SNAPSHOT + + 4.0.0 + + 3-6-3-crazy-regex + + + 11 + 11 + + + \ No newline at end of file diff --git a/3-0-java-core/3-6-3-crazy-regex/src/main/java/com/bobocode/se/CrazyRegex.java b/3-0-java-core/3-6-3-crazy-regex/src/main/java/com/bobocode/se/CrazyRegex.java new file mode 100644 index 000000000..9c56a05aa --- /dev/null +++ b/3-0-java-core/3-6-3-crazy-regex/src/main/java/com/bobocode/se/CrazyRegex.java @@ -0,0 +1,255 @@ +package com.bobocode.se; + +import com.bobocode.util.ExerciseNotCompletedException; + +import java.util.regex.Pattern; + +/** + * {@link CrazyRegex} is an exercise class. Each method returns Pattern class which + * should be created using regex expression.
Every method that is not implemented yet + * throws {@link ExerciseNotCompletedException} + * @author Andriy Paliychuk + * TODO: remove exception and implement each method of this class using java.util.regex.Pattern + */ +public class CrazyRegex { + + /** + * A Pattern that finds all words "Curiosity" in text + * + * @return a pattern that looks for the word "Curiosity" + */ + public Pattern findSpecificWord() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds first word in text + * + * @return a pattern that looks for the first word in text + */ + public Pattern findFirstWord() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds last word in text + * + * @return a pattern that looks for the last word in text + */ + public Pattern findLastWord() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all numbers in text. When we have "555-555", "(555)555" and "30th" in text + * our pattern must grab all those numbers: + * "555" - four times, and one "30" + * + * @return a pattern that looks for numbers + */ + public Pattern findAllNumbers() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all dates. For instance: "1971-11-23" + * + * @return a pattern that looks for dates + */ + public Pattern findDates() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds different variations of word "color". + * We are looking for: "color", "colour", "colors", "colours" + * + * @return a pattern that looks for different variations of word "color" + */ + public Pattern findDifferentSpellingsOfColor() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all zip codes in text. + * Zip code is a 5-digit number without any characters or special symbols.
+ * For example: 72300 + * + * @return a pattern that looks for zip codes + */ + public Pattern findZipCodes() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds different variations of word "link". + * We are looking for: "lynk", "link", "l nk", "l(nk" + * + * @return a pattern that looks for different variations of word "link" + */ + public Pattern findDifferentSpellingsOfLink() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds phone numbers. + * For example: "555-555-5555" + * + * @return a pattern that looks for phone numbers + */ + public Pattern findSimplePhoneNumber() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds numbers with the following requirements: + * - the number may contain only digits from 0 to 5 + * - length 3 + * + * @return a pattern that looks for numbers with length 3 that contain only digits from 0 to 5 + */ + public Pattern findNumbersFromZeroToFiveWithLengthThree() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all words in text that have length 5 + * + * @return a pattern that looks for the words that have length 5 + */ + public Pattern findAllWordsWithFiveLength() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds words and numbers with the following constraints: + * - not shorter than two symbols + * - not longer than three symbols + * + * @return a pattern that looks for words and numbers that are not shorter than 2 and not longer than 3 symbols + */ + public Pattern findAllLettersAndDigitsWithLengthThree() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all words that begin with capital letter + * + * @return a pattern that looks for the words that begin with capital letter + */ + public Pattern findAllWordsWhichBeginWithCapitalLetter() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds only the following abbreviation: + * -
AK, AL, AR, AZ, CA, CO, CT, PR, PA, PD + * + * @return a pattern that looks for the abbreviations above + */ + public Pattern findAbbreviation() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all open braces + * + * @return a pattern that looks for all open braces + */ + public Pattern findAllOpenBraces() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds everything inside [] + * + * @return a pattern that looks for everything inside [] + */ + public Pattern findOnlyResources() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all https links in note.txt + * + * @return a pattern that looks for all https links in note.txt + */ + public Pattern findOnlyLinksInNote() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all http links in nasa.json + * + * @return a pattern that looks for all http links in nasa.json + */ + public Pattern findOnlyLinksInJson() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds all .com, .net and .edu emails + * + * @return a pattern that looks for all .com, .net and .edu emails + */ + public Pattern findAllEmails() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds the following examples of phone numbers: + * - 555-555-5555 + * - 555.555.5555 + * - (555)555-5555 + * + * @return a pattern that looks for phone numbers patterns above + */ + public Pattern findAllPatternsForPhoneNumbers() { + throw new ExerciseNotCompletedException(); + } + + /** + * A Pattern that finds only duplicates + * + * @return a pattern that looks for duplicates + */ + public Pattern findOnlyDuplicates() { + throw new ExerciseNotCompletedException(); + } + + /** + * You have a text where all names are recorded as: last name, first name.
+ * Create matcher and use method replaceAll to record those names as: + * - first name last name + * + * @return String where all names are recorded as first name last name + */ + public String replaceFirstAndLastNames(String names) { + throw new ExerciseNotCompletedException(); + } + + /** + * You have a text with phone numbers. + * Create matcher and use method replaceAll to replace last digits: + * - 555-XXX-XXXX + * + * @return String where in all phone numbers the last 7 digits are replaced with X + */ + public String replaceLastSevenDigitsOfPhoneNumberToX(String phones) { + throw new ExerciseNotCompletedException(); + } + + /** + * You have a text with resources and links to those resources: + * - [Bobocode](https://www.bobocode.com) + * Create matcher and use method replaceAll to get the following result: + * - <a href="https://www.bobocode.com">Bobocode</a> + * + * @return String where all resources are wrapped in href + */ + public String insertLinksAndResourcesIntoHref(String links) { + throw new ExerciseNotCompletedException(); + } + + +} diff --git a/3-0-java-core/3-6-3-crazy-regex/src/test/java/com/bobocode/se/CrazyRegexTest.java b/3-0-java-core/3-6-3-crazy-regex/src/test/java/com/bobocode/se/CrazyRegexTest.java new file mode 100644 index 000000000..a01c42c24 --- /dev/null +++ b/3-0-java-core/3-6-3-crazy-regex/src/test/java/com/bobocode/se/CrazyRegexTest.java @@ -0,0 +1,241 @@ +package com.bobocode.se; + +import lombok.SneakyThrows; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +import static java.util.stream.Collectors.joining; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class CrazyRegexTest { + + private final
CrazyRegex crazyRegex = new CrazyRegex(); + + private final String text; + private final String json; + + public CrazyRegexTest() { + this.text = readWholeFile("note.txt"); + this.json = readWholeFile("nasa.json"); + } + + @Test + @Order(1) + void findSpecificWord() { + String result = regexChecker(crazyRegex.findSpecificWord(), json); + assertThat(result).isEqualTo("\nCuriosity\nCuriosity\nCuriosity"); + } + + @Test + @Order(2) + void findFirstWord() { + String result = regexChecker(crazyRegex.findFirstWord(), text); + assertThat(result).isEqualTo("\nThe"); + } + + @Test + @Order(3) + void findLastWord() { + String result = regexChecker(crazyRegex.findLastWord(), text); + assertThat(result).isEqualTo("\nfish"); + } + + @Test + @Order(4) + void findAllNumbers() { + String result = regexChecker(crazyRegex.findAllNumbers(), text); + assertThat(result).isEqualTo("\n01001\n03148\n02132\n412\n555\n1212\n412\n555" + + "\n1234\n412\n555\n1234\n646\n555\n1234\n1"); + } + + @Test + @Order(5) + void findDates() { + String result = regexChecker(crazyRegex.findDates(), json); + assertThat(result).isEqualTo("\n2015-05-30\n2012-08-06\n2011-11-26\n2015-05-30\n2012-08-06\n" + + "2011-11-26\n2015-05-30\n2012-08-06\n2011-11-26"); + } + + @Test + @Order(6) + void findDifferentSpellingsOfColor() { + String result = regexChecker(crazyRegex.findDifferentSpellingsOfColor(), text); + assertThat(result).isEqualTo("\ncolors\ncolours\ncolour"); + } + + @Test + @Order(7) + void findZipCodes() { + String result = regexChecker(crazyRegex.findZipCodes(), text); + assertThat(result).isEqualTo("\n 01001 \n 03148 \n 02132 "); + } + + @Test + @Order(8) + void findDifferentSpellingsOfLink() { + String result = regexChecker(crazyRegex.findDifferentSpellingsOfLink(), text); + assertThat(result).isEqualTo("\nlynk\nlink\nl nk\nl(nk"); + } + + @Test + @Order(9) + void findSimplePhoneNumber() { + String result = regexChecker(crazyRegex.findSimplePhoneNumber(), text); + 
assertThat(result).isEqualTo("\n412-555-1234"); + } + + @Test + @Order(10) + void findNumbersFromZeroToFiveWithLengthThree() { + String result = regexChecker(crazyRegex.findNumbersFromZeroToFiveWithLengthThree(), text); + assertThat(result).isEqualTo("\n010\n031\n021\n412\n555\n121\n412" + + "\n555\n123\n412\n555\n123\n555\n123"); + } + + @Test + @Order(11) + void findAllWordsWithFiveLength() { + String result = regexChecker(crazyRegex.findAllWordsWithFiveLength(), json); + assertThat(result).isEqualTo("\nFront\nrover\nFront\nrover\nrover"); + } + + @Test + @Order(12) + void findAllLettersAndDigitsWithLengthThree() { + String result = regexChecker(crazyRegex.findAllLettersAndDigitsWithLengthThree(), text); + assertThat(result).isEqualTo("\nThe\nof\nthe\nand\nthe\nnot\nThe\nis\ndon\nyou\nnk\nnk\nThe\nCA\nAK\nPA\n412" + + "\n555\ncom\n412\n555\n412\n555\n646\n555\nof\ncom\nnet\nor\nnyu\nedu\n1Z\naaa\nOf\nwww\ncom\ncom\nwww\ncom" + + "\nis\nis\nam\nnot\nnot\nwhy\nwhy\nam\nok\ncat\ncat\ndog\ndog"); + } + + @Test + @Order(13) + void findAllWordsWhichBeginWithCapitalLetter() { + String result = regexChecker(crazyRegex.findAllWordsWhichBeginWithCapitalLetter(), json); + assertThat(result).isEqualTo("\nFront\nHazard\nAvoidance\nCamera" + + "\nCuriosity\nFront\nHazard\nAvoidance\nCamera\nCuriosity\nRear\nHazard\nAvoidance\nCamera\nCuriosity"); + } + + @Test + @Order(14) + void findAbbreviation() { + String result = regexChecker(crazyRegex.findAbbreviation(), text); + assertThat(result).isEqualTo("\nCA\nAK\nPA"); + } + + @Test + @Order(15) + void findAllOpenBraces() { + String result = regexChecker(crazyRegex.findAllOpenBraces(), text); + assertThat(result).isEqualTo("\n{{{\n{{\n{"); + } + + @Test + @Order(16) + void findOnlyResources() { + String result = regexChecker(crazyRegex.findOnlyResources(), text); + assertThat(result).isEqualTo("\nGoogle\nStackOverflow\nYoutube"); + } + + @Test + @Order(17) + void findOnlyLinksInNote() { + String result = 
regexChecker(crazyRegex.findOnlyLinksInNote(), text); + assertThat(result).isEqualTo("\nhttps://www.google.com\nhttps://stackoverflow.com\nhttps://www.youtube.com"); + } + + @Test + @Order(18) + void findOnlyLinksInJson() { + String result = regexChecker(crazyRegex.findOnlyLinksInJson(), json); + assertThat(result).isEqualTo( + "\nhttp://mars.jpl.nasa.gov/msl-raw-images/proj/msl/redops/ods/surface/sol/01000/opgs/edr/fcam/FLB_486265257EDR_F0481570FHAZ00323M_.JPG\n" + + "http://mars.jpl.nasa.gov/msl-raw-images/proj/msl/redops/ods/surface/sol/01000/opgs/edr/fcam/FRB_486265257EDR_F0481570FHAZ00323M_.JPG\n" + + "http://mars.jpl.nasa.gov/msl-raw-images/proj/msl/redops/ods/surface/sol/01000/opgs/edr/rcam/RLB_486265291EDR_F0481570RHAZ00323M_.JPG" + ); + } + + @Test + @Order(19) + void findAllEmails() { + String result = regexChecker(crazyRegex.findAllEmails(), text); + assertThat(result).isEqualTo("\njohnsmith@yahoo.com\nterek.koval@gmail.com\nterek@koval.net" + + "\nterek.koval@nyu.edu"); + } + + @Test + @Order(20) + void findAllPatternsForPhoneNumbers() { + String result = regexChecker(crazyRegex.findAllPatternsForPhoneNumbers(), text); + assertThat(result).isEqualTo("\n(412)555-1212\n412-555-1234\n646.555.1234"); + } + + @Test + @Order(21) + void findOnlyDuplicates() { + String result = regexChecker(crazyRegex.findOnlyDuplicates(), text); + assertThat(result).isEqualTo("\nis is\ntext text\ndouble double\nI I\nnot not\nwhy why" + + "\ncat cat\ndog\ndog\nfish fish"); + } + + @Test + @Order(22) + void replaceFirstAndLastNames() { + String names = "Tarasenko, Nazar ... Petrashyk, Petro ... Zlepko, Andrii"; + String result = crazyRegex.replaceFirstAndLastNames(names); + assertThat(result).isEqualTo("Nazar Tarasenko ... Petro Petrashyk ... 
Andrii Zlepko"); + } + + @Test + @Order(23) + void replaceLastSevenDigitsOfPhoneNumberToX() { + String phones = "(948)333-5656 1235-889-7897 111.747.6236"; + String result = crazyRegex.replaceLastSevenDigitsOfPhoneNumberToX(phones); + assertThat(result).isEqualTo("948-XXX-XXXX 1235-XXX-XXXX 111-XXX-XXXX"); + } + + @Test + @Order(24) + void insertLinksAndResourcesIntoHref() { + String links = "[Bobocode](https://www.bobocode.com)" + + "\n[LinkedIn](https://www.linkedin.com)" + + "\n[Netflix](https://www.netflix.com)"; + String result = crazyRegex.insertLinksAndResourcesIntoHref(links); + assertThat(result).isEqualTo( + "Bobocode\n" + + "LinkedIn\n" + + "Netflix" + ); + } + + private String regexChecker(Pattern pattern, String str2WorkWith) { + Matcher matcher = pattern.matcher(str2WorkWith); + StringBuilder stringBuilder = new StringBuilder(); + while (matcher.find()) { + if(matcher.group().length() != 0) { + stringBuilder.append("\n").append(matcher.group()); + } + } + return stringBuilder.toString(); + } + + @SneakyThrows + private String readWholeFile(String fileName) { + Path filePath = Paths.get(CrazyRegex.class.getClassLoader() + .getResource(fileName) + .toURI()); + try (Stream fileLinesStream = Files.lines(filePath)) { + return fileLinesStream.collect(joining("\n")); + } + } +} \ No newline at end of file diff --git a/3-0-java-core/3-6-3-crazy-regex/src/test/resources/nasa.json b/3-0-java-core/3-6-3-crazy-regex/src/test/resources/nasa.json new file mode 100644 index 000000000..e21232795 --- /dev/null +++ b/3-0-java-core/3-6-3-crazy-regex/src/test/resources/nasa.json @@ -0,0 +1,23 @@ +{"photos":[ + {"id":102693, + "sol":1000, + "camera":{"id":20,"name":"FHAZ","rover_id":5,"full_name":"Front Hazard Avoidance Camera"}, + "img_src":"http://mars.jpl.nasa.gov/msl-raw-images/proj/msl/redops/ods/surface/sol/01000/opgs/edr/fcam/FLB_486265257EDR_F0481570FHAZ00323M_.JPG", + "earth_date":"2015-05-30", + 
"rover":{"id":5,"name":"Curiosity","landing_date":"2012-08-06","launch_date":"2011-11-26","status":"active"} + }, + {"id":102694, + "sol":1000, + "camera":{"id":20,"name":"FHAZ","rover_id":5,"full_name":"Front Hazard Avoidance Camera"}, + "img_src":"http://mars.jpl.nasa.gov/msl-raw-images/proj/msl/redops/ods/surface/sol/01000/opgs/edr/fcam/FRB_486265257EDR_F0481570FHAZ00323M_.JPG", + "earth_date":"2015-05-30", + "rover":{"id":5,"name":"Curiosity","landing_date":"2012-08-06","launch_date":"2011-11-26","status":"active"} + }, + {"id":102850, + "sol":1000, + "camera":{"id":21,"name":"RHAZ","rover_id":5,"full_name":"Rear Hazard Avoidance Camera"}, + "img_src":"http://mars.jpl.nasa.gov/msl-raw-images/proj/msl/redops/ods/surface/sol/01000/opgs/edr/rcam/RLB_486265291EDR_F0481570RHAZ00323M_.JPG", + "earth_date":"2015-05-30", + "rover":{"id":5,"name":"Curiosity","landing_date":"2012-08-06","launch_date":"2011-11-26","status":"active"} + } +]} \ No newline at end of file diff --git a/3-0-java-core/3-6-3-crazy-regex/src/test/resources/note.txt b/3-0-java-core/3-6-3-crazy-regex/src/test/resources/note.txt new file mode 100644 index 000000000..2ab940b50 --- /dev/null +++ b/3-0-java-core/3-6-3-crazy-regex/src/test/resources/note.txt @@ -0,0 +1,9 @@ +The colors of the rainbow have many colours and the rainbow does not have a single colour +The lynk is quite a link don't you think? l nk l(nk +The Collin Richardson CA 01001 AK 03148 PA 02132 (412)555-1212 johnsmith@yahoo.com 412-555-1234 412 555-1234 646.555.1234 +I know email addresses of fascinating people like terek.koval@gmail.com terek@koval.net or +terek.koval@nyu.edu + 1Z aaa **** *** {{{ {{ { Of +[Google](https://www.google.com)[StackOverflow](https://stackoverflow.com)[Youtube](https://www.youtube.com) +This is is some text text with double double words some where I I I am not not sure why why I am typing ok? 
cat cat dog +dog fish fish \ No newline at end of file diff --git a/3-0-java-core/pom.xml b/3-0-java-core/pom.xml index 9d1bf8e3b..0ca6bcf96 100644 --- a/3-0-java-core/pom.xml +++ b/3-0-java-core/pom.xml @@ -13,6 +13,7 @@ 3-6-1-file-reader 3-6-2-file-stats + 3-6-3-crazy-regex

AltStyle によって変換されたページ (->オリジナル) /