Web Scraping and Reading a URL Using Java

Web Scraping and Reading a URL Using Java

Web scraping, web harvesting, or web data extraction is data scraping used for extracting data from websites. Web scraping software may access the World Wide Web directly using the Hypertext Transfer Protocol, or through a web browser.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal web-scraping example: downloads a page, prints its HTML, and then
 * prints the text of the page's {@code <title>} element.
 */
public class PortScanner {

    /** Address of the page downloaded by {@link #main(String[])}. */
    private static final String PAGE_URL = "https://www.yamicode.com";

    /**
     * Matches a {@code <title>} element and captures its text in group 1.
     *
     * <p>The quantifier is reluctant so the match stops at the first closing
     * tag instead of running on to the last {@code </title>} in the document
     * (inline SVG commonly contains additional title elements). DOTALL lets the
     * title span several lines; CASE_INSENSITIVE accepts {@code <TITLE>}.
     */
    private static final Pattern TITLE_PATTERN = Pattern.compile(
            "<title\\b[^>]*>(.*?)</title>",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Downloads {@link #PAGE_URL}, prints the page source, then prints its title.
     *
     * <p>I/O failures are reported as a stack trace on standard error.
     *
     * @param args command-line arguments; ignored
     */
    public static void main(String[] args) {
        try {
            String scrapedPage = fetchPage(PAGE_URL);
            System.out.println(scrapedPage);

            // Read the title using a regex; prints "null" when the page has none.
            System.out.println(extractTitle(scrapedPage));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the resource at {@code address} and returns it as text.
     *
     * <p>The response is decoded as UTF-8 rather than the platform default
     * charset, and every line is terminated with {@code '\n'} so that text on
     * adjacent lines is not fused together.
     *
     * @param address absolute URL of the page to download
     * @return the page content, one {@code '\n'} after each line
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    static String fetchPage(String address) throws IOException {
        URLConnection connection = new URL(address).openConnection();
        StringBuilder page = new StringBuilder();

        // try-with-resources closes the reader (and the underlying stream)
        // even when reading fails part-way through.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line).append('\n');
            }
        }
        return page.toString();
    }

    /**
     * Extracts the text of the first {@code <title>} element in {@code html}.
     *
     * @param html page source to search; may be {@code null}
     * @return the title text with surrounding whitespace removed, or
     *         {@code null} if {@code html} is {@code null} or has no title element
     */
    static String extractTitle(String html) {
        if (html == null) {
            return null;
        }
        Matcher matcher = TITLE_PATTERN.matcher(html);
        // The first match is the document title; later ones (if any) belong to
        // embedded content.
        return matcher.find() ? matcher.group(1).trim() : null;
    }
}