Note that there are some explanatory texts on larger screens.

plurals
  1. PO
    primarykey
    data
    text
    // This code might be of some help :) Taken from http://www.vogella.de/articles/JavaRegularExpressions/article.html.
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /**
     * Downloads a page and extracts the targets of its {@code <a href="...">} anchors
     * using regular expressions.
     *
     * <p>This is a regex-based scan, not an HTML parser: only anchors whose {@code href}
     * value is wrapped in double quotes and that have a closing {@code </a>} are found,
     * and character entities inside the value are not decoded.
     */
    public class LinkGetter {

        private static final Logger LOG = Logger.getLogger(LinkGetter.class.getName());

        /** A whole anchor element, from {@code <a ... href="} up to the matching {@code </a>}. */
        private static final Pattern ANCHOR =
                Pattern.compile("<a\\b[^>]*href=\"[^>]*>(.*?)</a>");

        /**
         * The quoted href value inside an anchor. The value stops at the first closing
         * quote so that attributes following {@code href} are not swallowed into the link.
         */
        private static final Pattern HREF = Pattern.compile("href=\"([^\">]*)\"");

        /** Link targets that are not navigable documents and are therefore dropped. */
        private static final Pattern SKIPPED =
                Pattern.compile("javascript:.*|mailto:.*", Pattern.CASE_INSENSITIVE);

        /** A link that already starts with a URI scheme ({@code http:}, {@code https:}, {@code ftp:}, ...). */
        private static final Pattern HAS_SCHEME = Pattern.compile("[A-Za-z][A-Za-z0-9+.-]*:.*");

        // Stored for interface compatibility; no method of this class reads it.
        private final String root;

        /**
         * @param root kept as given; it does not influence {@link #getLinks(String)}
         */
        public LinkGetter(String root) {
            this.root = root;
        }

        /**
         * Fetches {@code url} and returns the absolute form of every anchor target found
         * in it, in document order.
         *
         * <p>{@code javascript:} and {@code mailto:} targets are left out. Targets that
         * cannot be resolved against {@code url} are skipped individually. If the page
         * cannot be fetched (bad URL or I/O failure) the problem is logged and the links
         * collected so far (possibly none) are returned; no exception is propagated.
         *
         * @param url address of the page to scan; any protocol supported by {@link URL}
         * @return a mutable list of absolute link strings, never {@code null}
         */
        public List<String> getLinks(String url) {
            List<String> links = new ArrayList<String>();
            try {
                URL base = new URL(url);
                Matcher anchor = ANCHOR.matcher(readAll(base));
                while (anchor.find()) {
                    Matcher href = HREF.matcher(anchor.group());
                    if (!href.find()) {
                        // Anchor without a cleanly quoted href value: nothing usable to extract.
                        continue;
                    }
                    String target = href.group(1);
                    if (!valid(target)) {
                        continue;
                    }
                    try {
                        links.add(makeAbsolute(base, target));
                    } catch (MalformedURLException e) {
                        // One bad link must not abort extraction of the remaining ones.
                        LOG.log(Level.FINE, "Skipping unresolvable link " + target + " on " + url, e);
                    }
                }
            } catch (MalformedURLException e) {
                LOG.log(Level.WARNING, "Not a valid URL: " + url, e);
            } catch (IOException e) {
                LOG.log(Level.WARNING, "Could not read " + url, e);
            }
            return links;
        }

        /**
         * Reads the whole document behind {@code source} into one string and always
         * closes the underlying stream.
         *
         * <p>Line breaks are replaced by a single space: they are plain whitespace in
         * HTML, and dropping them entirely would fuse tokens such as {@code <a} and
         * {@code href} when an anchor is wrapped across lines.
         */
        private static String readAll(URL source) throws IOException {
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(source.openStream()));
            try {
                StringBuilder page = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    page.append(line).append(' ');
                }
                return page.toString();
            } finally {
                reader.close();
            }
        }

        /** Returns {@code false} for {@code javascript:} and {@code mailto:} targets, {@code true} otherwise. */
        private boolean valid(String s) {
            return !SKIPPED.matcher(s).matches();
        }

        /**
         * Turns {@code link} into an absolute URL string.
         *
         * <p>A link that already carries a scheme is returned unchanged. Anything else is
         * resolved against {@code base} by {@link URL#URL(URL, String)}, which handles
         * root-relative ({@code /x}), document-relative ({@code x}, {@code ../x}),
         * protocol-relative ({@code //host/x}) and empty links without producing
         * doubled or missing slashes.
         *
         * @throws MalformedURLException if the link cannot be parsed relative to {@code base}
         */
        private String makeAbsolute(URL base, String link) throws MalformedURLException {
            if (HAS_SCHEME.matcher(link).matches()) {
                return link;
            }
            return new URL(base, link).toString();
        }
    }
    singulars
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    plurals
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload