-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy path_1242.java
82 lines (74 loc) · 3.27 KB
/
_1242.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package com.fishercoder.solutions.secondthousand;
import java.util.*;
import java.util.concurrent.*;
/**
 * LeetCode 1242 - Web Crawler Multithreaded.
 *
 * <p>Starting from one URL, collects every reachable URL that shares the start URL's host name.
 * Page links are fetched concurrently through an {@link HtmlParser}.
 */
public class _1242 {

    /** Extracts the outgoing links of a page. */
    public interface HtmlParser {
        /**
         * Returns the URLs linked from the given page.
         *
         * @param url the page to inspect
         * @return the URLs found on that page
         */
        List<String> getUrls(String url);
    }

    public static class Solution1 {
        /*
         * credit: https://leetcode.com/problems/web-crawler-multithreaded/solutions/699006/java-blockingqueue-executorservice/
         */

        /** Number of worker threads fetching pages in parallel. */
        private static final int WORKER_COUNT = 4;

        /** How long an idle worker waits for new work before it terminates. */
        private static final long IDLE_WORKER_TIMEOUT_SECONDS = 1L;

        /**
         * Crawls all URLs reachable from {@code startUrl} that have the same host name as
         * {@code startUrl}.
         *
         * <p>The calling thread owns {@code visited} and {@code result}; worker threads only call
         * the parser and push discovered links into a thread-safe queue. A URL whose page cannot
         * be parsed (the parser throws) is still reported, but contributes no further links. If
         * the calling thread is interrupted while waiting for a page, its interrupt flag is
         * restored and the URLs collected so far are returned.
         *
         * @param startUrl the URL to start from; it is always the first element of the result
         * @param htmlParser the link extractor, invoked from worker threads
         * @return every distinct same-host URL that was reached, each exactly once
         */
        public List<String> crawl(String startUrl, HtmlParser htmlParser) {
            String targetHostName = getHostName(startUrl);
            List<String> result = new ArrayList<>();
            BlockingQueue<String> queue = new LinkedBlockingQueue<>();
            queue.offer(startUrl);
            Set<String> visited = new HashSet<>();
            Queue<Future<?>> tasks = new ArrayDeque<>();
            // Create a thread pool to crawl the URLs.
            // LeetCode doesn't allow executor.shutdown(), so use daemon threads to let the
            // program shut down (otherwise TLE), and let idle workers time out so that repeated
            // crawl() calls do not accumulate parked threads.
            ThreadPoolExecutor executorService =
                    new ThreadPoolExecutor(
                            WORKER_COUNT,
                            WORKER_COUNT,
                            IDLE_WORKER_TIMEOUT_SECONDS,
                            TimeUnit.SECONDS,
                            new LinkedBlockingQueue<Runnable>(),
                            r -> {
                                Thread t = new Thread(r);
                                t.setDaemon(true);
                                return t;
                            });
            executorService.allowCoreThreadTimeOut(true);

            while (true) {
                String url = queue.poll();
                if (url != null) {
                    if (getHostName(url).equals(targetHostName) && visited.add(url)) {
                        result.add(url);
                        tasks.add(
                                executorService.submit(
                                        () -> {
                                            for (String u : htmlParser.getUrls(url)) {
                                                queue.offer(u);
                                            }
                                        }));
                    }
                    continue;
                }

                // Nothing queued right now: wait for the oldest pending task, which might add
                // new URLs into the queue.
                Future<?> nextTask = tasks.poll();
                if (nextTask == null) {
                    // Queue drained and every submitted task has completed.
                    break;
                }
                try {
                    nextTask.get();
                } catch (InterruptedException e) {
                    // Preserve the interrupt for the caller and stop waiting on further pages.
                    Thread.currentThread().interrupt();
                    break;
                } catch (ExecutionException ignored) {
                    // Best effort: the parser failed for this page, so it yields no links.
                }
            }
            return result;
        }

        /**
         * Extracts the host name of a URL: the text between the scheme separator {@code "://"}
         * (when present) and the next {@code '/'} (or the end of the string).
         *
         * <p>Never throws for malformed input; a URL with an empty authority such as
         * {@code "http:///"} yields the empty string.
         *
         * @param url the URL to inspect
         * @return the host-name portion of {@code url}
         */
        private String getHostName(String url) {
            int schemeEnd = url.indexOf("://");
            int hostStart = schemeEnd >= 0 ? schemeEnd + "://".length() : 0;
            int hostEnd = url.indexOf('/', hostStart);
            return hostEnd >= 0 ? url.substring(hostStart, hostEnd) : url.substring(hostStart);
        }
    }

    /** Prints the host name extracted from a few sample URLs. */
    public static void main(String... args) {
        Solution1 solution1 = new Solution1();
        System.out.println(solution1.getHostName("http://news.yahoo.com"));
        System.out.println(solution1.getHostName("http://news.yahoo.com/news"));
        System.out.println(solution1.getHostName("http://news.yahoo.com/us"));
        System.out.println(solution1.getHostName("http://news.yahoo.com"));
    }
}