/** * // This is the HtmlParser's API interface. * // You should not implement it, or speculate about its implementation * class HtmlParser { * public: * vector<string> getUrls(string url); * }; */classSolution{public:vector<string>crawl(stringstartUrl,HtmlParserhtmlParser){queue<string>q{{startUrl}};unordered_set<string>seen{{startUrl}};conststring&hostname=getHostname(startUrl);// threadingconstintnThreads=std::thread::hardware_concurrency();vector<thread>threads;std::mutexmtx;std::condition_variablecv;autot=[&](){while(true){unique_lock<mutex>lock(mtx);cv.wait_for(lock,30ms,[&](){returnq.size();});if(q.empty())return;autocur=q.front();q.pop();lock.unlock();constvector<string>urls=htmlParser.getUrls(cur);lock.lock();for(conststring&url:urls){if(seen.count(url))continue;if(url.find(hostname)!=string::npos){q.push(url);seen.insert(url);}}lock.unlock();cv.notify_all();}};for(inti=0;i<nThreads;++i)threads.emplace_back(t);for(std::thread&t:threads)t.join();return{begin(seen),end(seen)};}private:stringgetHostname(conststring&url){constintfirstSlash=url.find_first_of('/');constintthirdSlash=url.find_first_of('/',firstSlash+2);returnurl.substr(firstSlash+2,thirdSlash-firstSlash-2);}};
Log in to Codeflu
Log in to stay updated and get notified of new arrivals.