
Commit 2cfa6ea

Add method to scrape Hackerrank contests
1 parent faaf22e commit 2cfa6ea

File tree

ACedIt/util.py
README.md

2 files changed: +66 -19 lines changed


ACedIt/util.py

Lines changed: 62 additions & 18 deletions

@@ -99,8 +99,8 @@ def create_workdir_structure(site, contest):
 
         workdir = data.get('workdir', None)
 
-        if not os.path.isdir(os.path.join(workdir,site,contest)):
-            os.makedirs(os.path.join(workdir,site,contest))
+        if not os.path.isdir(os.path.join(workdir, site, contest)):
+            os.makedirs(os.path.join(workdir, site, contest))
 
     @staticmethod
     def check_cache(site, contest, problem):
@@ -212,17 +212,18 @@ def handle_kbd_interrupt(args):
         print 'Interrupted manually. Cleaning up...'
 
         if args['problem'] is not None:
-            path = os.path.join(Utilities.cache_dir, args['site'], args['contest'], args['problem'])
+            path = os.path.join(Utilities.cache_dir, args['site'], args[
+                                'contest'], args['problem'])
             if os.path.isdir(path):
                 rmtree(path)
         else:
-            path = os.path.join(Utilities.cache_dir, args['site'], args['contest'])
+            path = os.path.join(Utilities.cache_dir, args[
+                                'site'], args['contest'])
            if os.path.isdir(path):
                rmtree(path)
 
         print 'Done. Exiting gracefully.'
 
-
     @staticmethod
     def run_solution(problem):
         """
@@ -423,8 +424,8 @@ def parse_html(self, req):
             pre = re.sub('<[^<]+?>', '', pre)
             formatted_outputs += [pre]
 
-        print 'Inputs', formatted_inputs
-        print 'Outputs', formatted_outputs
+        # print 'Inputs', formatted_inputs
+        # print 'Outputs', formatted_outputs
 
         return formatted_inputs, formatted_outputs
 
@@ -452,6 +453,7 @@ def scrape_problem(self):
         inputs, outputs = self.parse_html(req)
         Utilities.store_files(self.site, self.contest,
                               self.problem, inputs, outputs)
+        print 'Done.'
 
     def scrape_contest(self):
         """
@@ -462,8 +464,7 @@ def scrape_contest(self):
         req = Utilities.get_html(url)
         links = self.get_problem_links(req)
 
-        print 'Found problems'
-        print '\n'.join(links)
+        print 'Found %d problems..' % (len(links))
 
         if not self.force_download:
             cached_problems = os.listdir(os.path.join(
@@ -528,8 +529,8 @@ def parse_html(self, req):
             formatted_inputs += [inp.strip()]
             formatted_outputs += [out.strip()]
 
-        print 'Inputs', formatted_inputs
-        print 'Outputs', formatted_outputs
+        # print 'Inputs', formatted_inputs
+        # print 'Outputs', formatted_outputs
 
         return formatted_inputs, formatted_outputs
 
@@ -559,6 +560,7 @@ def scrape_problem(self):
         inputs, outputs = self.parse_html(req)
         Utilities.store_files(self.site, self.contest,
                               self.problem, inputs, outputs)
+        print 'Done.'
 
     def scrape_contest(self):
         """
@@ -569,8 +571,7 @@ def scrape_contest(self):
         req = Utilities.get_html(url)
         links = self.get_problem_links(req)
 
-        print 'Found problems'
-        print '\n'.join(links)
+        print 'Found %d problems..' % (len(links))
 
         if not self.force_download:
             cached_problems = os.listdir(os.path.join(
@@ -638,8 +639,8 @@ def parse_html(self, req):
             formatted_inputs += [inp.strip()]
             formatted_outputs += [out.strip()]
 
-        print 'Inputs', formatted_inputs
-        print 'Outputs', formatted_outputs
+        # print 'Inputs', formatted_inputs
+        # print 'Outputs', formatted_outputs
 
         return formatted_inputs, formatted_outputs
 
@@ -653,6 +654,7 @@ def scrape_problem(self):
         inputs, outputs = self.parse_html(req)
         Utilities.store_files(self.site, self.contest,
                               self.problem, inputs, outputs)
+        print 'Done.'
 
 
 class Hackerrank:
@@ -663,7 +665,8 @@ class Hackerrank:
     def __init__(self, args):
         self.site = args['site']
         self.contest = args['contest']
-        self.problem = '-'.join(args['problem'].split()).lower()
+        self.problem = '-'.join(args['problem'].split()
+                                ).lower() if args['problem'] is not None else None
         self.force_download = args['force']
 
     def parse_html(self, req):
@@ -709,11 +712,23 @@ def parse_html(self, req):
 
             formatted_outputs += [formatted_output.strip()]
 
-        print 'Inputs', formatted_inputs
-        print 'Outputs', formatted_outputs
+        # print 'Inputs', formatted_inputs
+        # print 'Outputs', formatted_outputs
 
         return formatted_inputs, formatted_outputs
 
+    def get_problem_links(self, req):
+        """
+        Method to get the links for the problems
+        in a given hackerrank contest
+        """
+        data = json.loads(req.text)
+        data = data['models']
+        links = ['https://www.hackerrank.com/rest/contests/' + self.contest +
+                 '/challenges/' + problem['slug'] for problem in data]
+
+        return links
+
     def scrape_problem(self):
         """
         Method to scrape a single problem from hackerrank
@@ -725,3 +740,32 @@ def scrape_problem(self):
         inputs, outputs = self.parse_html(req)
         Utilities.store_files(self.site, self.contest,
                               self.problem, inputs, outputs)
+        print 'Done.'
+
+    def scrape_contest(self):
+        """
+        Method to scrape all problems from a given hackerrank contest
+        """
+        print 'Checking problems available for contest ' + self.contest + '...'
+        url = 'https://www.hackerrank.com/rest/contests/' + self.contest + '/challenges'
+        req = Utilities.get_html(url)
+        links = self.get_problem_links(req)
+
+        print 'Found %d problems..' % (len(links))
+
+        if not self.force_download:
+            cached_problems = os.listdir(os.path.join(
+                Utilities.cache_dir, self.site, self.contest))
+            links = [link for link in links if link.split(
+                '/')[-1] not in cached_problems]
+
+        rs = (grq.get(link) for link in links)
+        responses = grq.map(rs)
+
+        for response in responses:
+            if response is not None and response.status_code == 200:
+                inputs, outputs = self.parse_html(response)
+                self.problem = response.url.split('/')[-1]
+                Utilities.check_cache(self.site, self.contest, self.problem)
+                Utilities.store_files(
+                    self.site, self.contest, self.problem, inputs, outputs)
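
The core of this commit is the new get_problem_links / scrape_contest pair on the Hackerrank class: a GET to https://www.hackerrank.com/rest/contests/<contest>/challenges returns JSON whose 'models' array holds one entry per challenge, each entry's 'slug' is enough to rebuild the per-problem REST URL, and scrape_contest then fetches the uncached problems concurrently with grequests and stores the parsed samples. Below is a minimal standalone sketch of the same idea, using plain requests instead of grequests; the contest slug and helper name are placeholders, and the 'models'/'slug' layout is assumed to match what the committed code expects.

import requests

def list_contest_problem_urls(contest):
    # Hypothetical helper, not part of util.py: fetch the challenge list for a
    # contest and rebuild the per-problem REST URLs the same way the commit's
    # get_problem_links() does.
    url = 'https://www.hackerrank.com/rest/contests/' + contest + '/challenges'
    resp = requests.get(url)
    resp.raise_for_status()
    challenges = resp.json()['models']  # assumed: one dict per challenge, each with a 'slug'
    return ['https://www.hackerrank.com/rest/contests/' + contest +
            '/challenges/' + challenge['slug'] for challenge in challenges]

if __name__ == '__main__':
    # 'hourrank-1' is a placeholder contest slug used only for illustration.
    for link in list_contest_problem_urls('hourrank-1'):
        print(link)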

README.md

Lines changed: 4 additions & 1 deletion

@@ -1,7 +1,10 @@
+[![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/coderick14/ACedIt/issues)
+[![Open Source Love](https://badges.frapsoft.com/os/v2/open-source.svg?v=103)](https://github.com/coderick14/ACedIt/)
+[![MIT Licence](https://badges.frapsoft.com/os/mit/mit.svg?v=103)](https://opensource.org/licenses/mit-license.php)
 <h1 align="center">
 <img src="https://github.com/coderick14/ACedIt/blob/master/images/logo.png" width="500"/><br/>
 </h1>
-A command line tool to run your code against sample test cases. Without leaving the terminal :)
+A command line tool to run your code against sample test cases. Without leaving the terminal :)
 
 #### Installation
 ##### Build from source
