Skip to content

Commit 67bd962

Browse files
committed
test
1 parent 120afb6 commit 67bd962

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

src/get_faces.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
tmp_faces = {f.text for f in face_tablet}
2626
faces = faces | tmp_faces
2727
print("face_length: {}".format(len(faces)))
28-
sleep(3)
28+
sleep(2)
2929
if i // 10 == 0:
3030
driver.refresh()
3131

src/main.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# from selenium.common.exceptions import ElementNotVisibleException
44
from time import sleep
55
import codecs
6+
from datetime import datetime
67

78
# FIXME: Your Driver Path
89
driver = webdriver.Chrome(executable_path="C:\Program Files (x86)\chromedriver\chromedriver.exe")
@@ -31,14 +32,16 @@
3132
after_h = driver.execute_script("var h = window.pageYOffset; return h")
3233
if previous_h == after_h:
3334
break
34-
print('Last page')
35+
print("Scroll All Page {face} {time}".format(face=face, time=datetime.now()))
3536

3637
page_source = driver.page_source
3738

38-
questions = driver.find_elements_by_class_name("streamItem_header")
39-
answers = driver.find_elements_by_class_name("streamItem_content")
39+
qa_elements = driver.find_elements_by_class_name("streamItem-answer")
4040

41-
q_and_a = [(q.find_element_by_tag_name('h2').text, a.text) for q, a in zip(questions, answers)]
41+
print("QA content: {}".format(len(qa_elements)))
42+
43+
q_and_a = [(qa.find_element_by_class_name("streamItem_header").find_element_by_tag_name('h2').text,
44+
qa.find_element_by_class_name("streamItem_content").text) for qa in qa_elements]
4245

4346
with codecs.open("data/askfm_data/" + face + ".txt", "w", "utf-8") as f:
4447
for q, a in q_and_a:

0 commit comments

Comments
 (0)