Skip to content

Commit efaa52a

Browse files
committed
add news crawling script
1 parent 51b7de2 commit efaa52a

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed

data/crawler/cron_news.php

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/php -q
2+
3+
<?php
4+
5+
6+
// Cron entry point: query the Naver news search API page by page, flatten each
// returned item into one tab-separated line, and append the lines to $newname.
echo "\nstock cron start".date("Ymd")."\n";

// Set to true to dump request URLs, raw responses and decoded payloads.
$debug = false;

$start = 1;             // 1-based index of the first result of the current page
$display = 20;          // number of results requested per page
$total = 20;            // highest result index to request
$newname = "news.txt";  // output file, opened in append mode
$line = "";             // accumulated tab-separated output
$total_request = 0;     // number of API requests issued during this run

for ($start = 1; $start <= $total; $start = $start + $display)
{
    $encText = urlencode("insert search keyword here");
    $url = "https://openapi.naver.com/v1/search/news.json?display=$display&start=$start&sort=date&query=".$encText;

    if ($debug) echo " url:".$url;
    $response = getSearchResult($url);
    $total_request++;
    if ($debug) echo " result:".$response;

    if ($response !== false)
    {
        $rs = getJsonArray($response);
        if ($debug) print_r($rs);

        // getJsonArray() returns null on malformed JSON; treat that, or a
        // payload without an 'items' list, as an empty page.
        $items = (is_array($rs) && isset($rs['items']) && is_array($rs['items'])) ? $rs['items'] : array();

        foreach ($items as $item)
        {
            $title = getNewHighlightTag(isset($item['title']) ? $item['title'] : "");

            // Flatten the description first so it cannot break the
            // one-record-per-line, tab-separated output, then convert tags.
            $desc = trim(str_replace("\n", " ", isset($item['description']) ? $item['description'] : ""));
            $desc = str_replace("\t", " ", $desc);
            $desc = getNewHighlightTag($desc);

            // NOTE(review): assumes each item carries an 'originallink' key;
            // the previous code wrote $originallink without ever reading it
            // from the item, so this column was always empty. Confirm against
            // the API response.
            $originallink = getNewHighlightTag(isset($item['originallink']) ? $item['originallink'] : "");

            $pubDate = isset($item['pubDate']) ? $item['pubDate'] : "";

            $line .= $pubDate."\t".$title."\t".$desc."\t".$originallink."\n";
        }
    }
    else {
        echo "Error: It's not possible to get $url";
    }
}

echo "\nline:".$line;

// Write once, after all pages were collected, so FILE_APPEND never stores
// the same accumulated lines twice.
if (file_put_contents($newname, $line, FILE_APPEND) === false) {
    echo "\nError: could not write to $newname";
}
61+
62+
63+
64+
/**
 * Convert bold markers in a text into highlight tags and escape ampersands.
 *
 * Both the literal (<b>, </b>) and the entity-encoded (&lt;b&gt;,
 * &lt;/b&gt;) forms are rewritten to <strong class="hl"> / </strong>.
 * Afterwards every remaining "&" becomes "&amp;", which also re-escapes
 * entities that were already present in the input.
 *
 * @param string $title Text to convert.
 * @return string The converted text.
 */
function getNewHighlightTag($title)
{
    // str_replace() applies the pairs one after another in array order, so
    // the ampersand pass must stay last: it would otherwise break the
    // entity-encoded tag patterns before they are matched.
    $needles = array("&lt;b&gt;", "<b>", "&lt;/b&gt;", "</b>", "&");
    $substitutes = array(
        "<strong class=\"hl\">",
        "<strong class=\"hl\">",
        "</strong>",
        "</strong>",
        "&amp;",
    );

    return str_replace($needles, $substitutes, $title);
}
74+
75+
/**
 * Decode a JSON document into an associative array.
 *
 * On a decoding failure a short diagnostic is echoed and the value returned
 * by json_decode() (null) is passed through unchanged, so callers must be
 * prepared for a non-array result.
 *
 * @param string $response Raw JSON text.
 * @return mixed Decoded value (objects as associative arrays), or null on failure.
 */
function getJsonArray($response)
{
    $json_array = json_decode($response, true);

    switch (json_last_error())
    {
        case JSON_ERROR_NONE:
            break;
        case JSON_ERROR_DEPTH:
            echo ' - Maximum stack depth exceeded';
            break;
        case JSON_ERROR_CTRL_CHAR:
            echo ' - Unexpected control character found';
            break;
        case JSON_ERROR_SYNTAX:
            echo ' - Syntax error, malformed JSON';
            break;
        default:
            // Any other failure (e.g. malformed UTF-8) used to be dropped
            // silently; report it with PHP's own description instead.
            echo ' - '.json_last_error_msg();
            break;
    }

    return $json_array;
}
97+
98+
/**
 * Perform an authenticated GET request and return the response body.
 *
 * The client id and secret are sent in the X-Naver-Client-Id and
 * X-Naver-Client-Secret request headers.
 *
 * @param string $url Fully built request URL.
 * @return string|false Response body when the HTTP status is 200; false on a
 *                      transport failure or any other status (a diagnostic is
 *                      echoed in both failure cases).
 */
function getSearchResult($url)
{
    $client_id = "insert your id here";
    $client_secret = "insert your screte code here";
    $is_post = false;

    $ch = curl_init();
    if ($ch === false) {
        echo "Error 내용:curl_init failed";
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, $is_post);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $headers = array();
    $headers[] = "X-Naver-Client-Id: ".$client_id;
    $headers[] = "X-Naver-Client-Secret: ".$client_secret;
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $response = curl_exec($ch);
    // Read everything needed from the handle before it is closed.
    $transport_error = curl_error($ch);
    $status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($response === false) {
        // Transport-level failure (DNS, TLS, connection refused, ...): there
        // is no body to show, so report cURL's own message instead of
        // printing an empty error.
        echo "Error 내용:".$transport_error;
        return false;
    }

    if ($status_code == 200) {
        return $response;
    }

    echo "Error 내용:".$response;
    return false;
}
126+
127+
?>
128+
129+

0 commit comments

Comments
 (0)