|
| 1 | + |
| 2 | +#-*- coding: utf-8 -*- |
| 3 | + |
| 4 | + |
| 5 | +import sys |
| 6 | +import pandas as pd |
| 7 | +import requests |
| 8 | +import urllib.request |
| 9 | +from bs4 import BeautifulSoup |
| 10 | +import pymysql |
| 11 | +import time |
| 12 | +import os |
| 13 | + |
# Blank out the http_proxy variable for this process before any request is made
# (presumably so the API calls below bypass an inherited proxy -- confirm).
os.environ.update(http_proxy='')
| 15 | + |
| 16 | + |
| 17 | + |
| 18 | + |
| 19 | + |
| 20 | + |
# Service roots; some transaction services are addressed on port 8081, others are not.
_MOLIT_8081 = "http://openapi.molit.go.kr:8081/OpenAPI_ToolInstallPackage/service/rest/RTMSOBJSvc/"
_MOLIT = "http://openapi.molit.go.kr/OpenAPI_ToolInstallPackage/service/rest/RTMSOBJSvc/"
_KAB = "http://openapi.kab.co.kr/OpenAPI_ToolInstallPackage/service/rest/"

# Transaction endpoints, queried with LAWD_CD / DEAL_YMD.
_TRADE_ENDPOINTS = {
    "apt": _MOLIT_8081 + "getRTMSDataSvcAptTrade",
    "land": _MOLIT + "getRTMSDataSvcLandTrade",
    "rent": _MOLIT_8081 + "getRTMSDataSvcAptRent",
    "villa": _MOLIT_8081 + "getRTMSDataSvcRHTrade",
    "villarent": _MOLIT_8081 + "getRTMSDataSvcRHRent",
    "studio": _MOLIT + "getRTMSDataSvcOffiTrade",
    "studiorent": _MOLIT + "getRTMSDataSvcOffiRent",
    "single": _MOLIT_8081 + "getRTMSDataSvcSHTrade",
}

# Price-index endpoints, queried with startmonth / endmonth / region.
_INDEX_ENDPOINTS = {
    "landindex": _KAB + "LfrPrcIndexSvc/getLfrPrcIndex",
    "aptindex": _KAB + "AptRealPrcIndexSvc/getAptRealPrcIndex",
    "rentindex": _KAB + "RentPrcIndexSvc/getRentPrcIndex",
}


def _build_request_url(address, yearmonth, key, target):
    """Return the request URL for *target*; raise ValueError if it is unknown."""
    if target in _TRADE_ENDPOINTS:
        return (_TRADE_ENDPOINTS[target]
                + "?LAWD_CD=" + str(address)
                + "&DEAL_YMD=" + str(yearmonth)
                + "&serviceKey=" + key)
    if target in _INDEX_ENDPOINTS:
        # For index targets *yearmonth* is a bare year; the request spans
        # January through December of it.
        startmonth = str(yearmonth) + "01"
        endmonth = str(yearmonth) + "12"
        return (_INDEX_ENDPOINTS[target]
                + "?startmonth=" + startmonth
                + "&endmonth=" + endmonth
                + "&region=" + str(address)
                + "&serviceKey=" + key)
    raise ValueError("unknown target: %r" % (target,))


def getaptdata(address, yearmonth, key, target, retry_delay=100):
    """Fetch one API response and return its ``<item>`` records.

    Args:
        address: Region code placed in ``LAWD_CD`` (transaction targets) or
            ``region`` (index targets).
        yearmonth: ``DEAL_YMD`` value for transaction targets; for index
            targets a year to which "01" and "12" are appended.
        key: Service key, appended to the URL as-is.
        target: One of "apt", "land", "rent", "villa", "villarent", "studio",
            "studiorent", "single", "landindex", "aptindex", "rentindex".
        retry_delay: Seconds to wait before the single retry after a failed
            request.

    Returns:
        A list with one entry per ``<item>`` element: the element's text with
        newlines removed, split on ">".

    Raises:
        ValueError: If *target* is not a known target (raised before any
            network access).
    """
    url = _build_request_url(address, yearmonth, key, target)
    print(url)
    try:
        response = urllib.request.urlopen(url)
    except Exception as e:
        # One retry after a pause; a second failure propagates to the caller.
        print('Fail ' + str(e))
        time.sleep(retry_delay)
        response = urllib.request.urlopen(url)

    # The context manager closes the response even if read/decode fails.
    with response:
        payload = response.read().decode("utf8")
    soup = BeautifulSoup(payload, "lxml")

    return [item.get_text().replace('\n', '').split(">")
            for item in soup.find_all("item")]
| 62 | + |
def _split_index_header(head):
    """Split the first cell of an index payload into (code, area, year, month).

    The cell is laid out as a 5-character code, an area name, a 4-digit year
    and a 2-digit month; the width of the area name depends on the code.
    """
    code = head[:5]
    if code == "A2000":
        # Capital region: 3-character area name.
        area_end = 8
    elif code == "11A01" or code == "11A02":
        # Gangbuk area / Gangnam area: 4-character area name.
        area_end = 9
    else:
        area_end = 7
    return (code, head[5:area_end],
            head[area_end:area_end + 4], head[area_end + 4:area_end + 6])


def updateIndexTable(aptdata, conn, target, code):
    """Insert price-index rows parsed from *aptdata* into ``priceIndex``.

    Each item's first element is a "|"-separated list of "<period>,<value>"
    segments. The first segment's period is prefixed with the area code and
    area name; the following segments carry only "YYYYMM".

    Args:
        aptdata: Iterable of items as returned by ``getaptdata``.
        conn: Open DB connection providing ``cursor()`` and ``commit()``.
        target: Index type; stored in the ``type`` column. Items are only
            processed when "index" occurs in it.
        code: Accepted for call compatibility. The area code written to the
            table is the one embedded in each payload's first segment.

    Returns:
        None. ``pymysql.InternalError`` is caught and its code and message are
        printed.
    """
    insert_sql = """insert into priceIndex (`year`, `month`, `date`, `areacode`, `areacity`, `indexvalue`, `type`) VALUES ( %s, %s, %s, %s, %s, %s, %s)"""
    try:
        with conn.cursor() as curs:
            for item in aptdata:
                if "index" not in target:
                    continue
                print(item)

                # Drop segments without a "<period>,<value>" pair (e.g. the
                # empty piece left by a trailing "|") instead of crashing.
                segments = [seg for seg in str(item[0]).split('|')
                            if len(seg.split(',')) >= 2]
                area = None
                for position, segment in enumerate(segments):
                    units = segment.split(',')
                    ym = units[0]
                    print(ym)
                    if position == 0:
                        code, area, year, month = _split_index_header(ym)
                    else:
                        year = ym[:4]
                        month = ym[4:6]
                    index = units[1]
                    date = str(year) + "-" + str(month) + "-01"
                    params = (str(year), str(month), str(date), code, area,
                              str(index), target)
                    curs.execute(insert_sql, params)
                    print(insert_sql, params)

            conn.commit()
    except pymysql.InternalError as error:
        # Separate names so the error code does not overwrite *code*.
        errno, message = error.args
        print(errno)
        print(message)
| 103 | + print( message) |
| 104 | + |
def updateBasicTable(aptdata, conn):
    """Print each record of *aptdata* with its length, then commit *conn*.

    A dict cursor is opened for the duration of the loop but no statement is
    executed on it. ``pymysql.InternalError`` is caught and its code and
    message are printed.
    """
    try:
        with conn.cursor(pymysql.cursors.DictCursor):
            for record in aptdata:
                print(record, len(record), sep="\n")
            conn.commit()
    except pymysql.InternalError as db_error:
        errno, detail = db_error.args
        print(errno)
        print(detail)
| 116 | + |
def _strip_amount(field, label_length):
    """Drop the trailing label, surrounding blanks and thousands separators."""
    return field[:-label_length].strip().replace(',', '')


def checkfield(field, landtype):
    """Map one raw "<value><label>" field to a ``(column, value)`` pair.

    The Korean label found in *field* selects the column name; the label is
    cut off the end of the string to obtain the value. Branch order matters
    because some labels are substrings of others, so the longer label is
    always tested first.

    Args:
        field: Raw field text.
        landtype: Target type ("apt", "rent", "studio", ...); it decides how
            complex names and the legal-dong field are mapped.

    Returns:
        ``None`` for an empty field, ``("error", "error")`` for an
        unrecognised field, ``("check field error", "check field error")`` if
        processing raised, otherwise ``(column, value)``.
    """
    try:
        if len(field) == 0:
            return

        if "거래금액" in field:
            return ("price", _strip_amount(field, 4))
        # "보증금액" must be tested before its prefix "보증금"; otherwise the
        # longer label is never matched and one character too few is cut.
        elif "보증금액" in field:
            return ("deposit", _strip_amount(field, 4))
        elif "보증금" in field:
            return ("deposit", _strip_amount(field, 3))
        elif "월세금액" in field:
            return ("rentprice", _strip_amount(field, 4))
        elif "월세" in field:
            return ("rentprice", _strip_amount(field, 2))
        elif "건축년도" in field:
            return ("constructionyear", field[:-4])
        elif len(field) == 5 and "년" in field:
            return ("year", field[:-1])
        elif "단지" in field and landtype in ("studio", "studiorent"):
            return ("name", field[:-2].strip())
        elif "아파트" in field and landtype in ("apt", "rent"):
            return ("name", field[:-3].strip())
        elif "연립다세대" in field:
            return ("name", field[:-5].strip())
        elif "법정동" in field:
            area = field[:-3].strip()
            if landtype == "apt":
                return ("area", area)
            return ("areadong", area)
        elif "시군구" in field:
            return ("areagu", field[:-3])
        elif len(field) in (2, 3) and "월" in field:
            return ("month", field[:-1])
        elif ("1~10일" in field or "11~20일" in field or "21~30일" in field
              or "21~28일" in field or "21~29일" in field or "21~31일" in field):
            return ("day", field[:-1])
        elif "거래면적" in field:
            return ("landarea", _strip_amount(field, 4))
        elif "대지권면적" in field:
            return ("landrightarea", field[:-5])
        elif "대지면적" in field:
            return ("landarea", field[:-4])
        elif "연면적" in field:
            return ("totalgroundarea", field[:-3])
        elif "주택유형" in field:
            return ("housetype", field[:-4])
        elif "전용면적" in field:
            return ("exclusiveusearea", field[:-4])
        elif "지역코드" in field:
            return ("areacode", field[:-4])
        elif len(field) in (2, 3) and "층" in field:
            return ("floor", field[:-1])
        elif "지번" in field:
            return ("lotnumber", field[:-2])
        elif "구분" in field:
            return ("shares", field[:-2])
        elif "지목" in field:
            return ("category", field[:-2])
        elif "용도지역" in field:
            return ("subcategory", field[:-4])
        elif "건축유형" in field:
            return ("housetype", field[:-4])
        else:
            print("out of scope:%s" % (field,))
            return ("error", "error")
    except Exception as e:
        print('Fail ' + str(e))
        return ("check field error", "check field error")
| 191 | + |
| 192 | + |
def getDate(year, month, day, interval):
    """Return "year-month-day" using the first day of a "start~end" range.

    When *interval* is the string "365" the year is advanced by one.
    """
    first_day = day.partition('~')[0]
    if interval == "365":
        year = str(int(year) + 1)
    return "-".join((year, month, first_day))
| 199 | + |
| 200 | + |
| 201 | + |
| 202 | + |
| 203 | + |
def updateTable(aptdata, conn, target):
    """Convert each item of *aptdata* into a column dict and process it.

    Every non-empty field of an item is mapped through ``checkfield``; items
    containing an unrecognised or unparsable field are reported and skipped.

    NOTE(review): the lookup and insert statements appear to have been removed
    from this block (see the "insert query here" placeholder); no ``execute``
    call is visible before the fetch below. The nesting was reconstructed from
    a listing without indentation -- confirm against the original file.

    Args:
        aptdata: Iterable of items as returned by ``getaptdata``.
        conn: Open DB connection providing ``cursor()`` and ``commit()``.
        target: Target type, forwarded to ``checkfield``.

    Returns:
        None. ``pymysql.InternalError`` and any other exception are caught and
        printed.
    """
    try:
        with conn.cursor() as curs:
            for item in aptdata:
                print("\n")
                print(item)
                entry = {}
                for field in item:
                    if len(field) == 0:
                        continue
                    fieldname, value = checkfield(field, target)
                    entry[fieldname] = value
                # checkfield signals failure with either of these two keys.
                if 'error' in entry or 'check field error' in entry:
                    print("Data error:%s" % (item,))
                    continue

                # Fetch data
                print(curs.rowcount)
                print(curs._last_executed)
                curs.fetchall()
                if curs.rowcount == 0:
                    # insert query here
                    print(curs._last_executed)
            conn.commit()
    except pymysql.InternalError as error:
        errno, message = error.args
        print(errno)
        print(message)
    except Exception as e:
        print('Update Table Data Error:' + str(e))
| 234 | + |
| 235 | + |
def getQuery(entry, target):
    """Build a parameterised INSERT statement for the columns of *entry*.

    Returns ``(statement, values)``: the statement lists every key of *entry*
    as a back-quoted column with one ``%s`` marker each, and *values* holds
    the matching values converted to ``str``. Both are printed as well.
    *target* is not used; the table name is the literal "tablename".
    """
    column_names = list(entry)
    column_sql = ",".join("`" + name + "`" for name in column_names)
    marker_sql = ",".join(["%s"] * len(column_names))
    insertvalue = "insert into tablename(" + column_sql + ")" + " VALUES (" + marker_sql + ")"
    datavalues = tuple(str(entry[name]) for name in column_names)
    print(insertvalue)
    print(datavalues)
    return (insertvalue, datavalues)
| 253 | + |
| 254 | + |
def getDataFrame(aptdata):
    """Build a DataFrame from the first eleven fields of every record.

    A fixed number of trailing characters is cut from each field by position,
    the columns are assembled under provisional labels and then renamed
    positionally to the final labels.

    NOTE(review): the provisional labels, the final labels and the number of
    characters cut per position do not line up with one another (e.g. four
    characters are cut from the field that ends up under the one-character
    label '일') -- confirm the intended field-to-column mapping.
    """
    # Characters removed from the end of field 0..10.
    suffix_lengths = (4, 4, 1, 3, 3, 1, 1, 4, 2, 4, 1)
    provisional_labels = ('건축년도', '월', '법정동', '년', '전용면적', '아파트',
                          '거래금액', '일', '>지번', '층', '지역코드')

    buckets = [[] for _ in suffix_lengths]
    for record in aptdata:
        for position, cut in enumerate(suffix_lengths):
            buckets[position].append(record[position][:-cut])

    apt = pd.DataFrame(dict(zip(provisional_labels, buckets)))
    apt.columns = ['일', '거래금액', '법정동', '년', '월', '건축년도',
                   '전용면적', '아파트', '지번', '층', '지역코드']
    return apt
| 284 | + |
| 285 | + |
# Region codes used for the price-index targets: a two-digit prefix padded with "000".
ilocCodes = [prefix + "000" for prefix in (
    "11", "26", "27", "28", "29", "30", "31", "41",
    "42", "43", "44", "45", "46", "47", "48", "49",
)]

# All codes (five-digit district codes, grouped here by their two-digit prefix).
clocCodes = [
    "11110", "11140", "11170", "11200", "11215", "11230", "11260", "11290",
    "11305", "11320", "11350", "11380", "11410", "11440", "11470", "11500",
    "11530", "11545", "11560", "11590", "11620", "11650", "11680", "11710",
    "11740",
    "26110", "26140", "26170", "26200", "26230", "26260", "26290", "26320",
    "26350", "26380", "26410", "26440", "26470", "26500", "26530", "26710",
    "27110", "27140", "27170", "27200", "27230", "27260", "27290", "27710",
    "28110", "28140", "28170", "28185", "28200", "28237", "28245", "28260",
    "28710", "28720",
    "29110", "29140", "29155", "29170", "29200",
    "30110", "30140", "30170", "30200", "30230",
    "31110", "31140", "31170", "31200", "31710",
    "36110",
    "41111", "41113", "41115", "41117", "41131", "41133", "41135", "41150",
    "41171", "41173", "41190", "41210", "41220", "41250", "41271", "41273",
    "41281", "41285", "41287", "41290", "41310", "41360", "41370", "41390",
    "41410", "41430", "41450", "41461", "41463", "41465", "41480", "41500",
    "41550", "41570", "41590", "41610", "41630", "41650", "41670", "41800",
    "41820", "41830",
    "42110", "42130", "42150", "42170", "42190", "42210", "42230", "42720",
    "42730", "42750", "42760", "42770", "42780", "42790", "42800", "42810",
    "42820", "42830",
    "43111", "43112", "43113", "43114", "43130", "43150", "43720", "43730",
    "43740", "43745", "43750", "43760", "43770", "43800",
    "44131", "44133", "44150", "44180", "44200", "44210", "44230", "44250",
    "44270", "44710", "44760", "44770", "44790", "44800", "44810", "44825",
    "45111", "45113", "45130", "45140", "45180", "45190", "45210", "45710",
    "45720", "45730", "45740", "45750", "45770", "45790", "45800",
    "46110", "46130", "46150", "46170", "46230", "46710", "46720", "46730",
    "46770", "46780", "46790", "46800", "46810", "46820", "46830", "46840",
    "46860", "46870", "46880", "46890", "46900", "46910",
    "47111", "47113", "47130", "47150", "47170", "47190", "47210", "47230",
    "47250", "47280", "47290", "47720", "47730", "47750", "47760", "47770",
    "47820", "47830", "47840", "47850", "47900", "47920", "47930", "47940",
    "48121", "48123", "48125", "48127", "48129", "48170", "48220", "48240",
    "48250", "48270", "48310", "48330", "48720", "48730", "48740", "48820",
    "48840", "48850", "48860", "48870", "48880", "48890",
    "50110", "50130",
]

# Default crawl range: every month of 2006 through 2017.
years = [str(year) for year in range(2006, 2018)]
months = ["%02d" % month for month in range(1, 13)]
# Script driver.
#
# Usage:
#   <script> target code period   fetch one month (period = YYYYMM) or every
#                                 month of one year (period = YYYY)
#   <script> target code          fetch every month of every year in `years`
#   <script> target               fetch the last quarter of 2017 for all codes;
#                                 targets containing "index" go through
#                                 updateIndexTable instead of updateTable
#
# NOTE(review): loop nesting (in particular which loop each time.sleep belongs
# to) was reconstructed from a listing without indentation -- confirm against
# the original file.
conn = pymysql.connect(host='localhost', user='userid', password='userpwd',
                       db='dbname', charset='utf8')
try:
    print(len(sys.argv))
    key = "add key here"

    if len(sys.argv) == 4:
        target = sys.argv[1]
        code = sys.argv[2]
        period = sys.argv[3]

        if len(period) == 6:
            print("get data by yearmn")
            aptdata = getaptdata(code, period, key, target)
            updateTable(aptdata, conn, target)
        elif len(period) == 4:
            print("get data by year")
            for m in months:
                ym = str(period) + m
                aptdata = getaptdata(code, ym, key, target)
                updateTable(aptdata, conn, target)
        else:
            print("usage: code period")
    elif len(sys.argv) == 3:
        target = sys.argv[1]
        code = sys.argv[2]
        for y in years:
            for m in months:
                ym = str(y) + m
                aptdata = getaptdata(code, ym, key, target)
                updateTable(aptdata, conn, target)
    elif len(sys.argv) == 2:
        target = sys.argv[1]
        print(target)
        years = ["2017"]
        months = ["10", "11", "12"]
        locCodes = clocCodes
        if "index" in target:
            if target == "rentindex":
                # The rent index is requested for this separate set of region codes.
                ilocCodes = ["A2000", "11000", "11A01", "11A02", "41000", "28000", "26000", "27000", "29000", "30000", "31000"]

            for code in ilocCodes:
                for y in years:
                    # The index requests use their own service key.
                    key = "H**D"
                    aptdata = getaptdata(code, y, key, target)
                    updateIndexTable(aptdata, conn, target, code)
                    time.sleep(5)
        else:
            for code in locCodes:
                for y in years:
                    for m in months:
                        ym = str(y) + m
                        aptdata = getaptdata(code, ym, key, target)
                        updateTable(aptdata, conn, target)
                        time.sleep(1)
                    time.sleep(1)
    else:
        print("usage: target code period(Ym or Y) / target code / target ")
finally:
    # Close the connection even when a fetch or update raises.
    conn.close()
0 commit comments