From 017e2e442bf8a446d7b7ddb13f98f9b2e6fef5fa Mon Sep 17 00:00:00 2001 From: user Date: Mon, 21 Sep 2015 19:11:25 +0000 Subject: [PATCH 1/6] added a .gitignore for character files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9e7edcd --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +character_*.png +character_*.txt From f9b139b9fb7118a343372262701bcf1dc96da5de Mon Sep 17 00:00:00 2001 From: user Date: Thu, 24 Sep 2015 20:33:24 +0000 Subject: [PATCH 2/6] better file handling Don't download again if the final saved file exists (the preview) Don't modify files in-place. --- update_from_db.py | 86 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 28 deletions(-) diff --git a/update_from_db.py b/update_from_db.py index be6930c..b5c0c9b 100755 --- a/update_from_db.py +++ b/update_from_db.py @@ -2,16 +2,36 @@ from http.client import HTTPConnection import json import base64 +import sys + +def die(message,code=23): + print(message,file=sys.stderr) + raise SystemExit(code) server = "minetest.fensta.bplaced.net" skinsdir = "u_skins/textures/" metadir = "u_skins/meta/" -i = 1 -pages = 1 +curskin = 0 +pages = None + +def replace(path,encoding=None): + mode = "wt" if encoding else "wb" + # an unpredictable temp name only needed for a+rwxt directories + tmp = '.'+path+'-tmp' + def deco(handle): + with open(tmp,mode,encoding=encoding) as out: + yield out + os.rename(tmp,path) + return deco + +def maybeReplace(path,encoding=None): + def deco(handle): + if os.path.exists(path): return + return replace(path,encoding)(handle) c = HTTPConnection(server) def addpage(page): - global i, pages + global curskin, pages print("Page: " + str(page)) r = 0 try: @@ -20,42 +40,52 @@ def addpage(page): except Exception: if r != 0: if r.status != 200: - print("Error", r.status) - exit(r.status) + die("Error", r.status) return data = r.read().decode() l = json.loads(data) if not l["success"]: - print("Success != True") - exit(1) + die("Success != True") r = 0 pages = int(l["pages"]) + foundOne = False for s in l["skins"]: - f = open(skinsdir + "character_" + str(i) + ".png", "wb") - f.write(base64.b64decode(bytes(s["img"], 'utf-8'))) - f.close() - f = open(metadir + "character_" + str(i) + ".txt", "w") - f.write(str(s["name"]) + '\n') - f.write(str(s["author"]) + '\n') - f.write(str(s["license"])) - f.close() + # make sure to increment this, even if the preview exists! + curskin = curskin + 1 + preview = skinsdir + "character_" + str(curskin) + "_preview.png" + if os.path.exists(preview): continue + foundOne = True + tmp = dest+'-tmp' + @maybeReplace(skinsdir + "character_" + str(curskin) + ".png") + def go(f): + f.write(base64.b64decode(bytes(s["img"], 'utf-8'))) + f.close() + + @maybeReplace(metadir + "character_" + str(curskin) + ".txt", + encoding='utf-8') + def go(f): + f.write(str(s["name"]) + '\n') + f.write(str(s["author"]) + '\n') + f.write(str(s["license"])) try: c.request("GET", "/skins/1/" + str(s["id"]) + ".png") r = c.getresponse() - except Exception: - if r != 0: - if r.status != 200: - print("Error", r.status) + except HTTPException as e: + print(type(e),dir(e)) + raise(e) + if r.status != 200: + print("Error", r.status) continue - - data = r.read() - f = open(skinsdir + "character_" + str(i) + "_preview.png", "wb") - f.write(data) - f.close() - i = i + 1 + @replace(preview) + def go(f): + shutil.copyfileobj(r,f) + if not foundOne: + print("No skins updated on this page. Seems we're done?") + raise SystemExit addpage(1) -if pages > 1: - for p in range(pages-1): - addpage(p+2) +curpage = 1 +while pages > curpage: + curpage = curpage + 1 + addpage(curpage) print("Skins have been updated!") From 8f7e189e0adcd66916ed91b5ed6cf10f6139dc52 Mon Sep 17 00:00:00 2001 From: user Date: Thu, 24 Sep 2015 20:35:37 +0000 Subject: [PATCH 3/6] fixing better file handling locations are important, not just the filenames also getresponse needs to be closed --- update_from_db.py | 59 ++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/update_from_db.py b/update_from_db.py index b5c0c9b..0cb00ee 100755 --- a/update_from_db.py +++ b/update_from_db.py @@ -1,12 +1,13 @@ #!/usr/bin/python3 -from http.client import HTTPConnection +from http.client import HTTPConnection,HTTPException import json import base64 -import sys +from contextlib import closing +import sys,os,shutil def die(message,code=23): - print(message,file=sys.stderr) - raise SystemExit(code) + print(message,file=sys.stderr) + raise SystemExit(code) server = "minetest.fensta.bplaced.net" skinsdir = "u_skins/textures/" @@ -14,20 +15,24 @@ metadir = "u_skins/meta/" curskin = 0 pages = None -def replace(path,encoding=None): +def replace(location,base,encoding=None,path=None): + if path is None: + path = os.path.join(location,base) mode = "wt" if encoding else "wb" # an unpredictable temp name only needed for a+rwxt directories - tmp = '.'+path+'-tmp' + tmp = os.path.join(location,'.'+base+'-tmp') def deco(handle): with open(tmp,mode,encoding=encoding) as out: - yield out + handle(out) os.rename(tmp,path) return deco -def maybeReplace(path,encoding=None): +def maybeReplace(location,base,encoding=None): def deco(handle): + path = os.path.join(location,base) if os.path.exists(path): return - return replace(path,encoding)(handle) + return replace(location,base,encoding=encoding,path=path)(handle) + return deco c = HTTPConnection(server) def addpage(page): @@ -53,33 +58,39 @@ def addpage(page): for s in l["skins"]: # make sure to increment this, even if the preview exists! curskin = curskin + 1 - preview = skinsdir + "character_" + str(curskin) + "_preview.png" - if os.path.exists(preview): continue + previewbase = "character_" + str(curskin) + "_preview.png" + preview = os.path.join(skinsdir, previewbase) + if os.path.exists(preview): + print('skin',curskin,'already retrieved') + continue + print('updating skin',curskin) foundOne = True - tmp = dest+'-tmp' - @maybeReplace(skinsdir + "character_" + str(curskin) + ".png") + @maybeReplace(skinsdir, "character_" + str(curskin) + ".png") def go(f): f.write(base64.b64decode(bytes(s["img"], 'utf-8'))) f.close() - @maybeReplace(metadir + "character_" + str(curskin) + ".txt", + @maybeReplace(metadir, "character_" + str(curskin) + ".txt", encoding='utf-8') def go(f): f.write(str(s["name"]) + '\n') f.write(str(s["author"]) + '\n') f.write(str(s["license"])) + url = "/skins/1/" + str(s["id"]) + ".png" try: - c.request("GET", "/skins/1/" + str(s["id"]) + ".png") - r = c.getresponse() + c.request("GET", url) + with closing(c.getresponse()) as r: + if r.status != 200: + print("Error", r.status) + continue + @replace(skinsdir,previewbase,path=preview) + def go(f): + shutil.copyfileobj(r,f) except HTTPException as e: - print(type(e),dir(e)) - raise(e) - if r.status != 200: - print("Error", r.status) - continue - @replace(preview) - def go(f): - shutil.copyfileobj(r,f) + die("Couldn't get {} because of a {} (url={})".format( + s["id"], + e, + url)) if not foundOne: print("No skins updated on this page. Seems we're done?") raise SystemExit From 9027231f7f8a3e7d9dbaab3101b8818c819c414f Mon Sep 17 00:00:00 2001 From: user Date: Thu, 24 Sep 2015 20:39:48 +0000 Subject: [PATCH 4/6] pipelining Python http.client has pipelining disabled in the most hard coded fashion possible with python, making implementing pipelining deliberately difficult. BUT I COULD GET ALL THE PREVIEW THUMBS AS A ONE BIG BLOB NOT LEETLE PIECES --- update_from_db.py | 191 ++++++++++++++++++++++++++++++---------------- 1 file changed, 125 insertions(+), 66 deletions(-) diff --git a/update_from_db.py b/update_from_db.py index 0cb00ee..8266889 100755 --- a/update_from_db.py +++ b/update_from_db.py @@ -1,9 +1,9 @@ #!/usr/bin/python3 -from http.client import HTTPConnection,HTTPException +from http.client import HTTPConnection,HTTPException,BadStatusLine import json import base64 from contextlib import closing -import sys,os,shutil +import sys,os,shutil,time def die(message,code=23): print(message,file=sys.stderr) @@ -13,6 +13,7 @@ server = "minetest.fensta.bplaced.net" skinsdir = "u_skins/textures/" metadir = "u_skins/meta/" curskin = 0 +curpage = 1 pages = None def replace(location,base,encoding=None,path=None): @@ -34,69 +35,127 @@ def maybeReplace(location,base,encoding=None): return replace(location,base,encoding=encoding,path=path)(handle) return deco -c = HTTPConnection(server) -def addpage(page): - global curskin, pages - print("Page: " + str(page)) - r = 0 - try: - c.request("GET", "/api/get.json.php?getlist&page=" + str(page) + "&outformat=base64") - r = c.getresponse() - except Exception: - if r != 0: - if r.status != 200: - die("Error", r.status) - return - - data = r.read().decode() - l = json.loads(data) - if not l["success"]: - die("Success != True") - r = 0 - pages = int(l["pages"]) - foundOne = False - for s in l["skins"]: - # make sure to increment this, even if the preview exists! - curskin = curskin + 1 - previewbase = "character_" + str(curskin) + "_preview.png" - preview = os.path.join(skinsdir, previewbase) - if os.path.exists(preview): - print('skin',curskin,'already retrieved') - continue - print('updating skin',curskin) - foundOne = True - @maybeReplace(skinsdir, "character_" + str(curskin) + ".png") - def go(f): - f.write(base64.b64decode(bytes(s["img"], 'utf-8'))) - f.close() - - @maybeReplace(metadir, "character_" + str(curskin) + ".txt", - encoding='utf-8') - def go(f): - f.write(str(s["name"]) + '\n') - f.write(str(s["author"]) + '\n') - f.write(str(s["license"])) - url = "/skins/1/" + str(s["id"]) + ".png" +class Pipeline(list): + "Gawd why am I being so elaborate?" + def __init__(self, threshold=10): + "threshold is how many requests in parallel to pipeline" + self.threshold = threshold + self.sent = True + def __enter__(self,*a): + self.reopen() + def __exit__(self): + self.drain() + def reopen(self): + self.c = HTTPConnection(server) + self.send() + def append(self,url,recv,diemessage): + super().append((url,recv,diemessage)) + if len(self) > self.threshold: + self.send() + self.drain() + def trydrain(self): + for url,recv,diemessage in self: + try: + recv(self.c) + except BadStatusLine as e: + return False + except HTTPException as e: + die(diemessage+' (url='+url+')') + self.clear() + return True + def drain(self): + print('draining pipeline...') + assert self.sent, "Can't drain without sending the requests!" + self.sent = False + while trydrain() is not True: + self.c.close() + print('derped requesting',url) + print('drain failed, trying again') + time.sleep(1) + self.reopen() + def trysend(self): + for url,_,diemessage in pipeline: + try: + self.c.request("GET", url) + except BadStatusLine: + return False + except HTTPException as e: + die(diemessage) + return True + def send(self): + if self.sent: return + print('filling pipeline...') + while self.tryresend() is not True: + self.c.close() + print('derped resending') + time.sleep(1) + self.reopen() + self.sent = True + +with Pipeline() as pipeline: + # two connections is okay, right? one for json, one for preview images + c = HTTPConnection(server) + def addpage(page): + global curskin, pages + print("Page: " + str(page)) + r = 0 try: - c.request("GET", url) - with closing(c.getresponse()) as r: + c.request("GET", "/api/get.json.php?getlist&page=" + str(page) + "&outformat=base64") + r = c.getresponse() + except Exception: + if r != 0: if r.status != 200: - print("Error", r.status) - continue - @replace(skinsdir,previewbase,path=preview) - def go(f): - shutil.copyfileobj(r,f) - except HTTPException as e: - die("Couldn't get {} because of a {} (url={})".format( - s["id"], - e, - url)) - if not foundOne: - print("No skins updated on this page. Seems we're done?") - raise SystemExit -addpage(1) -curpage = 1 -while pages > curpage: - curpage = curpage + 1 - addpage(curpage) -print("Skins have been updated!") + die("Error", r.status) + return + + data = r.read().decode() + l = json.loads(data) + if not l["success"]: + die("Success != True") + r = 0 + pages = int(l["pages"]) + foundOne = False + for s in l["skins"]: + # make sure to increment this, even if the preview exists! + curskin = curskin + 1 + previewbase = "character_" + str(curskin) + "_preview.png" + preview = os.path.join(skinsdir, previewbase) + if os.path.exists(preview): + print('skin',curskin,'already retrieved') + continue + print('updating skin',curskin,'id',s["id"]) + foundOne = True + @maybeReplace(skinsdir, "character_" + str(curskin) + ".png") + def go(f): + f.write(base64.b64decode(bytes(s["img"], 'utf-8'))) + f.close() + + @maybeReplace(metadir, "character_" + str(curskin) + ".txt", + encoding='utf-8') + def go(f): + f.write(str(s["name"]) + '\n') + f.write(str(s["author"]) + '\n') + f.write(str(s["license"])) + url = "/skins/1/" + str(s["id"]) + ".png" + def tryget(c): + with closing(c.getresponse()) as r: + if r.status != 200: + print("Error", r.status) + return + @replace(skinsdir,previewbase,path=preview) + def go(f): + shutil.copyfileobj(r,f) + + pipeline.append(url,tryget, + "Couldn't get {} because of a {}".format( + s["id"], + e)) + if not foundOne: + print("No skins updated on this page. Seems we're done?") + #raise SystemExit + addpage(1) + while pages > curpage: + curpage = curpage + 1 + addpage(curpage) + print("Skins have been updated!") + From 7778dc556084f6e70c28bf9bdc66ac22cdd5b1bd Mon Sep 17 00:00:00 2001 From: user Date: Thu, 24 Sep 2015 20:40:32 +0000 Subject: [PATCH 5/6] trying to fix pipelining also trying to understand it. according to python: 1) you send a request 2) you MUST get response headers for (1) (THIS IS MANDATORY) 3) you send another request 4) you get response body for (2) 5) response headers for (3) 6) response body for (5) Only two requests can be pipelined. Surely this is an unavoidable, wait no it's just written into the code to error out if you don't do it that way. according to reality: 1) you send a request 2) you do not get response headers for (1) 3) you repeat steps 1-2 until enough responses are queued 4) you receive those responses as header,body,header,body... they even name it with a __ so to make it hard to override, but the state can safely go to Idle after a request has sent, whether or not response headers have come in. Sure the connection might close, but then you adjust to not pipeline, and re-send the rest of your requests over a new connection. --- update_from_db.py | 65 ++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/update_from_db.py b/update_from_db.py index 8266889..16fb0c3 100755 --- a/update_from_db.py +++ b/update_from_db.py @@ -1,5 +1,5 @@ #!/usr/bin/python3 -from http.client import HTTPConnection,HTTPException,BadStatusLine +from http.client import HTTPConnection,HTTPException,BadStatusLine,_CS_IDLE import json import base64 from contextlib import closing @@ -35,57 +35,72 @@ def maybeReplace(location,base,encoding=None): return replace(location,base,encoding=encoding,path=path)(handle) return deco +class Penguin: + "idk" + def __init__(self, url, recv, diemessage): + self.url = url + self.recv = recv + self.diemessage = diemessage + class Pipeline(list): "Gawd why am I being so elaborate?" def __init__(self, threshold=10): "threshold is how many requests in parallel to pipeline" self.threshold = threshold self.sent = True - def __enter__(self,*a): + def __enter__(self): self.reopen() - def __exit__(self): + return self + def __exit__(self,typ,exn,trace): + self.send() self.drain() def reopen(self): self.c = HTTPConnection(server) self.send() def append(self,url,recv,diemessage): - super().append((url,recv,diemessage)) + self.sent = False + super().append(Penguin(url,recv,diemessage)) if len(self) > self.threshold: self.send() self.drain() def trydrain(self): - for url,recv,diemessage in self: + for penguin in self: try: - recv(self.c) + penguin.response.begin() + penguin.recv(penguin.response) except BadStatusLine as e: + print('derped requesting',penguin.url) return False except HTTPException as e: - die(diemessage+' (url='+url+')') + die(penguin.diemessage+' '+repr(e)+' (url='+penguin.url+')') self.clear() return True def drain(self): print('draining pipeline...') assert self.sent, "Can't drain without sending the requests!" self.sent = False - while trydrain() is not True: + while self.trydrain() is not True: self.c.close() - print('derped requesting',url) print('drain failed, trying again') time.sleep(1) self.reopen() def trysend(self): - for url,_,diemessage in pipeline: + for penguin in pipeline: try: - self.c.request("GET", url) + self.c.request("GET", penguin.url) + self.c._HTTPConnection__state = _CS_IDLE + penguin.response = self.c.response_class(self.c.sock, + method="GET") + # begin LATER so we can send multiple requests w/out response headers except BadStatusLine: return False except HTTPException as e: - die(diemessage) + die(diemessage+' because of a '+repr(e)) return True def send(self): if self.sent: return print('filling pipeline...') - while self.tryresend() is not True: + while self.trysend() is not True: self.c.close() print('derped resending') time.sleep(1) @@ -137,23 +152,21 @@ with Pipeline() as pipeline: f.write(str(s["author"]) + '\n') f.write(str(s["license"])) url = "/skins/1/" + str(s["id"]) + ".png" - def tryget(c): - with closing(c.getresponse()) as r: - if r.status != 200: - print("Error", r.status) - return - @replace(skinsdir,previewbase,path=preview) - def go(f): - shutil.copyfileobj(r,f) - + def tryget(r): + if r.status != 200: + print("Error", r.status) + return + @replace(skinsdir,previewbase,path=preview) + def go(f): + shutil.copyfileobj(r,f) + pipeline.append(url,tryget, - "Couldn't get {} because of a {}".format( - s["id"], - e)) + "Couldn't get {} because of a".format( + s["id"])) if not foundOne: print("No skins updated on this page. Seems we're done?") #raise SystemExit - addpage(1) + addpage(curpage) while pages > curpage: curpage = curpage + 1 addpage(curpage) From 057b4cce3cd4770dc93051722023fdc5f49fa024 Mon Sep 17 00:00:00 2001 From: user Date: Thu, 24 Sep 2015 20:41:32 +0000 Subject: [PATCH 6/6] fixed pipelining cleaning everything up, working around python's stupidity for capturing loop variables (it was saving every image as ...preview_584.png), testing it on 1-9 to make sure they come in as a single block of drains. It works, yay! --- update_from_db.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/update_from_db.py b/update_from_db.py index 16fb0c3..159acf7 100755 --- a/update_from_db.py +++ b/update_from_db.py @@ -6,8 +6,8 @@ from contextlib import closing import sys,os,shutil,time def die(message,code=23): - print(message,file=sys.stderr) - raise SystemExit(code) + print(message,file=sys.stderr) + raise SystemExit(code) server = "minetest.fensta.bplaced.net" skinsdir = "u_skins/textures/" @@ -65,6 +65,7 @@ class Pipeline(list): self.drain() def trydrain(self): for penguin in self: + print('drain',penguin.url) try: penguin.response.begin() penguin.recv(penguin.response) @@ -73,10 +74,10 @@ class Pipeline(list): return False except HTTPException as e: die(penguin.diemessage+' '+repr(e)+' (url='+penguin.url+')') - self.clear() - return True + self.clear() + return True def drain(self): - print('draining pipeline...') + print('draining pipeline...',len(self)) assert self.sent, "Can't drain without sending the requests!" self.sent = False while self.trydrain() is not True: @@ -86,6 +87,7 @@ class Pipeline(list): self.reopen() def trysend(self): for penguin in pipeline: + print('fill',penguin.url) try: self.c.request("GET", penguin.url) self.c._HTTPConnection__state = _CS_IDLE @@ -99,7 +101,7 @@ class Pipeline(list): return True def send(self): if self.sent: return - print('filling pipeline...') + print('filling pipeline...',len(self)) while self.trysend() is not True: self.c.close() print('derped resending') @@ -152,15 +154,19 @@ with Pipeline() as pipeline: f.write(str(s["author"]) + '\n') f.write(str(s["license"])) url = "/skins/1/" + str(s["id"]) + ".png" - def tryget(r): - if r.status != 200: - print("Error", r.status) - return - @replace(skinsdir,previewbase,path=preview) - def go(f): - shutil.copyfileobj(r,f) + def closure(skinsdir,previewbase,preview,s): + "explanation: python sucks" + def tryget(r): + print('replacing',s["id"]) + if r.status != 200: + print("Error", r.status) + return + @replace(skinsdir,previewbase,path=preview) + def go(f): + shutil.copyfileobj(r,f) + return tryget - pipeline.append(url,tryget, + pipeline.append(url,closure(skinsdir,previewbase,preview,s), "Couldn't get {} because of a".format( s["id"])) if not foundOne: