[Tutor] Multiprocessing question
Albert-Jan Roskam
sjeik_appie at hotmail.com
Sun Oct 29 10:27:51 EDT 2023
Hi,
I'm trying to create multiprocessing task that runs multiple SQL queries
in parallel (COPY TO csv), then adds those csv files to a zip. In the code
below, tge csv files don't end up in the zip. Why not?
I'm using a buffer instead of a file for the zip because in my actual
scenario, a Flask endpoint returns the .zip
Thanks in advance!
Albert-Jan
import io
import logging
import multiprocessing as mp
#from multiprocessing.shared_memory import SharedMemory
from random import randint
from time import sleep
from zipfile import ZipFile, ZIP_DEFLATED
mp.log_to_stderr(logging.DEBUG)
def task(lock, archive):
# Do some work in parellel (SQL), followed
# by adding the query result to a zip
name = mp.current_process().name
print(f"==begin {name}")
sleep(randint(1, 5))
csv_buff = io.BytesIO(b"name = " + name.encode("utf-8"))
with lock:
print(f"* begin zipping {name}")
sleep(randint(1, 5))
data = csv_buff.getvalue()
print("csv data", data)
archive.writestr(name, data)
print(f"* end zipping {name}")
print(f"==end {name}")
def main():
#shm = SharedMemory(create=True, size=2048) # read-only! :-(
#zip_buff = shm.buf
zip_buff = io.BytesIO()
archive = ZipFile(zip_buff, "w", ZIP_DEFLATED, False, 2)
lock = mp.Lock()
procs = [mp.Process(target=task, args=(lock, archive)) for _ in
range(3)]
for proc in procs:
proc.start()
for proc in procs:
proc.join()
with archive:
archive.writestr("main", b"from main")
zip_buff.seek(0)
data = zip_buff.getvalue()
assert len(data), "No data!"
print("data:", data)
with open("archive.zip", mode="wb") as archive:
archive.write(data)
if __name__ == "__main__":
main()
More information about the Tutor
mailing list