While analysing network transfer speeds using a hypercorn ASGI app and the requests API, I see a significant drop in speed when the received data is collected into a list compared to when it is discarded.
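(For context, the requests-based client streamed the response roughly like the sketch below. This is illustrative only, not my exact code; the socket-based test.py further down is what the measurements now use.)

import requests

ONE_MB = 2**20

# Illustrative sketch of the requests-based client (not the exact code used);
# it assumes the server from the next section bound to localhost:7001.
resp = requests.get("http://localhost:7001/", stream=True)
chunks = []
for chunk in resp.iter_content(chunk_size=ONE_MB):
    chunks.append(chunk)  # commenting this out is the fast case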
ASGI app
from typing import Any

ONE_KB = 1024
ONE_MB = ONE_KB * ONE_KB
ONE_GB = ONE_MB * ONE_KB

chunk_size = 256 * ONE_KB
dump = bytes(ONE_GB)  # 1 GB


async def app(scope: Any, receive: Any, send: Any) -> None:
    '''
    Run: `hypercorn app:app --bind localhost:7001`
    '''
    assert scope["type"] == "http"
    await send(
        {
            "type": "http.response.start",
            "status": 200,
            "headers": [
                [b"Content-Type", b"application/octet-stream"],
                [b"Content-Length", str(len(dump)).encode()],
            ],
        }
    )
    chunks = len(dump) // chunk_size
    for i in range(chunks):
        await send(
            {
                "type": "http.response.body",
                "body": dump[i * chunk_size : (i + 1) * chunk_size],
                "more_body": i != chunks - 1,
            }
        )
test.py
import io
import os
import socket
import time

import psutil

ONE_MB = 2**20
data_size_mb = 1000
host, port = "localhost", 7001  # matches the hypercorn bind address


def test_speed():
    path = "/"
    print(psutil.Process(os.getpid()).memory_info())
    load_mem = [b"a" * ONE_MB for _ in range(20 * 1000)]  # << ==== This line doesn't impact the transfer speeds!
    print(psutil.Process(os.getpid()).memory_info())
    start_time = time.time()
    # Create a TCP socket
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Connect to the server
        s.connect((host, port))
        # Send HTTP GET request
        request = f"GET {path} HTTP/1.1\r\nHost: {host}\r\n\r\n"
        s.sendall(request.encode())
        # Create a BufferedReader from the socket
        bufferedReader = io.BufferedReader(s.makefile("rb"))
        try:
            # Read response headers
            headers = bufferedReader.readline()
            while headers.strip():
                headers = bufferedReader.readline()
            bufferedReader.flush()
            # Receive response in chunks
            recv_size = 0
            chunks = []
            for _ in range(20 * 1000):
                data = bufferedReader.read(ONE_MB)
                chunks.append(data)  # <<< ===== This line is causing the above line to slow down !!!
                recv_size += len(data)
            psutil.Process(os.getpid()).memory_info()
        finally:
            bufferedReader.close()
    recv_data_mb = recv_size / ONE_MB
    print("[*] Received data ", recv_data_mb, "MB")
    print("[*] Chunks size {}".format(len(chunks)))
    end_time = time.time()
    recv_time = end_time - start_time
    recv_speed = recv_data_mb / recv_time
    print("===========================================")
    print("Data size: {:.2f} MB".format(data_size_mb))
    print("Recv data size: {:.2f} MB".format(recv_data_mb))
    print("Recv time: {:.6f} seconds".format(recv_time))
    print("Recv speed: {:.2f} mbps".format(recv_speed))
    print("===========================================")


if __name__ == "__main__":
    test_speed()
Expectation:
The transfer speed should be identical whether or not we collect the data into the chunks variable.
Actual:
The transfer speed with chunks.append(data) commented out was around 2.6 gbps; with it enabled, it dropped to about 1.4 gbps.
EDIT #1:
The machine has over 1 TB of RAM. To check whether it is a memory issue, I created a dummy list of size 20 GB before the response streaming and the transfer speed was 2.6 gbps. But if I create the list DURING the streaming, the speed drops to 1.4 gbps.
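Roughly, the two variants look like this (an illustrative sketch; bufferedReader is the socket reader from test.py):

# Sketch of the two EDIT #1 variants (not the exact harness).

# Variant A: allocate the 20 GB dummy list up front, then stream -> ~2.6 gbps
load_mem = [b"a" * ONE_MB for _ in range(20 * 1000)]
for _ in range(20 * 1000):
    data = bufferedReader.read(ONE_MB)

# Variant B: grow the list while streaming -> ~1.4 gbps
load_mem = []
for _ in range(20 * 1000):
    data = bufferedReader.read(ONE_MB)
    load_mem.append(b"a" * ONE_MB)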
Running cProfile on the two runs showed that in the faster run, <method 'recv_into' of '_socket.socket' objects> was called by the BufferedReader almost 10 times more often, leading me to suspect that the effective buffer size changes when I increase memory usage during streaming.
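A minimal way to reproduce that comparison with cProfile (a sketch; it assumes test_speed() is defined in the script being run):

import cProfile
import pstats

# Sketch: profile one run, then restrict the report to recv_into to compare
# call counts between the fast and slow configurations.
cProfile.run("test_speed()", "speed.prof")
pstats.Stats("speed.prof").sort_stats("cumulative").print_stats("recv_into")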
EDIT #2:
- To close in on the bug, I'm now using a raw socket + BufferedReader instead of the requests library; the behaviour is the same.
- As pointed out by Steffen, I'm ensuring that actual physical memory is being used.
EDIT #3:
- Adding line_profiler outputs to highlight the slowdown in bufferedReader.read(...), where the time taken almost doubled.
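(For reference, tables in this format can be produced with line_profiler roughly as follows; a sketch, assuming the line_profiler package is installed.)

# Sketch: decorate the function and run the script under kernprof.
# The `profile` decorator is injected into builtins by kernprof at runtime.
@profile
def test_speed():
    ...

# Then, from a shell:
#   kernprof -l -v test.py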
Fast Run
Line # Hits Time Per Hit % Time Line Contents
==============================================================
95 1 0.2 0.2 0.0 try:
96 # Read response headers
97 1 1178.9 1178.9 0.0 headers = bufferedReader.readline()
98 7 14.1 2.0 0.0 while headers.strip():
99 6 3.8 0.6 0.0 headers = bufferedReader.readline()
100 1 3.9 3.9 0.0 bufferedReader.flush()
101
102 # Receive response in chunks and write to file
103 1 0.8 0.8 0.0 recv_size = 0
104 1 0.7 0.7 0.0 chunks = []
105 20001 7028.7 0.4 0.0 for _ in range(20 * 1000):
106 20000 7692581.8 384.6 38.2 data = bufferedReader.read(ONE_MB)
107 # chunks.append(data) # <<< ===== This line is causing the above line to slow down !!!
108 20000 27589.1 1.4 0.1 recv_size += len(data)
Slow Run
Line # Hits Time Per Hit % Time Line Contents
==============================================================
95 1 0.4 0.4 0.0 try:
96 # Read response headers
97 1 1073.7 1073.7 0.0 headers = bufferedReader.readline()
98 7 7.8 1.1 0.0 while headers.strip():
99 6 3.7 0.6 0.0 headers = bufferedReader.readline()
100 1 1.5 1.5 0.0 bufferedReader.flush()
101
102 # Receive response in chunks and write to file
103 1 0.2 0.2 0.0 recv_size = 0
104 1 0.4 0.4 0.0 chunks = []
105 20001 6652.0 0.3 0.0 for _ in range(20 * 1000):
106 20000 13057516.0 652.9 50.6 data = bufferedReader.read(ONE_MB)
107 20000 17805.7 0.9 0.1 chunks.append(data) # <<< ===== This line is causing the above line to slow down !!!
108 20000 22505.9 1.1 0.1 recv_size += len(data)
EDIT #4:
- Based on Steffen's comment, I tried appending not the response from the server but an unrelated random 1 MB bytes object to the list. Oddly enough, the bufferedReader.read() times were still high even though that memory allocation is unrelated to the received data. But when I perform a del data at the end of each iteration, the bufferedReader.read() times drop by over 50%.
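Concretely, the two loop variants profiled below look like this (sketch):

# Sketch of the EDIT #4 loop variants.

# Variant 1: append an unrelated 1 MB object every iteration
for _ in range(20 * 1000):
    data = bufferedReader.read(ONE_MB)
    recv_size += len(data)
    chunks.append(b"b" * ONE_MB)

# Variant 2: same, but also drop the reference to the received chunk
for _ in range(20 * 1000):
    data = bufferedReader.read(ONE_MB)
    recv_size += len(data)
    chunks.append(b"b" * ONE_MB)
    del data  # this alone brings bufferedReader.read() back down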
Additionally just adding random 1 MB to chunks every iteration
Line # Hits Time Per Hit % Time Line Contents
==============================================================
106 20001 6580.4 0.3 0.0 for _ in range(20 * 1000):
107 20000 14320784.1 716.0 50.3 data = bufferedReader.read(ONE_MB)
108 20000 25183.2 1.3 0.1 recv_size += len(data)
109 20000 1284105.9 64.2 4.5 chunks.append(b"b" * ONE_MB)
110 # bufferedReader.flush() # <<< ===== This line doesn't…
111 # chunks.append(copy(data)) # <<< ===== This line is c…
112 # del data # <<< ===== Running this, improves the runt…
113 1 470.7 470.7 0.0 psutil.Process(os.getpid()).memory_info()
Adding random 1 MB to chunks and deleting data every iteration
Line # Hits Time Per Hit % Time Line Contents
==============================================================
106 20001 5896.1 0.3 0.0 for _ in range(20 * 1000):
107 20000 5089738.8 254.5 17.2 data = bufferedReader.read(ONE_MB)
108 20000 21017.8 1.1 0.1 recv_size += len(data)
109 20000 12253592.9 612.7 41.3 chunks.append(b"b" * ONE_MB)
110 # bufferedReader.flush() # <<< ===== This line doesn't…
111 # chunks.append(copy(data)) # <<< ===== This line is c…
112 20000 13553.3 0.7 0.0 del data # <<< ===== Running this, improves the runtim…