Compare commits


No commits in common. "a292a488772dfd6ef5ca2387061ca0d2017c9c69" and "340f199b3acef13a9be06a3b087b320e514b417f" have entirely different histories.

8 changed files with 46 additions and 110 deletions

View File

@@ -1,23 +1,15 @@
# Ethereum network sender address mapper
This script, designed for deployment in a Docker container, monitors the Ethereum network's mempool via a WebSocket connection. It tracks sender addresses and their transaction counts, generating a list of the most active addresses. This data can be used to black-/whitelist addresses in other research projects, ensuring targeting is done based on up-to-date information.
## Configuration
A list of the possible environment variables and their purpose:
- `MODE`: Either `development` or `production`; the logging level is set based on this
- `EXPORT_INTERVAL`: How often (in seconds) the SQLite database is exported as a JSON file; has no effect if `IS_EXPORT` is not true
  - Note that it is set as a string (as are all environment variables)
- `IS_EXPORT`: Boolean that indicates whether the aforementioned export task is enabled
  - Possible values that are interpreted as `True` (case insensitive): `true`, `1`, and `t`
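For reference, this is how the script interprets these variables at startup; a minimal sketch mirroring `load_cfg` in `main.py`, with the defaults from `src/const.py`:
```python
import os

# All environment variables arrive as strings; defaults are from src/const.py
mode = os.getenv("MODE", "production").lower()
export_interval = int(os.getenv("EXPORT_INTERVAL", "10800"))  # seconds
is_export = os.getenv("IS_EXPORT", "False").lower() in ("true", "1", "t")
```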
Script that, once deployed in a Docker container, monitors a live feed of the Ethereum network via a WebSocket connection, stores the sender addresses with transaction counts, and creates statistics of the most active addresses.
## Development
Most critically, `MODE=development` should be specified, as it changes the logging level from `INFO` to `DEBUG`. A low `EXPORT_INTERVAL` should be used when testing the export functionality.
```shell
mkvirtualenv chainmapper # OR 'workon chainmapper'
pip3 install -r requirements.txt
touch .env # Optional, see the previous section
touch .env && echo -e "MODE=\"development\"\nEXPORT_INTERVAL=\"60\"" > .env # 60 seconds export period for testing
```
## Usage
@@ -26,13 +18,5 @@ The included `deploy.sh` shell script should be used for any kind of (development
```shell
chmod +x ./scripts/deploy.sh
# Add `-y` flag to automatically overwrite existing containers with the same name
./scripts/deploy.sh
```
Use the following command if you wish to proxy the WebSocket connection:
```shell
# Proxy format: <protocol>://<ip>:<port>
./scripts/deploy.sh -p <proxy>
```

main.py
View File

@@ -1,19 +1,17 @@
#!/usr/bin/env python3
import os
import sys
import asyncio
import threading
import logging
import signal
from collections import namedtuple
import sqlite3
import aioprocessing
import websockets
from dotenv import load_dotenv
import requests
from src import const, mempool, db
from src.const import DEFAULT_MODE, DEFAULT_EXPORT_INTERVAL, DEFAULT_IS_EXPORT
from src.mempool import WebSocketThread, QueueProcessor
from src.db import Handler, periodic_export
Config = namedtuple("Config", ["mode", "export_interval", "is_export"])
@@ -43,19 +41,15 @@ def load_cfg(dotenv_path=".env"):
    load_dotenv(dotenv_path)
    print(f"[+] Environment variables loaded from '{dotenv_path}'\n---")
    mode = os.getenv("MODE", const.DEFAULT_MODE).lower()
    export_interval = int(os.getenv("EXPORT_INTERVAL", const.DEFAULT_EXPORT_INTERVAL))
    is_export = os.getenv("IS_EXPORT", const.DEFAULT_IS_EXPORT).lower() in ("true", "1", "t")
    mode = os.getenv("MODE", DEFAULT_MODE).lower()
    export_interval = int(os.getenv("EXPORT_INTERVAL", DEFAULT_EXPORT_INTERVAL))
    is_export = os.getenv("IS_EXPORT", DEFAULT_IS_EXPORT).lower() in ("true", "1", "t")
    cfg = Config(mode, export_interval, is_export)
    return cfg
def get_ip(addr=const.IP_TEST_ADDR):
    return requests.get(addr, timeout=10).text.strip()
def main():
    cfg = load_cfg()
@@ -66,31 +60,23 @@ def main():
    logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", level=log_level)
    logging.info("Logger initialized")
    logging.info("Currently running version %s", const.VERSION)
    logging.info("MODE: %s", cfg.mode)
    logging.info("EXPORT_INTERVAL: %d (seconds)", cfg.export_interval)
    logging.info("IS_EXPORT: %r", cfg.is_export)
    logging.info("IP: %s", get_ip())
    # Information for debugging issues caused by potential version differences
    logging.info("Python version: %s", sys.version)
    logging.info("aioprocessing version: %s", aioprocessing.__version__)
    logging.info("websockets version: %s", websockets.__version__)
    logging.info("sqlite3 version: %s", sqlite3.version)
    # FIFO queue for cross-thread communications
    q = aioprocessing.AioQueue()
    handler = db.Handler()
    handler = Handler()
    shutdown_event = threading.Event()
    shutdown_loop = asyncio.new_event_loop()
    export_loop = asyncio.new_event_loop()
    ws_thread = mempool.WebSocketThread(q, shutdown_event)
    qp_thread = mempool.QueueProcessor(q, shutdown_event, handler)
    ws_thread = WebSocketThread(q, shutdown_event)
    qp_thread = QueueProcessor(q, shutdown_event, handler)
    export_thread = threading.Thread(
        target=db.periodic_export,
        target=periodic_export,
        args=(
            export_loop,
            handler,
@@ -106,13 +92,10 @@ def main():
    def handle_exit():
        logging.info("Shutdown procedure initialized")
        shutdown_event.set()
        shutdown_loop.run_until_complete(shutdown(shutdown_loop))
        # NOTE: It's vital to close the queue processor first so that it doesn't halt the shutdown
        qp_thread.join()
        ws_thread.join()
        qp_thread.join()
        export_thread.join()
    def handle_signal(signal, _frame):
@@ -124,8 +107,8 @@ def main():
    signal.signal(signal.SIGTERM, handle_signal)
    try:
        qp_thread.join()
        ws_thread.join()
        qp_thread.join()
    except KeyboardInterrupt:
        logging.info("Keyboard interrupt received, shutting down threads")
        handle_exit()
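The shutdown choreography above is easier to see in isolation: a shared `threading.Event` is set by the signal handler, each thread polls it, and the queue processor is joined first so it cannot stall the shutdown. A minimal self-contained sketch of the same pattern, with a hypothetical `worker` standing in for the real threads:
```python
#!/usr/bin/env python3
import signal
import threading
import time

shutdown_event = threading.Event()

def worker(name):
    # Each thread polls the shared event instead of blocking forever
    while not shutdown_event.is_set():
        time.sleep(0.1)  # stands in for one timeout-bounded unit of work
    print(f"{name} exiting")

qp_thread = threading.Thread(target=worker, args=("QueueProcessor",))
ws_thread = threading.Thread(target=worker, args=("WebSocketThread",))

def handle_signal(_signum, _frame):
    shutdown_event.set()

signal.signal(signal.SIGTERM, handle_signal)
qp_thread.start()
ws_thread.start()
shutdown_event.set()  # in the real script this is triggered via SIGTERM/SIGINT
# Join the queue processor first so it cannot halt the shutdown (see the NOTE above)
qp_thread.join()
ws_thread.join()
```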

View File

@@ -1,4 +1,3 @@
aioprocessing==2.0.1
python-dotenv==1.0.1
Requests==2.32.3
websockets==12.0

View File

@@ -1,61 +1,23 @@
#!/usr/bin/env bash
AUTOREMOVE=false
VOLUME_PATH="./data" # Local path to the volume's mount point
IS_PROXIED=false
PROXY=""
while getopts ":hyp:" opt
do
    case "$opt" in
        h)
            echo "Usage: $0 [-y] [-p <proxy>]"
            exit 0
            ;;
        y)
            echo -e "[+] Automatically removing all containers with the same tag (if any)\n"
            AUTOREMOVE=true
            ;;
        p)
            IS_PROXIED=true
            PROXY=${OPTARG}
            echo -e "[+] Proxying enabled: $PROXY"
            ;;
        *)
            exit 1
            ;;
    esac
done
echo "[+] Starting the deployment script"
! command -v docker &> /dev/null && echo "[!] Docker could not be found, exiting..." && exit 1
# Building with '--no-cache' ensures a fresh build will always be used
echo -e "[+] Building the Docker image without caching...\n"
echo -e "\n[+] Building the Docker image without caching..."
docker build --no-cache -t chainmapper .
[ ! -d "./data" ] && mkdir data && echo -e "\n[+] Created the default volume directory 'data'"
OLD_ID=$(docker ps -a -q -f name="chainmapper")
OLD_ID=$(docker ps -a -q -f name="chainmapper-prod")
if [ "$OLD_ID" ] && [ "$AUTOREMOVE" = true ]
then
echo -e "\n[+] Removing existing container with the same tag ($OLD_ID)"
docker stop "$OLD_ID" &> /dev/null
docker rm "$OLD_ID" &> /dev/null
elif [ "$OLD_ID" ]
if [ "$OLD_ID" ]
then
read -p "[?] Existing container found with id '$OLD_ID', do you want to remove it? " -n 1 -r
[[ "$REPLY" =~ ^[Yy]$ ]] || (echo "[!] Exiting..." && exit 0)
docker stop "$OLD_ID" &> /dev/null
docker rm "$OLD_ID" &> /dev/null
fi
echo -e "\n[+] Deploying the container with 'docker run' ('data' as the volume)..."
if [ "$IS_PROXIED" = true ]
then
# Override the default entrypoint to run the connections through the given proxy
docker run -it --restart unless-stopped -v $VOLUME_PATH:/app/data --name chainmapper --entrypoint /bin/bash -d chainmapper -c "HTTPS_PROXY=$PROXY python main.py"
else
docker run -it --restart unless-stopped -v $VOLUME_PATH:/app/data --name chainmapper -d chainmapper
fi
docker run -it --restart unless-stopped -v ./data:/app/data --name chainmapper-prod -d chainmapper

View File

@@ -2,7 +2,7 @@
# Included into the built image via its Dockerfile
DB_FILE="/app/data/chainmapper.sqlite3"
DB_FILE="/app/chainmapper.sqlite3"
if [ -s "$DB_FILE" ]; then
    exit 0

View File

@@ -1,7 +1,5 @@
import json
VERSION = "v1.1.0"
# Blockchain.com endpoint and the subscription message which initializes the "transaction stream"
WS_ADDR = "wss://ws.blockchain.info/coins"
# Optionally `confirmed_transaction` can be used instead (it delivers bursts of data rather than a steady stream, which is worse for overall performance)
@@ -10,6 +8,12 @@ SUB_MSG = json.dumps({"coin": "eth", "command": "subscribe", "entity": "pending_transaction"
# Pause before reconnecting after the WebSocket connection is unexpectedly dropped by either party
WS_RECONNECT_PAUSE = 2
# Timeout for asynchronous WebSocket reading (seconds)
WS_INTERMSG_TIMEOUT = 1
# Timeout for asynchronous queue operations (`coro_get` and `coro_put`, seconds)
QUEUE_OP_TIMEOUT = 1
# Paths inside the Docker container where data is stored/exported (should match the volume mounted in `deploy.sh`)
DEFAULT_DB_PATH = "./data/chainmapper.sqlite3"
DEFAULT_EXPORT_PATH = "./data/export.json"
@@ -18,5 +22,3 @@ DEFAULT_EXPORT_PATH = "./data/export.json"
DEFAULT_MODE = "production"
DEFAULT_EXPORT_INTERVAL = "10800"
DEFAULT_IS_EXPORT = "False"
IP_TEST_ADDR = "https://ipv4.icanhazip.com"
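As a quick illustration of how these constants are used, a minimal standalone sketch that opens the stream, subscribes, and prints a few raw messages (the production logic lives in `src/mempool.py`):
```python
#!/usr/bin/env python3
import asyncio
import json

import websockets

WS_ADDR = "wss://ws.blockchain.info/coins"
SUB_MSG = json.dumps({"coin": "eth", "command": "subscribe", "entity": "pending_transaction"})

async def main():
    async with websockets.connect(WS_ADDR) as ws:
        await ws.send(SUB_MSG)
        # Print a few raw messages to inspect the stream format
        for _ in range(3):
            print(await ws.recv())

asyncio.run(main())
```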

View File

@@ -4,13 +4,13 @@ import logging
import threading
import asyncio
from src import const
from src.const import DEFAULT_DB_PATH, DEFAULT_EXPORT_PATH
class Handler:
    """Handle all SQLite connections required to create, update, and export the stored addresses."""
    def __init__(self, database=const.DEFAULT_DB_PATH):
    def __init__(self, database=DEFAULT_DB_PATH):
        self.database = database
        # Notably `connect` automatically creates the database if it doesn't already exist
        self.con = sqlite3.connect(self.database, check_same_thread=False)
@@ -52,7 +52,7 @@
        )
        self.con.commit()
    async def export(self, filepath=const.DEFAULT_EXPORT_PATH):
    async def export(self, filepath=DEFAULT_EXPORT_PATH):
        """Export the addresses from the SQLite database in descending order based on the transaction counts."""
        with self.lock:
            logging.debug("Reentrant lock acquired")

View File

@@ -4,13 +4,13 @@ import threading
import logging
import websockets
from src import const
from src.const import WS_ADDR, SUB_MSG, WS_RECONNECT_PAUSE, WS_INTERMSG_TIMEOUT, QUEUE_OP_TIMEOUT
class WebSocketThread(threading.Thread):
    """Handle connection, subscription, and message parsing for the Blockchain.com WebSocket."""
    def __init__(self, q, shutdown_event, sub_msg=const.SUB_MSG):
    def __init__(self, q, shutdown_event, sub_msg=SUB_MSG):
        super().__init__()
        self.name = "WebSocketThread"
        self.q = q
@@ -19,7 +19,9 @@ class WebSocketThread(threading.Thread):
        self.tx_count = 0
    async def connect(self):
        async with websockets.connect(const.WS_ADDR) as ws:
        async with websockets.connect(WS_ADDR) as ws:
            logging.info("Inter message timeout set to %d seconds", WS_INTERMSG_TIMEOUT)
            logging.info("WebSocket connection established successfully")
            await ws.send(self.sub_msg)
            logging.info("Subscription message sent")
@@ -30,22 +32,24 @@
            while not self.shutdown_event.is_set():
                try:
                    msg = await ws.recv()
                    # Timeout is necessary to make sure the state of the shutdown event is checked often enough
                    msg = await asyncio.wait_for(ws.recv(), timeout=WS_INTERMSG_TIMEOUT)
                    data = self.handle_msg(msg)
                    if data is None:
                        continue
                    await self.q.coro_put(data)
                    # This shouldn't really be an issue, but it's safer to set a timeout here too...
                    await asyncio.wait_for(self.q.coro_put(data), timeout=QUEUE_OP_TIMEOUT)
                except asyncio.TimeoutError:
                    logging.debug("WebSocket receiver timed out before fetching a new message, reattempting")
                    continue
                except websockets.exceptions.ConnectionClosed:
                    logging.info(
                        "WebSocket connection closed unexpectedly, sleeping for %d seconds before re-establishing the connection",
                        const.WS_RECONNECT_PAUSE,
                        WS_RECONNECT_PAUSE,
                    )
                    await asyncio.sleep(const.WS_RECONNECT_PAUSE)
                    await asyncio.sleep(WS_RECONNECT_PAUSE)
                    break
                # pylint: disable=broad-exception-caught
                except Exception as e:
@@ -100,10 +104,12 @@ class QueueProcessor(threading.Thread):
        self.handler = handler
    async def process_queue(self):
        logging.info("Queue operations timeout set to %d seconds", QUEUE_OP_TIMEOUT)
        while not self.shutdown_event.is_set():
            try:
                # Might prevent a proper shutdown procedure if the queue feeder is closed before the processor
                tx_sender = await self.q.coro_get()
                # Timeout is necessary to make sure the state of the shutdown event is checked often enough
                tx_sender = await asyncio.wait_for(self.q.coro_get(), timeout=QUEUE_OP_TIMEOUT)
                await self.handler.store(tx_sender)
            # pylint: disable=broad-exception-caught
            except asyncio.TimeoutError:
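The timeout pattern used in both threads generalizes cleanly: bound every `await` with `asyncio.wait_for` so that the shutdown event is re-checked at least once per timeout period. A standard-library-only sketch, with a plain `asyncio.Queue` standing in for the `aioprocessing` queue:
```python
import asyncio
import threading

async def drain(q, shutdown_event, timeout=1.0):
    # Bounding every await keeps the loop responsive to the shutdown event,
    # exactly like QueueProcessor.process_queue above
    while not shutdown_event.is_set():
        try:
            item = await asyncio.wait_for(q.get(), timeout=timeout)
        except asyncio.TimeoutError:
            continue  # nothing arrived in time; re-check the event and wait again
        print("processed:", item)

async def main():
    q = asyncio.Queue()
    shutdown_event = threading.Event()
    await q.put("0xdeadbeef")
    task = asyncio.create_task(drain(q, shutdown_event))
    await asyncio.sleep(1.5)
    shutdown_event.set()
    await task

asyncio.run(main())
```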