Compare commits
No commits in common. "a292a488772dfd6ef5ca2387061ca0d2017c9c69" and "340f199b3acef13a9be06a3b087b320e514b417f" have entirely different histories.
a292a48877
...
340f199b3a
24
README.md
24
README.md
@ -1,23 +1,15 @@
|
||||
# Ethereum network sender address mapper
|
||||
|
||||
This script, designed for deployment in a Docker container, monitors the Ethereum network's mempool via a WebSocket connection. It tracks sender addresses and their transaction counts, generating a list of the most active addresses. This data can be used to black-/whitelist addresses in other research projects, ensuring targeting is done based on up-to-date information.
|
||||
|
||||
## Configuration
|
||||
|
||||
A list of the possible environment variables and their purpose:
|
||||
|
||||
- `MODE`: Either `development` or `production`, the logging level is set based on this
|
||||
- `EXPORT_INTERVAL`: The interval of how often the SQLite database is exported as a JSON file, does nothing if `IS_EXPORT` is not true
|
||||
- Notably set as a string (similar to all environment variables)
|
||||
- `IS_EXPORT`: Boolean that indicates whether the aforementioned export task is enabled or not
|
||||
- Possible values that are interpreted as `True` (case insensitive): `true`, `1`, and `t`
|
||||
Script that, once deployed in a Docker container, monitors a live feed of the Ethereum network via a WebSocket connection, stores the sender addresses with transaction counts, and creates statistics of the most active addresses.
|
||||
|
||||
## Development
|
||||
|
||||
Most critically `MODE=development` should be specified, as it sets the logging level from `INFO` to `DEBUG`. Low `EXPORT_INTERVAL` should be used for testing the export functionality (obviously).
|
||||
|
||||
```shell
|
||||
mkvirtualenv chainmapper # OR 'workon chainmapper'
|
||||
pip3 install -r requirements.txt
|
||||
touch .env # Optional, see the previous section
|
||||
touch .env && echo -e "MODE=\"development\"\nEXPORT_INTERVAL=\"60\"" > .env # 60 seconds export period for testing
|
||||
```
|
||||
|
||||
## Usage
|
||||
@ -26,13 +18,5 @@ The included `deploy.sh` shellscript should be used for any kind of (development
|
||||
|
||||
```shell
|
||||
chmod +x ./scripts/deploy.sh
|
||||
# Add `-y` flag to automatically overwrite existing containers with the same name
|
||||
./scripts/deploy.sh
|
||||
```
|
||||
|
||||
Use the following command if you wish to proxy the WebSocket connection:
|
||||
|
||||
```shell
|
||||
# Proxy format: <protocol>://<ip>:<port>
|
||||
./scripts/deploy.sh -p <proxy>
|
||||
```
|
||||
|
41
main.py
41
main.py
@ -1,19 +1,17 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
import threading
|
||||
import logging
|
||||
import signal
|
||||
from collections import namedtuple
|
||||
import sqlite3
|
||||
import aioprocessing
|
||||
import websockets
|
||||
from dotenv import load_dotenv
|
||||
import requests
|
||||
|
||||
from src import const, mempool, db
|
||||
from src.const import DEFAULT_MODE, DEFAULT_EXPORT_INTERVAL, DEFAULT_IS_EXPORT
|
||||
from src.mempool import WebSocketThread, QueueProcessor
|
||||
from src.db import Handler, periodic_export
|
||||
|
||||
Config = namedtuple("Config", ["mode", "export_interval", "is_export"])
|
||||
|
||||
@ -43,19 +41,15 @@ def load_cfg(dotenv_path=".env"):
|
||||
load_dotenv(dotenv_path)
|
||||
print(f"[+] Environment variables loaded from '{dotenv_path}'\n---")
|
||||
|
||||
mode = os.getenv("MODE", const.DEFAULT_MODE).lower()
|
||||
export_interval = int(os.getenv("EXPORT_INTERVAL", const.DEFAULT_EXPORT_INTERVAL))
|
||||
is_export = os.getenv("IS_EXPORT", const.DEFAULT_IS_EXPORT).lower() in ("true", "1", "t")
|
||||
mode = os.getenv("MODE", DEFAULT_MODE).lower()
|
||||
export_interval = int(os.getenv("EXPORT_INTERVAL", DEFAULT_EXPORT_INTERVAL))
|
||||
is_export = os.getenv("IS_EXPORT", DEFAULT_IS_EXPORT).lower() in ("true", "1", "t")
|
||||
|
||||
cfg = Config(mode, export_interval, is_export)
|
||||
|
||||
return cfg
|
||||
|
||||
|
||||
def get_ip(addr=const.IP_TEST_ADDR):
|
||||
return requests.get(addr, timeout=10).text.strip()
|
||||
|
||||
|
||||
def main():
|
||||
cfg = load_cfg()
|
||||
|
||||
@ -66,31 +60,23 @@ def main():
|
||||
|
||||
logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", level=log_level)
|
||||
logging.info("Logger initialized")
|
||||
logging.info("Currently running version %s", const.VERSION)
|
||||
logging.info("MODE: %s", cfg.mode)
|
||||
logging.info("EXPORT_INTERVAL: %d (seconds)", cfg.export_interval)
|
||||
logging.info("IS_EXPORT: %r", cfg.is_export)
|
||||
logging.info("IP: %s", get_ip())
|
||||
|
||||
# Information for debugging issues caused by potential version differences
|
||||
logging.info("Python version: %s", sys.version)
|
||||
logging.info("aioprocessing version: %s", aioprocessing.__version__)
|
||||
logging.info("websockets version: %s", websockets.__version__)
|
||||
logging.info("sqlite3 version: %s", sqlite3.version)
|
||||
|
||||
# FIFO queue for cross-thread communications
|
||||
q = aioprocessing.AioQueue()
|
||||
handler = db.Handler()
|
||||
handler = Handler()
|
||||
shutdown_event = threading.Event()
|
||||
|
||||
shutdown_loop = asyncio.new_event_loop()
|
||||
export_loop = asyncio.new_event_loop()
|
||||
|
||||
ws_thread = mempool.WebSocketThread(q, shutdown_event)
|
||||
qp_thread = mempool.QueueProcessor(q, shutdown_event, handler)
|
||||
ws_thread = WebSocketThread(q, shutdown_event)
|
||||
qp_thread = QueueProcessor(q, shutdown_event, handler)
|
||||
|
||||
export_thread = threading.Thread(
|
||||
target=db.periodic_export,
|
||||
target=periodic_export,
|
||||
args=(
|
||||
export_loop,
|
||||
handler,
|
||||
@ -106,13 +92,10 @@ def main():
|
||||
|
||||
def handle_exit():
|
||||
logging.info("Shutdown procedure initialized")
|
||||
|
||||
shutdown_event.set()
|
||||
shutdown_loop.run_until_complete(shutdown(shutdown_loop))
|
||||
|
||||
# NOTE: It's vital to close the queue processor first so that it doesn't halt the shutdown
|
||||
qp_thread.join()
|
||||
ws_thread.join()
|
||||
qp_thread.join()
|
||||
export_thread.join()
|
||||
|
||||
def handle_signal(signal, _frame):
|
||||
@ -124,8 +107,8 @@ def main():
|
||||
signal.signal(signal.SIGTERM, handle_signal)
|
||||
|
||||
try:
|
||||
qp_thread.join()
|
||||
ws_thread.join()
|
||||
qp_thread.join()
|
||||
except KeyboardInterrupt:
|
||||
logging.info("Keyboard interrupt received, shutting down threads")
|
||||
handle_exit()
|
||||
|
@ -1,4 +1,3 @@
|
||||
aioprocessing==2.0.1
|
||||
python-dotenv==1.0.1
|
||||
Requests==2.32.3
|
||||
websockets==12.0
|
||||
|
@ -1,61 +1,23 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
AUTOREMOVE=false
|
||||
VOLUME_PATH="./data" # Local path to the volume's mount point
|
||||
IS_PROXIED=false
|
||||
PROXY=""
|
||||
|
||||
while getopts ":hyp:" opt
|
||||
do
|
||||
case "$opt" in
|
||||
h)
|
||||
echo "Usage: $0 [-y]"
|
||||
exit 0
|
||||
;;
|
||||
y)
|
||||
echo -e "[+] Automatically removing all containers with the same tag (if any)\n"
|
||||
AUTOREMOVE=true
|
||||
;;
|
||||
p)
|
||||
IS_PROXIED=true
|
||||
PROXY=${OPTARG}
|
||||
echo -e "[+] Proxying enabled: $PROXY"
|
||||
;;
|
||||
*)
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
echo "[+] Starting the deployment script"
|
||||
|
||||
! command -v docker &> /dev/null && echo "[!] Docker could not be found, exiting..." && exit 1
|
||||
|
||||
# Building with '--no-cache' ensures a fresh build will always be used
|
||||
echo -e "[+] Building the Docker image without caching...\n"
|
||||
echo -e "\n[+] Building the Docker image without caching..."
|
||||
docker build --no-cache -t chainmapper .
|
||||
|
||||
[ ! -d "./data" ] && mkdir data && echo -e "\n[+] Created the default volume directory 'data'"
|
||||
|
||||
OLD_ID=$(docker ps -a -q -f name="chainmapper")
|
||||
OLD_ID=$(docker ps -a -q -f name="chainmapper-prod")
|
||||
|
||||
if [ "$OLD_ID" ] && [ "$AUTOREMOVE" = true ]
|
||||
then
|
||||
echo -e "\n[+] Removing existing container with the same tag ($OLD_ID)"
|
||||
docker stop "$OLD_ID" &> /dev/null
|
||||
docker rm "$OLD_ID" &> /dev/null
|
||||
elif [ "$OLD_ID" ]
|
||||
if [ "$OLD_ID" ]
|
||||
then
|
||||
read -p "[?] Existing container found with id '$OLD_ID', do you want to remove it? " -n 1 -r
|
||||
[[ "$REPLY" =~ ^[Yy]$ ]] || (echo "[!] Exiting..." && exit 0)
|
||||
docker stop "$OLD_ID" &> /dev/null
|
||||
docker rm "$OLD_ID" &> /dev/null
|
||||
fi
|
||||
|
||||
echo -e "\n[+] Deploying the container with 'docker run' ('data' as the volume)..."
|
||||
|
||||
if [ "$IS_PROXIED" = true ]
|
||||
then
|
||||
# Override the default entrypoint to run the connections through the given proxy
|
||||
docker run -it --restart unless-stopped -v $VOLUME_PATH:/app/data --name chainmapper --entrypoint /bin/bash -d chainmapper -c "HTTPS_PROXY=$PROXY python main.py"
|
||||
else
|
||||
docker run -it --restart unless-stopped -v $VOLUME_PATH:/app/data --name chainmapper -d chainmapper
|
||||
fi
|
||||
docker run -it --restart unless-stopped -v ./data:/app/data --name chainmapper-prod -d chainmapper
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
# Included into the built image via its Dockerfile
|
||||
|
||||
DB_FILE="/app/data/chainmapper.sqlite3"
|
||||
DB_FILE="/app/chainmapper.sqlite3"
|
||||
|
||||
if [ -s $DB_FILE ]; then
|
||||
exit 0
|
||||
|
10
src/const.py
10
src/const.py
@ -1,7 +1,5 @@
|
||||
import json
|
||||
|
||||
VERSION = "v1.1.0"
|
||||
|
||||
# Blockchain.com endpoint and the subscription message which initializes the "transaction stream"
|
||||
WS_ADDR = "wss://ws.blockchain.info/coins"
|
||||
# Optionally `confirmed_transaction` can be used (bursts of data instead of a steady stream, which is worse for the overall performance)
|
||||
@ -10,6 +8,12 @@ SUB_MSG = json.dumps({"coin": "eth", "command": "subscribe", "entity": "pending_
|
||||
# Pause before reconnecting after the WebSocket connection is accidentally dropped by either party
|
||||
WS_RECONNECT_PAUSE = 2
|
||||
|
||||
# Timeout for asynchronous WebSocket reading (seconds)
|
||||
WS_INTERMSG_TIMEOUT = 1
|
||||
|
||||
# Timeout for asynchronous queue operations (`coro_get` and `coro_put`, seconds)
|
||||
QUEUE_OP_TIMEOUT = 1
|
||||
|
||||
# Paths inside the Docker container where data is stored/exported (should match with the mounted volume in `deploy.sh`)
|
||||
DEFAULT_DB_PATH = "./data/chainmapper.sqlite3"
|
||||
DEFAULT_EXPORT_PATH = "./data/export.json"
|
||||
@ -18,5 +22,3 @@ DEFAULT_EXPORT_PATH = "./data/export.json"
|
||||
DEFAULT_MODE = "production"
|
||||
DEFAULT_EXPORT_INTERVAL = "10800"
|
||||
DEFAULT_IS_EXPORT = "False"
|
||||
|
||||
IP_TEST_ADDR = "https://ipv4.icanhazip.com"
|
||||
|
@ -4,13 +4,13 @@ import logging
|
||||
import threading
|
||||
import asyncio
|
||||
|
||||
from src import const
|
||||
from src.const import DEFAULT_DB_PATH, DEFAULT_EXPORT_PATH
|
||||
|
||||
|
||||
class Handler:
|
||||
"""Handle all SQLite connections required to create, update, and export the stored addresses."""
|
||||
|
||||
def __init__(self, database=const.DEFAULT_DB_PATH):
|
||||
def __init__(self, database=DEFAULT_DB_PATH):
|
||||
self.database = database
|
||||
# Notably `connect` automatically creates the database if it doesn't already exist
|
||||
self.con = sqlite3.connect(self.database, check_same_thread=False)
|
||||
@ -52,7 +52,7 @@ class Handler:
|
||||
)
|
||||
self.con.commit()
|
||||
|
||||
async def export(self, filepath=const.DEFAULT_EXPORT_PATH):
|
||||
async def export(self, filepath=DEFAULT_EXPORT_PATH):
|
||||
"""Export the addresses from the SQLite database in descending order based on the transaction counts."""
|
||||
with self.lock:
|
||||
logging.debug("Reentrant lock acquired")
|
||||
|
@ -4,13 +4,13 @@ import threading
|
||||
import logging
|
||||
import websockets
|
||||
|
||||
from src import const
|
||||
from src.const import WS_ADDR, SUB_MSG, WS_RECONNECT_PAUSE, WS_INTERMSG_TIMEOUT, QUEUE_OP_TIMEOUT
|
||||
|
||||
|
||||
class WebSocketThread(threading.Thread):
|
||||
"""Handle connection, subscription, and message parsing for the Blockchain.com WebSocket."""
|
||||
|
||||
def __init__(self, q, shutdown_event, sub_msg=const.SUB_MSG):
|
||||
def __init__(self, q, shutdown_event, sub_msg=SUB_MSG):
|
||||
super().__init__()
|
||||
self.name = "WebSocketThread"
|
||||
self.q = q
|
||||
@ -19,7 +19,9 @@ class WebSocketThread(threading.Thread):
|
||||
self.tx_count = 0
|
||||
|
||||
async def connect(self):
|
||||
async with websockets.connect(const.WS_ADDR) as ws:
|
||||
async with websockets.connect(WS_ADDR) as ws:
|
||||
logging.info("Inter message timeout set to %d seconds", WS_INTERMSG_TIMEOUT)
|
||||
|
||||
logging.info("WebSocket connection established successfully")
|
||||
await ws.send(self.sub_msg)
|
||||
logging.info("Subscription message sent")
|
||||
@ -30,22 +32,24 @@ class WebSocketThread(threading.Thread):
|
||||
|
||||
while not self.shutdown_event.is_set():
|
||||
try:
|
||||
msg = await ws.recv()
|
||||
# Timeout is necessary to make sure the state of the shutdown event is checked often enough
|
||||
msg = await asyncio.wait_for(ws.recv(), timeout=WS_INTERMSG_TIMEOUT)
|
||||
data = self.handle_msg(msg)
|
||||
|
||||
if data is None:
|
||||
continue
|
||||
|
||||
await self.q.coro_put(data)
|
||||
# This shouldn't really be an issue, but it's safer to set a timeout here too...
|
||||
await asyncio.wait_for(self.q.coro_put(data), timeout=QUEUE_OP_TIMEOUT)
|
||||
except asyncio.TimeoutError:
|
||||
logging.debug("WebSocket receiver timed out before fetching a new message, reattempting")
|
||||
continue
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
logging.info(
|
||||
"WebSocket connection closed unexpectedly, sleeping for %d seconds before rebooting the connection",
|
||||
const.WS_RECONNECT_PAUSE,
|
||||
WS_RECONNECT_PAUSE,
|
||||
)
|
||||
await asyncio.sleep(const.WS_RECONNECT_PAUSE)
|
||||
await asyncio.sleep(WS_RECONNECT_PAUSE)
|
||||
break
|
||||
# pylint: disable=broad-exception-caught
|
||||
except Exception as e:
|
||||
@ -100,10 +104,12 @@ class QueueProcessor(threading.Thread):
|
||||
self.handler = handler
|
||||
|
||||
async def process_queue(self):
|
||||
logging.info("Queue operations timeout set to %d seconds", QUEUE_OP_TIMEOUT)
|
||||
|
||||
while not self.shutdown_event.is_set():
|
||||
try:
|
||||
# Might prevent a proper shutdown procedure if the queue feeder is closed before the processor
|
||||
tx_sender = await self.q.coro_get()
|
||||
# Timeout is necessary to make sure the state of the shutdown event is checked often enough
|
||||
tx_sender = await asyncio.wait_for(self.q.coro_get(), timeout=QUEUE_OP_TIMEOUT)
|
||||
await self.handler.store(tx_sender)
|
||||
# pylint: disable=broad-exception-caught
|
||||
except asyncio.TimeoutError:
|
||||
|
Loading…
Reference in New Issue
Block a user