feat(BREAKING): comprehensive filtering, fixes #1
commit 01de99663b (parent 4f26e93fd2)

.gitignore (3 changed lines)
@@ -1,2 +1 @@
-/target
-testdata.json
+target/

Cargo.lock (generated, 1292 changed lines): diff suppressed because it is too large.

Cargo.toml (20 changed lines)
@@ -1,13 +1,17 @@
 [package]
 name = "dlrs"
-version = "0.2.1"
+description = "Imageboard media downloader"
+version = "0.2.2"
 edition = "2021"

 [dependencies]
-clap = "3.1.18"
-regex = "1.5.6"
-reqwest = { version = "0.11", features = ["json"] }
-tokio = { version = "1.18.5", features = ["full"] }
-serde_json = "1.0"
-futures = "0.3"
-colored = "2.0.0"
+base64 = "0.22.1"
+clap = { version = "4.5.18", features = ["derive"] }
+colored = "2.1.0"
+futures = "0.3.30"
+lazy-regex = "3.3.0"
+md-5 = "0.10.6"
+num = "0.4.3"
+reqwest = "0.12.7"
+serde_json = "1.0.128"
+tokio = { version = "1.40.0", features = ["full"] }

README.md (20 changed lines)
@@ -4,16 +4,16 @@

 ### Usage

-Build the optimized binary with `cargo build --release`.
-
 ```
-USAGE:
-    dlrs --output <PATH> <--thread <URL>|--board <URL>>
+Usage: dlrs [OPTIONS] -o <PATH> <-t <URL>|-b <URL>>

-OPTIONS:
-    -b, --board <URL>      Set a board URL
-    -h, --help             Print help information
-    -o, --output <PATH>    Set an output directory
-    -t, --thread <URL>     Set a thread URL
-    -V, --version          Print version information
+Options:
+  -o <PATH>      Set the output directory
+  -t <URL>       Set the thread URL
+  -b <URL>       Set the board URL
+  -n <MIN_RES>   Set the minimum image resolution (e.g. "1920x1080")
+  -m <MAX_RES>   Set the maximum image resolution (e.g. "3840x2160")
+  -a <RATIOS>    Set a comma-separated list of accepted image aspect ratios (e.g. "4:3,16:9")
+  -v             Toggle verbose output
+  -h, --help     Print help
 ```

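For reference, a hypothetical invocation that combines the new filtering flags (the board URL and output path below are placeholders, not taken from the commit):

```
dlrs -o ./wallpapers -b https://boards.4chan.org/wg/catalog -n 1920x1080 -a 16:9,21:9 -v
```
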
src/board.rs (98 changed lines)
@@ -1,37 +1,95 @@
+use std::{error::Error, process::exit};
+
+use colored::Colorize;
+use reqwest::{header::USER_AGENT, Client};
 use serde_json::Value;

-type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
+use crate::{Config, UA};

-pub fn parse_url(url: &str) -> (String, String) {
+pub async fn dl(cfg: Config, client: Client) -> Result<(usize, usize), Box<dyn Error>> {
+    let mut dl = 0;
+    let mut fl = 0;
+
+    let (json_url, board) = parse_url(cfg.target.clone());
+    println!("{}", format!("Board JSON URL: {json_url}").blue().bold());
+
+    let (tc, threads) = parse_json(&client, json_url, board.clone()).await?;
+    println!(
+        "{}",
+        format!("Current thread count of {board}: {tc}")
+            .blue()
+            .bold()
+    );
+
+    for url in threads {
+        let (ndl, nfl) =
+            crate::thread::dl(cfg.clone(), client.clone(), Some((url, board.clone()))).await?;
+
+        dl += ndl;
+        fl += nfl;
+    }
+
+    Ok((dl, fl))
+}
+
+fn parse_url(url: String) -> (String, String) {
     let url_split: Vec<&str> = url.split('/').collect();
-    let board_name = url_split.get(url_split.len() - 2).unwrap();
+    let board = url_split.get(url_split.len() - 2).unwrap();

     (
-        format!("https://a.4cdn.org/{}/catalog.json", board_name),
-        board_name.to_string(),
+        format!("https://a.4cdn.org/{board}/catalog.json",),
+        board.to_string(),
     )
 }

-pub async fn get_threadlist(json_url: &str, board_name: &str) -> Result<(usize, Vec<String>)> {
-    let req_body = reqwest::get(json_url).await?.text().await?;
-    let json_data: Value = serde_json::from_str(req_body.as_str())?;
-    let board: Vec<Value> = json_data
+async fn parse_json(
+    client: &Client,
+    url: String,
+    board: String,
+) -> Result<(usize, Vec<String>), Box<dyn Error>> {
+    let mut threads = Vec::new();
+
+    let res_txt = match client.get(url.clone()).header(USER_AGENT, UA).send().await {
+        Ok(res) => res.text().await?,
+        Err(e) => {
+            eprintln!(
+                "{}",
+                format!("Failed to request the thread JSON data from {url}: {e}")
+                    .red()
+                    .bold()
+            );
+            exit(1);
+        }
+    };
+
+    let data: Value = match serde_json::from_str(&res_txt) {
+        Ok(data) => data,
+        Err(e) => {
+            eprintln!(
+                "{}",
+                format!("Failed to parse the raw data from {url}: {e}")
+                    .red()
+                    .bold()
+            );
+            exit(1);
+        }
+    };
+
+    let catalogue: Vec<Value> = data
         .as_array()
         .unwrap()
         .iter()
         .map(|page| page["threads"].clone())
         .collect();

-    let mut board_data: Vec<String> = Vec::new();
-    board.iter().for_each(|thread_arr| {
-        thread_arr.as_array().unwrap().iter().for_each(|thread| {
-            let url = format!(
-                "https://a.4cdn.org/{}/thread/{}.json",
-                board_name, thread["no"]
-            );
-            board_data.push(url);
-        });
-    });
+    for thread_array in catalogue {
+        for thread in thread_array.as_array().unwrap() {
+            let thread_id = &thread["no"];
+            let url = format!("https://a.4cdn.org/{board}/thread/{thread_id}.json");

-    Ok((board_data.len(), board_data))
+            threads.push(url);
+        }
+    }
+
+    Ok((threads.len(), threads))
 }

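As an aside, a minimal sketch (not part of the commit) of the catalog shape that the new parse_json walks: an array of pages, each carrying a "threads" array whose entries hold the thread number in "no". The JSON values below are made up; only the field names and nesting are taken from the code above.

```
// Sketch only: mirrors the page -> "threads" -> "no" walk in board::parse_json.
use serde_json::{json, Value};

fn main() {
    // A trimmed, made-up stand-in for a catalog.json response
    let data: Value = json!([
        { "page": 1, "threads": [ { "no": 570368 }, { "no": 570371 } ] },
        { "page": 2, "threads": [ { "no": 570001 } ] }
    ]);

    let board = "wg";
    let mut threads = Vec::new();

    for page in data.as_array().unwrap() {
        for thread in page["threads"].as_array().unwrap() {
            let thread_id = &thread["no"];
            threads.push(format!("https://a.4cdn.org/{board}/thread/{thread_id}.json"));
        }
    }

    assert_eq!(threads.len(), 3);
}
```
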
src/downloader.rs (removed, 95 lines)
@@ -1,95 +0,0 @@
-use colored::Colorize;
-use futures::{stream, StreamExt};
-use reqwest::Client;
-use serde_json::Value;
-use std::{
-    path::{Path, PathBuf},
-    process::exit,
-};
-use tokio::{fs::File, io::AsyncWriteExt};
-
-type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
-
-pub async fn get_imagelist(
-    json_url: &str,
-    board_name: &str,
-    output_path: &Path,
-) -> Result<Vec<(String, PathBuf)>> {
-    let req_body_raw = match reqwest::get(json_url).await {
-        Ok(n) => n,
-        Err(_) => {
-            eprintln!("{}", format!("Error requesting {}", json_url).bold().red());
-            exit(0x0100);
-        }
-    };
-    let req_body_text = req_body_raw.text().await?;
-    let json_data: Value = match serde_json::from_str(req_body_text.as_str()) {
-        Ok(n) => n,
-        Err(e) => {
-            eprintln!(
-                "{}",
-                format!("Error parsing json from {}: {}", json_url, e)
-                    .bold()
-                    .red()
-            );
-            exit(0x0100);
-        }
-    };
-
-    let mut img_data: Vec<(String, PathBuf)> = Vec::new();
-    json_data["posts"]
-        .as_array()
-        .unwrap()
-        .iter()
-        .filter(|post| post["tim"].is_i64())
-        .for_each(|post| {
-            let id = post["tim"].to_string();
-            let ext = post["ext"].as_str().unwrap().to_string();
-            let filepath = output_path.join(format!("{}{}", id, ext).as_str());
-
-            img_data.push((
-                format!("https://i.4cdn.org/{}/{}{}", board_name, id, ext),
-                filepath,
-            ))
-        });
-
-    Ok(img_data)
-}
-
-pub async fn get_images(img_data: &Vec<(String, PathBuf)>) -> Result<usize> {
-    let client = Client::builder().build()?;
-
-    let futures = stream::iter(img_data.iter().map(|data| async {
-        let (url, path) = data;
-        let send_fut = client.get(url).send();
-
-        match send_fut.await {
-            Ok(res) => match res.bytes().await {
-                Ok(bytes) => {
-                    let mut file = File::create(path).await.unwrap();
-                    file.write_all(&bytes).await.unwrap();
-
-                    println!(
-                        "{}",
-                        format!("{} bytes: {:?} -> {:?}", bytes.len(), url, path)
-                            .italic()
-                            .purple()
-                    );
-                }
-                Err(_) => eprintln!(
-                    "{}",
-                    format!("Error converting request from {} to bytes", url)
-                        .bold()
-                        .red()
-                ),
-            },
-            Err(_) => eprintln!("{}", format!("Error requesting {}", url).bold().red()),
-        }
-    }))
-    .buffer_unordered(100)
-    .collect::<Vec<()>>();
-
-    futures.await;
-
-    Ok(img_data.len())
-}

src/http.rs (new file, 84 lines)
@@ -0,0 +1,84 @@
+use std::{error::Error, path::PathBuf, sync::Arc, time::Duration};
+
+use base64::{engine::general_purpose, Engine};
+use colored::Colorize;
+use futures::{lock::Mutex, stream, StreamExt};
+use md5::{Digest, Md5};
+use reqwest::{header::USER_AGENT, Client};
+use tokio::{fs::File, io::AsyncWriteExt};
+
+use crate::UA;
+
+const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(5);
+
+pub async fn concurrent_dl(
+    images: Vec<(String, PathBuf, String)>,
+) -> Result<(usize, usize), Box<dyn Error>> {
+    let dl_count = Arc::new(Mutex::new(0));
+    let sk_count = Arc::new(Mutex::new(0));
+
+    let client = Client::builder()
+        .pool_idle_timeout(KEEP_ALIVE_TIMEOUT)
+        .build()?;
+
+    let futures = stream::iter(images.iter().map(|data| async {
+        let dl_count = Arc::clone(&dl_count);
+        let _sk_count = Arc::clone(&sk_count);
+        let client = client.clone();
+
+        let (url, path, _expct_md5) = data;
+        let send_fut = client.get(url).header(USER_AGENT, UA).send();
+
+        match send_fut.await {
+            Ok(res) => match res.bytes().await {
+                Ok(bytes) => {
+                    let byte_count = bytes.len();
+
+                    let mut hasher = Md5::new();
+                    hasher.update(&bytes);
+                    let result = hasher.finalize();
+                    let b64_md5 = general_purpose::STANDARD.encode(result);
+
+                    // 4chan's file attachment hash is always a 24-character packed base64-encoded MD5. Truly a fucking state-of-the-art CRC.
+
+                    // TODO: Figure out how the MD5 should be converted before uncommenting the following filtering condition
+
+                    // if b64_md5 != *expct_md5 {
+                    //     eprintln!("{}", format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
+                    //     let mut sk_count = sk_count.lock().await;
+                    //     *sk_count += 1;
+
+                    //     return;
+                    // }
+
+                    let mut file = File::create(path).await.unwrap();
+                    file.write_all(&bytes).await.unwrap();
+
+                    let mut dl_count = dl_count.lock().await;
+                    *dl_count += 1;
+
+                    println!(
+                        "{}",
+                        format!("{b64_md5}: {byte_count} bytes").truecolor(0, 209, 27)
+                    );
+                }
+                Err(_) => eprintln!(
+                    "{}",
+                    format!("Failed to convert request from {} to bytes", url)
+                        .red()
+                        .bold()
+                ),
+            },
+            Err(_) => eprintln!("{}", format!("Failed to request {}", url).red().bold()),
+        }
+    }))
+    .buffer_unordered(100)
+    .collect::<Vec<()>>();
+
+    futures.await;
+
+    let dl = *dl_count.lock().await;
+    let sk = *sk_count.lock().await;
+
+    Ok((dl, sk))
+}

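A side note on the hash comment above: standard base64 of a raw 16-byte MD5 digest is always 24 characters (22 data characters plus "==" padding), which is the packed form the comment describes. A small self-contained check using the same md-5 and base64 crates the commit adds (sketch only, not part of the commit, and it does not resolve the TODO about the comparison):

```
// Sketch only: shows that a raw MD5 digest base64-encodes to 24 characters.
use base64::{engine::general_purpose, Engine};
use md5::{Digest, Md5};

fn main() {
    let bytes = b"example image bytes";

    let mut hasher = Md5::new();
    hasher.update(bytes);
    let digest = hasher.finalize(); // 16 raw bytes

    let b64 = general_purpose::STANDARD.encode(digest);
    assert_eq!(b64.len(), 24);
    assert!(b64.ends_with("==")); // 16 bytes -> 22 base64 chars + 2 padding chars
    println!("{b64}");
}
```
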
src/main.rs (282 changed lines)
@@ -1,127 +1,199 @@
 mod board;
-mod downloader;
+mod http;
 mod thread;

-use clap::{Arg, ArgGroup, Command};
-use colored::Colorize;
-use regex::Regex;
 use std::{path::PathBuf, process::exit};

-type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
+use clap::{Args, Parser};
+use colored::Colorize;
+use lazy_regex::*;

-#[derive(Debug)]
+static RES_REX: Lazy<Regex> = lazy_regex!(r"^\d{3,6}x\d{3,6}$");
+static RATIO_REX: Lazy<Regex> = lazy_regex!(r"^\d{1,2}:\d{1,2}$");
+static URL_REX: Lazy<Regex> = lazy_regex!(
+    r"^((http|https)://)?boards.(4chan|4channel).org/[a-zA-Z]{1,4}/(catalog|thread/\d+)$"
+);
+
+pub static UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.3";
+
+#[derive(Debug, Clone, Copy)]
 enum Mode {
     Thread,
     Board,
 }

-fn parse_cli_args() -> Result<(PathBuf, String, Mode)> {
-    let matches = Command::new("dlrs")
-        .arg(
-            Arg::new("output")
-                .short('o')
-                .long("output")
-                .value_name("PATH")
-                .help("Set an output directory")
-                .takes_value(true)
-                .required(true),
-        )
-        .arg(
-            Arg::new("thread")
-                .short('t')
-                .long("thread")
-                .value_name("URL")
-                .help("Set a thread URL")
-                .takes_value(true),
-        )
-        .arg(
-            Arg::new("board")
-                .short('b')
-                .long("board")
-                .value_name("URL")
-                .help("Set a board URL")
-                .takes_value(true),
-        )
-        .group(
-            ArgGroup::new("target")
-                .args(&["thread", "board"])
-                .required(true),
-        )
-        .get_matches();
+#[derive(Debug, Clone)]
+struct Config {
+    mode: Mode,
+    target: String,
+    min_res: Option<(u64, u64)>,
+    max_res: Option<(u64, u64)>,
+    aspect_ratios: Option<Vec<(u32, u32)>>,
+    out_dir: PathBuf,
+    verbose: bool,
+}

-    let re = Regex::new(
-        r"^((http|https)://)?boards.(4chan|4channel).org/[a-zA-Z]{1,4}/(catalog|thread/\d+)$",
-    )?;
+impl From<Cli> for Config {
+    fn from(value: Cli) -> Self {
+        let mode: Mode;
+        let target: String;

-    let path = PathBuf::from(matches.value_of("output").unwrap());
-    let target_match = matches.value_of("target").unwrap();
-    let target = match re.is_match(target_match) {
-        true => target_match,
-        false => {
-            eprintln!("{}", "Error: Invalid URL format".to_string().bold().red());
-            exit(0x0100);
+        if let Some(thread_url) = value.target.thread {
+            mode = Mode::Thread;
+            target = thread_url;
+        } else if let Some(board_url) = value.target.board {
+            mode = Mode::Board;
+            target = board_url;
+        } else {
+            eprintln!("{}", "No target URL".red().bold());
+            exit(1);
         }
-    };
-    let mode = match matches.is_present("thread") {
-        true => Mode::Thread,
-        false => Mode::Board,
-    };

-    Ok((path, target.to_string(), mode))
+        if !URL_REX.is_match(&target) {
+            eprintln!("{}", "Target URL doesn't pass the RegEx check".red().bold());
+            exit(1);
+        }
+
+        let out_dir = PathBuf::from(value.output);
+
+        Config {
+            mode,
+            target,
+            min_res: Self::parse_res(value.min_res),
+            max_res: Self::parse_res(value.max_res),
+            aspect_ratios: Self::parse_aspect_ratios(value.aspect_ratios),
+            out_dir,
+            verbose: value.verbose,
+        }
+    }
+}
+
+impl Config {
+    fn parse_res(res_str: Option<String>) -> Option<(u64, u64)> {
+        let res = match res_str {
+            Some(res) => res,
+            None => return None,
+        };
+
+        if !RES_REX.is_match(&res) {
+            eprintln!(
+                "{}",
+                "Given resolution doesn't pass the RegEx check".red().bold()
+            );
+            exit(1);
+        }
+
+        let split: Vec<&str> = res.split('x').collect();
+        let parsed = (
+            split[0].parse::<u64>().unwrap(),
+            split[1].parse::<u64>().unwrap(),
+        );
+
+        Some(parsed)
+    }
+
+    fn parse_aspect_ratios(ratios_str: Option<String>) -> Option<Vec<(u32, u32)>> {
+        let ratios = match ratios_str {
+            Some(ratios) => ratios,
+            None => return None,
+        };
+
+        let ratios_vec: Vec<&str> = ratios.split(',').collect();
+        let mut parsed_vec = Vec::new();
+
+        for r in ratios_vec {
+            if !RATIO_REX.is_match(r) {
+                eprintln!(
+                    "{}",
+                    "One or more of the given aspect ratios don't pass the RegEx check"
+                        .red()
+                        .bold()
+                );
+                exit(1);
+            }
+
+            let split: Vec<&str> = r.split(':').collect();
+            let parsed = (
+                split[0].parse::<u32>().unwrap(),
+                split[1].parse::<u32>().unwrap(),
+            );
+
+            parsed_vec.push(parsed);
+        }
+
+        Some(parsed_vec)
+    }
+}
+
+#[derive(Debug, Parser)]
+struct Cli {
+    /// Set the output directory
+    #[arg(short = 'o', value_name = "PATH", required = true)]
+    output: String,
+
+    /// Set the target URL
+    #[command(flatten)]
+    target: Target,
+
+    /// Set the minimum image resolution (e.g. "1920x1080")
+    #[arg(short = 'n', value_name = "MIN_RES")]
+    min_res: Option<String>,
+
+    /// Set the maximum image resolution (e.g. "3840x2160")
+    #[arg(short = 'm', value_name = "MAX_RES")]
+    max_res: Option<String>,
+
+    /// Set a comma-separated list of accepted image aspect ratios (e.g. "4:3,16:9")
+    #[arg(short = 'a', value_name = "RATIOS")]
+    aspect_ratios: Option<String>,
+
+    /// Toggle verbose output
+    #[arg(short = 'v', action, default_value_t = false)]
+    verbose: bool,
+}
+
+#[derive(Debug, Args)]
+#[group(required = true, multiple = false)]
+struct Target {
+    /// Set the thread URL
+    #[arg(short = 't', value_name = "URL")]
+    thread: Option<String>,
+
+    /// Set the board URL
+    #[arg(short = 'b', value_name = "URL")]
+    board: Option<String>,
 }

 #[tokio::main]
-async fn main() -> Result<()> {
-    let (path, target, mode) = parse_cli_args()?;
-    println!(
-        "{}",
-        format!(
-            "\nDownload configuration:\n\tOUTPUT PATH: {:?}\n\tURL: {}\n\tDOWNLOAD MODE: {:?}\n",
-            path, target, mode
-        )
-        .bold()
-        .green()
-    );
+async fn main() {
+    let args = Cli::parse();
+    let cfg = Config::from(args);
+    let client = reqwest::Client::new();

-    match mode {
-        Mode::Thread => {
-            let (json_url, board_name) = thread::parse_url(&target);
-            println!(
-                "{}",
-                format!("Parsing JSON from {}", json_url).bold().blue()
-            );
-            let img_data = downloader::get_imagelist(&json_url, &board_name, &path).await?;
-            let filecount = downloader::get_images(&img_data).await?;
-
-            println!(
-                "{}",
-                format!("Total of {} files downloaded from 1 thread.\n", filecount)
-                    .bold()
-                    .green()
-            );
-        }
-        Mode::Board => {
-            let (json_url, board_name) = board::parse_url(&target);
-            let (thread_amt, thread_data) = board::get_threadlist(&json_url, &board_name).await?;
-            let mut filecount: usize = 0;
-            for url in &thread_data {
-                println!("{}", format!("Parsing JSON from {}", url).bold().blue());
-                let img_data = downloader::get_imagelist(url, &board_name, &path).await?;
-                let total_amt = downloader::get_images(&img_data).await?;
-                filecount += total_amt;
-            }
-
-            println!(
-                "{}",
-                format!(
-                    "Total of {} files downloaded from {} threads.\n",
-                    filecount, thread_amt
-                )
-                .bold()
-                .green()
-            );
-        }
+    if cfg.verbose {
+        println!("\nDLRS CONFIG:\n{cfg:#?}\n");
     }

-    Ok(())
+    let res = match cfg.mode {
+        Mode::Thread => thread::dl(cfg, client, None).await,
+        Mode::Board => board::dl(cfg, client).await,
+    };
+
+    let (dl, fl) = match res {
+        Ok((dl, fl)) => (dl, fl),
+        Err(e) => {
+            eprintln!(
+                "{}",
+                format!("Error during thread download: {e}").red().bold()
+            );
+            exit(1);
+        }
+    };
+
+    println!(
+        "{}",
+        format!("A total of {dl} files downloaded and {fl} filtered")
+            .truecolor(252, 156, 12)
+            .bold()
+    );
 }

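For reference, a few examples of what the new validation patterns in main.rs accept and reject (sketch only, not part of the commit; it reuses the same lazy-regex patterns defined above):

```
// Sketch only: exercises the RES_REX and RATIO_REX patterns from main.rs.
use lazy_regex::{lazy_regex, Lazy, Regex};

static RES_REX: Lazy<Regex> = lazy_regex!(r"^\d{3,6}x\d{3,6}$");
static RATIO_REX: Lazy<Regex> = lazy_regex!(r"^\d{1,2}:\d{1,2}$");

fn main() {
    assert!(RES_REX.is_match("1920x1080"));
    assert!(!RES_REX.is_match("80x60")); // each side needs 3 to 6 digits
    assert!(RATIO_REX.is_match("16:9"));
    assert!(!RATIO_REX.is_match("2.35:1")); // only whole-number ratios pass
}
```
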
src/thread.rs (163 changed lines)
@@ -1,13 +1,162 @@
-pub fn parse_url(url: &str) -> (String, String) {
+use core::fmt;
+use std::{error::Error, path::PathBuf, process::exit};
+
+use crate::{http, Config, UA};
+
+use colored::Colorize;
+use num::integer::gcd;
+use reqwest::{header::USER_AGENT, Client};
+use serde_json::Value;
+
+#[derive(Debug)]
+enum FilterCondition {
+    MaxRes,
+    MinRes,
+    AspectRatio,
+    None,
+}
+
+impl fmt::Display for FilterCondition {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::MaxRes => write!(f, "Image is larger than the set maximum resolution"),
+            Self::MinRes => write!(f, "Image is smaller than the set minimum resolution"),
+            Self::AspectRatio => write!(f, "Image's aspect ratio doesn't match the listed ones"),
+            Self::None => write!(f, "No reason"),
+        }
+    }
+}
+
+pub async fn dl(
+    cfg: Config,
+    client: Client,
+    combo: Option<(String, String)>,
+) -> Result<(usize, usize), Box<dyn Error>> {
+    let (json_url, board) = match combo {
+        Some((json_url, board)) => (json_url, board),
+        None => parse_url(cfg.target.clone()),
+    };
+
+    println!("{}", format!("Thread JSON URL: {json_url}").blue().bold());
+
+    let (images, fl) = parse_json(cfg.clone(), &client, json_url, board).await?;
+    let (dl, sk) = http::concurrent_dl(images).await?;
+
+    Ok((dl, fl + sk))
+}
+
+fn parse_url(url: String) -> (String, String) {
     let url_split: Vec<&str> = url.split('/').collect();
     let thread_id = url_split.last().unwrap();
-    let board_name = url_split.get(url_split.len() - 3).unwrap();
+    let board = url_split.get(url_split.len() - 3).unwrap();

     (
-        format!(
-            "https://a.4cdn.org/{}/thread/{}.json",
-            board_name, thread_id
-        ),
-        board_name.to_string(),
+        format!("https://a.4cdn.org/{board}/thread/{thread_id}.json"),
+        board.to_string(),
     )
 }
+
+async fn parse_json(
+    cfg: Config,
+    client: &Client,
+    url: String,
+    board: String,
+) -> Result<(Vec<(String, PathBuf, String)>, usize), Box<dyn Error>> {
+    let mut filtered = 0;
+    let mut images = Vec::new();
+
+    let res_txt = match client.get(url.clone()).header(USER_AGENT, UA).send().await {
+        Ok(res) => res.text().await?,
+        Err(e) => {
+            eprintln!(
+                "{}",
+                format!("Failed to request the thread JSON data from {url}: {e}")
+                    .red()
+                    .bold()
+            );
+            exit(1);
+        }
+    };
+
+    let data: Value = match serde_json::from_str(&res_txt) {
+        Ok(data) => data,
+        Err(e) => {
+            eprintln!(
+                "{}",
+                format!("Failed to parse the raw data from {url}: {e}")
+                    .red()
+                    .bold()
+            );
+            exit(1);
+        }
+    };
+
+    for entry in data["posts"].as_array().unwrap() {
+        // Reply without an attachment
+        if !entry["tim"].is_i64() {
+            continue;
+        }
+
+        let (is_filtered, cond) = img_filter(cfg.clone(), entry);
+
+        if is_filtered {
+            if cfg.verbose {
+                println!("Image filtered: {cond}");
+            }
+
+            filtered += 1;
+            continue;
+        }
+
+        // Format: (img url, output file, md5 hash)
+        images.push(parse_img(cfg.clone(), entry, &board));
+    }
+
+    Ok((images, filtered))
+}
+
+fn parse_img(cfg: Config, entry: &Value, board: &str) -> (String, PathBuf, String) {
+    let id = entry["tim"].to_string();
+    // Chained conversions for `ext` and `md5` to get rid of the quotes
+    let ext = entry["ext"].as_str().unwrap().to_string();
+    let md5 = entry["md5"].as_str().unwrap().to_string();
+    let path = cfg.out_dir.join(format!("{id}{ext}"));
+    let url = format!("https://i.4cdn.org/{board}/{id}{ext}");
+
+    (url, path, md5)
+}
+
+fn img_filter(cfg: Config, entry: &Value) -> (bool, FilterCondition) {
+    let img_w = entry["w"].as_u64().unwrap();
+    let img_h = entry["h"].as_u64().unwrap();
+    let aspect_ratio = img_aspect_ratio(img_w, img_h);
+
+    if let Some(min_res) = cfg.min_res {
+        if min_res.0 > img_w || min_res.1 > img_h {
+            return (true, FilterCondition::MinRes);
+        }
+    }
+
+    if let Some(max_res) = cfg.max_res {
+        if max_res.0 < img_w || max_res.1 < img_h {
+            return (true, FilterCondition::MaxRes);
+        }
+    }
+
+    if let Some(aspect_ratios) = &cfg.aspect_ratios {
+        // Non-empty list of preferred aspect ratios
+        if !aspect_ratios.contains(&aspect_ratio) {
+            return (true, FilterCondition::AspectRatio);
+        }
+    }
+
+    (false, FilterCondition::None)
+}
+
+fn img_aspect_ratio(w: u64, h: u64) -> (u32, u32) {
+    let div = gcd(w, h);
+    let simpl_w = (w / div) as u32;
+    let simpl_h = (h / div) as u32;
+
+    (simpl_w, simpl_h)
+}

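A quick worked example of the aspect-ratio reduction that img_filter relies on (sketch only, not part of the commit): 1920x1080 has a gcd of 120 and reduces to 16:9, so it passes a "16:9" filter, while a 1920x1200 image reduces to 8:5, so a "16:10" entry in -a would never match it; the reduced form "8:5" has to be listed instead.

```
// Sketch only: the same reduction img_aspect_ratio performs, spot-checked.
use num::integer::gcd;

fn aspect(w: u64, h: u64) -> (u32, u32) {
    let div = gcd(w, h);
    ((w / div) as u32, (h / div) as u32)
}

fn main() {
    assert_eq!(aspect(1920, 1080), (16, 9)); // gcd = 120
    assert_eq!(aspect(3840, 2160), (16, 9)); // 4K is still 16:9
    assert_eq!(aspect(1920, 1200), (8, 5));  // 16:10 is compared in reduced form
}
```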