feat(BREAKING): comprehensive filtering, fixes #1
This commit is contained in:
parent
4f26e93fd2
commit
01de99663b
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,2 +1 @@
|
||||
/target
|
||||
testdata.json
|
||||
target/
|
||||
|
1292
Cargo.lock
generated
1292
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
20
Cargo.toml
20
Cargo.toml
@ -1,13 +1,17 @@
|
||||
[package]
|
||||
name = "dlrs"
|
||||
version = "0.2.1"
|
||||
description = "Imageboard media downloader"
|
||||
version = "0.2.2"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
clap = "3.1.18"
|
||||
regex = "1.5.6"
|
||||
reqwest = { version = "0.11", features = ["json"] }
|
||||
tokio = { version = "1.18.5", features = ["full"] }
|
||||
serde_json = "1.0"
|
||||
futures = "0.3"
|
||||
colored = "2.0.0"
|
||||
base64 = "0.22.1"
|
||||
clap = { version = "4.5.18", features = ["derive"] }
|
||||
colored = "2.1.0"
|
||||
futures = "0.3.30"
|
||||
lazy-regex = "3.3.0"
|
||||
md-5 = "0.10.6"
|
||||
num = "0.4.3"
|
||||
reqwest = "0.12.7"
|
||||
serde_json = "1.0.128"
|
||||
tokio = { version = "1.40.0", features = ["full"] }
|
||||
|
20
README.md
20
README.md
@ -4,16 +4,16 @@
|
||||
|
||||
### Usage
|
||||
|
||||
Build the optimized binary with `cargo build --release`.
|
||||
|
||||
```
|
||||
USAGE:
|
||||
dlrs --output <PATH> <--thread <URL>|--board <URL>>
|
||||
Usage: dlrs [OPTIONS] -o <PATH> <-t <URL>|-b <URL>>
|
||||
|
||||
OPTIONS:
|
||||
-b, --board <URL> Set a board URL
|
||||
-h, --help Print help information
|
||||
-o, --output <PATH> Set an output directory
|
||||
-t, --thread <URL> Set a thread URL
|
||||
-V, --version Print version information
|
||||
Options:
|
||||
-o <PATH> Set the output directory
|
||||
-t <URL> Set the thread URL
|
||||
-b <URL> Set the board URL
|
||||
-n <MIN_RES> Set the minimum image resolution (e.g. "1920x1080")
|
||||
-m <MAX_RES> Set the maximum image resolution (e.g. "3840x2160")
|
||||
-a <RATIOS> Set a comma-separated list of accepted image aspect ratios (e.g. "4:3,16:9")
|
||||
-v Toggle verbose output
|
||||
-h, --help Print help
|
||||
```
|
||||
|
98
src/board.rs
98
src/board.rs
@ -1,37 +1,95 @@
|
||||
use std::{error::Error, process::exit};
|
||||
|
||||
use colored::Colorize;
|
||||
use reqwest::{header::USER_AGENT, Client};
|
||||
use serde_json::Value;
|
||||
|
||||
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
|
||||
use crate::{Config, UA};
|
||||
|
||||
pub fn parse_url(url: &str) -> (String, String) {
|
||||
pub async fn dl(cfg: Config, client: Client) -> Result<(usize, usize), Box<dyn Error>> {
|
||||
let mut dl = 0;
|
||||
let mut fl = 0;
|
||||
|
||||
let (json_url, board) = parse_url(cfg.target.clone());
|
||||
println!("{}", format!("Board JSON URL: {json_url}").blue().bold());
|
||||
|
||||
let (tc, threads) = parse_json(&client, json_url, board.clone()).await?;
|
||||
println!(
|
||||
"{}",
|
||||
format!("Current thread count of {board}: {tc}")
|
||||
.blue()
|
||||
.bold()
|
||||
);
|
||||
|
||||
for url in threads {
|
||||
let (ndl, nfl) =
|
||||
crate::thread::dl(cfg.clone(), client.clone(), Some((url, board.clone()))).await?;
|
||||
|
||||
dl += ndl;
|
||||
fl += nfl;
|
||||
}
|
||||
|
||||
Ok((dl, fl))
|
||||
}
|
||||
|
||||
fn parse_url(url: String) -> (String, String) {
|
||||
let url_split: Vec<&str> = url.split('/').collect();
|
||||
let board_name = url_split.get(url_split.len() - 2).unwrap();
|
||||
let board = url_split.get(url_split.len() - 2).unwrap();
|
||||
|
||||
(
|
||||
format!("https://a.4cdn.org/{}/catalog.json", board_name),
|
||||
board_name.to_string(),
|
||||
format!("https://a.4cdn.org/{board}/catalog.json",),
|
||||
board.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn get_threadlist(json_url: &str, board_name: &str) -> Result<(usize, Vec<String>)> {
|
||||
let req_body = reqwest::get(json_url).await?.text().await?;
|
||||
let json_data: Value = serde_json::from_str(req_body.as_str())?;
|
||||
let board: Vec<Value> = json_data
|
||||
async fn parse_json(
|
||||
client: &Client,
|
||||
url: String,
|
||||
board: String,
|
||||
) -> Result<(usize, Vec<String>), Box<dyn Error>> {
|
||||
let mut threads = Vec::new();
|
||||
|
||||
let res_txt = match client.get(url.clone()).header(USER_AGENT, UA).send().await {
|
||||
Ok(res) => res.text().await?,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!("Failed to request the thread JSON data from {url}: {e}")
|
||||
.red()
|
||||
.bold()
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let data: Value = match serde_json::from_str(&res_txt) {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!("Failed to parse the raw data from {url}: {e}")
|
||||
.red()
|
||||
.bold()
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let catalogue: Vec<Value> = data
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|page| page["threads"].clone())
|
||||
.collect();
|
||||
|
||||
let mut board_data: Vec<String> = Vec::new();
|
||||
board.iter().for_each(|thread_arr| {
|
||||
thread_arr.as_array().unwrap().iter().for_each(|thread| {
|
||||
let url = format!(
|
||||
"https://a.4cdn.org/{}/thread/{}.json",
|
||||
board_name, thread["no"]
|
||||
);
|
||||
board_data.push(url);
|
||||
});
|
||||
});
|
||||
for thread_array in catalogue {
|
||||
for thread in thread_array.as_array().unwrap() {
|
||||
let thread_id = &thread["no"];
|
||||
let url = format!("https://a.4cdn.org/{board}/thread/{thread_id}.json");
|
||||
|
||||
Ok((board_data.len(), board_data))
|
||||
threads.push(url);
|
||||
}
|
||||
}
|
||||
|
||||
Ok((threads.len(), threads))
|
||||
}
|
||||
|
@ -1,95 +0,0 @@
|
||||
use colored::Colorize;
|
||||
use futures::{stream, StreamExt};
|
||||
use reqwest::Client;
|
||||
use serde_json::Value;
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
process::exit,
|
||||
};
|
||||
use tokio::{fs::File, io::AsyncWriteExt};
|
||||
|
||||
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
|
||||
|
||||
pub async fn get_imagelist(
|
||||
json_url: &str,
|
||||
board_name: &str,
|
||||
output_path: &Path,
|
||||
) -> Result<Vec<(String, PathBuf)>> {
|
||||
let req_body_raw = match reqwest::get(json_url).await {
|
||||
Ok(n) => n,
|
||||
Err(_) => {
|
||||
eprintln!("{}", format!("Error requesting {}", json_url).bold().red());
|
||||
exit(0x0100);
|
||||
}
|
||||
};
|
||||
let req_body_text = req_body_raw.text().await?;
|
||||
let json_data: Value = match serde_json::from_str(req_body_text.as_str()) {
|
||||
Ok(n) => n,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!("Error parsing json from {}: {}", json_url, e)
|
||||
.bold()
|
||||
.red()
|
||||
);
|
||||
exit(0x0100);
|
||||
}
|
||||
};
|
||||
|
||||
let mut img_data: Vec<(String, PathBuf)> = Vec::new();
|
||||
json_data["posts"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter(|post| post["tim"].is_i64())
|
||||
.for_each(|post| {
|
||||
let id = post["tim"].to_string();
|
||||
let ext = post["ext"].as_str().unwrap().to_string();
|
||||
let filepath = output_path.join(format!("{}{}", id, ext).as_str());
|
||||
|
||||
img_data.push((
|
||||
format!("https://i.4cdn.org/{}/{}{}", board_name, id, ext),
|
||||
filepath,
|
||||
))
|
||||
});
|
||||
|
||||
Ok(img_data)
|
||||
}
|
||||
|
||||
pub async fn get_images(img_data: &Vec<(String, PathBuf)>) -> Result<usize> {
|
||||
let client = Client::builder().build()?;
|
||||
|
||||
let futures = stream::iter(img_data.iter().map(|data| async {
|
||||
let (url, path) = data;
|
||||
let send_fut = client.get(url).send();
|
||||
|
||||
match send_fut.await {
|
||||
Ok(res) => match res.bytes().await {
|
||||
Ok(bytes) => {
|
||||
let mut file = File::create(path).await.unwrap();
|
||||
file.write_all(&bytes).await.unwrap();
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
format!("{} bytes: {:?} -> {:?}", bytes.len(), url, path)
|
||||
.italic()
|
||||
.purple()
|
||||
);
|
||||
}
|
||||
Err(_) => eprintln!(
|
||||
"{}",
|
||||
format!("Error converting request from {} to bytes", url)
|
||||
.bold()
|
||||
.red()
|
||||
),
|
||||
},
|
||||
Err(_) => eprintln!("{}", format!("Error requesting {}", url).bold().red()),
|
||||
}
|
||||
}))
|
||||
.buffer_unordered(100)
|
||||
.collect::<Vec<()>>();
|
||||
|
||||
futures.await;
|
||||
|
||||
Ok(img_data.len())
|
||||
}
|
84
src/http.rs
Normal file
84
src/http.rs
Normal file
@ -0,0 +1,84 @@
|
||||
use std::{error::Error, path::PathBuf, sync::Arc, time::Duration};
|
||||
|
||||
use base64::{engine::general_purpose, Engine};
|
||||
use colored::Colorize;
|
||||
use futures::{lock::Mutex, stream, StreamExt};
|
||||
use md5::{Digest, Md5};
|
||||
use reqwest::{header::USER_AGENT, Client};
|
||||
use tokio::{fs::File, io::AsyncWriteExt};
|
||||
|
||||
use crate::UA;
|
||||
|
||||
const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
pub async fn concurrent_dl(
|
||||
images: Vec<(String, PathBuf, String)>,
|
||||
) -> Result<(usize, usize), Box<dyn Error>> {
|
||||
let dl_count = Arc::new(Mutex::new(0));
|
||||
let sk_count = Arc::new(Mutex::new(0));
|
||||
|
||||
let client = Client::builder()
|
||||
.pool_idle_timeout(KEEP_ALIVE_TIMEOUT)
|
||||
.build()?;
|
||||
|
||||
let futures = stream::iter(images.iter().map(|data| async {
|
||||
let dl_count = Arc::clone(&dl_count);
|
||||
let _sk_count = Arc::clone(&sk_count);
|
||||
let client = client.clone();
|
||||
|
||||
let (url, path, _expct_md5) = data;
|
||||
let send_fut = client.get(url).header(USER_AGENT, UA).send();
|
||||
|
||||
match send_fut.await {
|
||||
Ok(res) => match res.bytes().await {
|
||||
Ok(bytes) => {
|
||||
let byte_count = bytes.len();
|
||||
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(&bytes);
|
||||
let result = hasher.finalize();
|
||||
let b64_md5 = general_purpose::STANDARD.encode(result);
|
||||
|
||||
// 4chan file attachment hash is always 24 character packed base64 encoded MD5. Truly a fucking state of art CRC.
|
||||
|
||||
// TODO: Figure out how the MD5 should be converted before uncommenting the following filtering condition
|
||||
|
||||
// if b64_md5 != *expct_md5 {
|
||||
// eprintln!("{}", format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
|
||||
// let mut sk_count = sk_count.lock().await;
|
||||
// *sk_count += 1;
|
||||
|
||||
// return;
|
||||
// }
|
||||
|
||||
let mut file = File::create(path).await.unwrap();
|
||||
file.write_all(&bytes).await.unwrap();
|
||||
|
||||
let mut dl_count = dl_count.lock().await;
|
||||
*dl_count += 1;
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
format!("{b64_md5}: {byte_count} bytes").truecolor(0, 209, 27)
|
||||
);
|
||||
}
|
||||
Err(_) => eprintln!(
|
||||
"{}",
|
||||
format!("Failed to convert request from {} to bytes", url)
|
||||
.red()
|
||||
.bold()
|
||||
),
|
||||
},
|
||||
Err(_) => eprintln!("{}", format!("Failed to request {}", url).red().bold()),
|
||||
}
|
||||
}))
|
||||
.buffer_unordered(100)
|
||||
.collect::<Vec<()>>();
|
||||
|
||||
futures.await;
|
||||
|
||||
let dl = *dl_count.lock().await;
|
||||
let sk = *sk_count.lock().await;
|
||||
|
||||
Ok((dl, sk))
|
||||
}
|
282
src/main.rs
282
src/main.rs
@ -1,127 +1,199 @@
|
||||
mod board;
|
||||
mod downloader;
|
||||
mod http;
|
||||
mod thread;
|
||||
|
||||
use clap::{Arg, ArgGroup, Command};
|
||||
use colored::Colorize;
|
||||
use regex::Regex;
|
||||
use std::{path::PathBuf, process::exit};
|
||||
|
||||
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
|
||||
use clap::{Args, Parser};
|
||||
use colored::Colorize;
|
||||
use lazy_regex::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
static RES_REX: Lazy<Regex> = lazy_regex!(r"^\d{3,6}x\d{3,6}$");
|
||||
static RATIO_REX: Lazy<Regex> = lazy_regex!(r"^\d{1,2}:\d{1,2}$");
|
||||
static URL_REX: Lazy<Regex> = lazy_regex!(
|
||||
r"^((http|https)://)?boards.(4chan|4channel).org/[a-zA-Z]{1,4}/(catalog|thread/\d+)$"
|
||||
);
|
||||
|
||||
pub static UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.3";
|
||||
|
||||
/// Kind of target the downloader was pointed at.
#[derive(Debug, Clone, Copy)]
enum Mode {
    /// The target URL refers to a single thread.
    Thread,
    /// The target URL refers to a whole board.
    Board,
}
|
||||
|
||||
fn parse_cli_args() -> Result<(PathBuf, String, Mode)> {
|
||||
let matches = Command::new("dlrs")
|
||||
.arg(
|
||||
Arg::new("output")
|
||||
.short('o')
|
||||
.long("output")
|
||||
.value_name("PATH")
|
||||
.help("Set an output directory")
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("thread")
|
||||
.short('t')
|
||||
.long("thread")
|
||||
.value_name("URL")
|
||||
.help("Set a thread URL")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("board")
|
||||
.short('b')
|
||||
.long("board")
|
||||
.value_name("URL")
|
||||
.help("Set a board URL")
|
||||
.takes_value(true),
|
||||
)
|
||||
.group(
|
||||
ArgGroup::new("target")
|
||||
.args(&["thread", "board"])
|
||||
.required(true),
|
||||
)
|
||||
.get_matches();
|
||||
#[derive(Debug, Clone)]
|
||||
struct Config {
|
||||
mode: Mode,
|
||||
target: String,
|
||||
min_res: Option<(u64, u64)>,
|
||||
max_res: Option<(u64, u64)>,
|
||||
aspect_ratios: Option<Vec<(u32, u32)>>,
|
||||
out_dir: PathBuf,
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
let re = Regex::new(
|
||||
r"^((http|https)://)?boards.(4chan|4channel).org/[a-zA-Z]{1,4}/(catalog|thread/\d+)$",
|
||||
)?;
|
||||
impl From<Cli> for Config {
|
||||
fn from(value: Cli) -> Self {
|
||||
let mode: Mode;
|
||||
let target: String;
|
||||
|
||||
let path = PathBuf::from(matches.value_of("output").unwrap());
|
||||
let target_match = matches.value_of("target").unwrap();
|
||||
let target = match re.is_match(target_match) {
|
||||
true => target_match,
|
||||
false => {
|
||||
eprintln!("{}", "Error: Invalid URL format".to_string().bold().red());
|
||||
exit(0x0100);
|
||||
if let Some(thread_url) = value.target.thread {
|
||||
mode = Mode::Thread;
|
||||
target = thread_url;
|
||||
} else if let Some(board_url) = value.target.board {
|
||||
mode = Mode::Board;
|
||||
target = board_url;
|
||||
} else {
|
||||
eprintln!("{}", "No target URL".red().bold());
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
let mode = match matches.is_present("thread") {
|
||||
true => Mode::Thread,
|
||||
false => Mode::Board,
|
||||
};
|
||||
|
||||
Ok((path, target.to_string(), mode))
|
||||
if !URL_REX.is_match(&target) {
|
||||
eprintln!("{}", "Target URL doesn't pass the RegEx check".red().bold());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
let out_dir = PathBuf::from(value.output);
|
||||
|
||||
Config {
|
||||
mode,
|
||||
target,
|
||||
min_res: Self::parse_res(value.min_res),
|
||||
max_res: Self::parse_res(value.max_res),
|
||||
aspect_ratios: Self::parse_aspect_ratios(value.aspect_ratios),
|
||||
out_dir,
|
||||
verbose: value.verbose,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
fn parse_res(res_str: Option<String>) -> Option<(u64, u64)> {
|
||||
let res = match res_str {
|
||||
Some(res) => res,
|
||||
None => return None,
|
||||
};
|
||||
|
||||
if !RES_REX.is_match(&res) {
|
||||
eprintln!(
|
||||
"{}",
|
||||
"Given resolution doesn't pass the RegEx check".red().bold()
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
let split: Vec<&str> = res.split('x').collect();
|
||||
let parsed = (
|
||||
split[0].parse::<u64>().unwrap(),
|
||||
split[1].parse::<u64>().unwrap(),
|
||||
);
|
||||
|
||||
Some(parsed)
|
||||
}
|
||||
|
||||
fn parse_aspect_ratios(ratios_str: Option<String>) -> Option<Vec<(u32, u32)>> {
|
||||
let ratios = match ratios_str {
|
||||
Some(ratios) => ratios,
|
||||
None => return None,
|
||||
};
|
||||
|
||||
let ratios_vec: Vec<&str> = ratios.split(',').collect();
|
||||
let mut parsed_vec = Vec::new();
|
||||
|
||||
for r in ratios_vec {
|
||||
if !RATIO_REX.is_match(r) {
|
||||
eprintln!(
|
||||
"{}",
|
||||
"One or more of the given aspect ratios don't pass the RegEx check"
|
||||
.red()
|
||||
.bold()
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
let split: Vec<&str> = r.split(':').collect();
|
||||
let parsed = (
|
||||
split[0].parse::<u32>().unwrap(),
|
||||
split[1].parse::<u32>().unwrap(),
|
||||
);
|
||||
|
||||
parsed_vec.push(parsed);
|
||||
}
|
||||
|
||||
Some(parsed_vec)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct Cli {
|
||||
/// Set the output directory
|
||||
#[arg(short = 'o', value_name = "PATH", required = true)]
|
||||
output: String,
|
||||
|
||||
/// Set the target URL
|
||||
#[command(flatten)]
|
||||
target: Target,
|
||||
|
||||
/// Set the minimum image resolution (e.g. "1920x1080")
|
||||
#[arg(short = 'n', value_name = "MIN_RES")]
|
||||
min_res: Option<String>,
|
||||
|
||||
/// Set the minimum image resolution (e.g. "3840x2160")
|
||||
#[arg(short = 'm', value_name = "MAX_RES")]
|
||||
max_res: Option<String>,
|
||||
|
||||
/// Set a comma-separated list of accepted image aspect ratios (e.g. "4:3,16:9")
|
||||
#[arg(short = 'a', value_name = "RATIOS")]
|
||||
aspect_ratios: Option<String>,
|
||||
|
||||
/// Toggle verbose output
|
||||
#[arg(short = 'v', action, default_value_t = false)]
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Args)]
|
||||
#[group(required = true, multiple = false)]
|
||||
struct Target {
|
||||
/// Set the thread URL
|
||||
#[arg(short = 't', value_name = "URL")]
|
||||
thread: Option<String>,
|
||||
|
||||
/// Set the board URL
|
||||
#[arg(short = 'b', value_name = "URL")]
|
||||
board: Option<String>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let (path, target, mode) = parse_cli_args()?;
|
||||
println!(
|
||||
"{}",
|
||||
format!(
|
||||
"\nDownload configuration:\n\tOUTPUT PATH: {:?}\n\tURL: {}\n\tDOWNLOAD MODE: {:?}\n",
|
||||
path, target, mode
|
||||
)
|
||||
.bold()
|
||||
.green()
|
||||
);
|
||||
async fn main() {
|
||||
let args = Cli::parse();
|
||||
let cfg = Config::from(args);
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
match mode {
|
||||
Mode::Thread => {
|
||||
let (json_url, board_name) = thread::parse_url(&target);
|
||||
println!(
|
||||
"{}",
|
||||
format!("Parsing JSON from {}", json_url).bold().blue()
|
||||
);
|
||||
let img_data = downloader::get_imagelist(&json_url, &board_name, &path).await?;
|
||||
let filecount = downloader::get_images(&img_data).await?;
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
format!("Total of {} files downloaded from 1 thread.\n", filecount)
|
||||
.bold()
|
||||
.green()
|
||||
);
|
||||
}
|
||||
Mode::Board => {
|
||||
let (json_url, board_name) = board::parse_url(&target);
|
||||
let (thread_amt, thread_data) = board::get_threadlist(&json_url, &board_name).await?;
|
||||
let mut filecount: usize = 0;
|
||||
for url in &thread_data {
|
||||
println!("{}", format!("Parsing JSON from {}", url).bold().blue());
|
||||
let img_data = downloader::get_imagelist(url, &board_name, &path).await?;
|
||||
let total_amt = downloader::get_images(&img_data).await?;
|
||||
filecount += total_amt;
|
||||
}
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
format!(
|
||||
"Total of {} files downloaded from {} threads.\n",
|
||||
filecount, thread_amt
|
||||
)
|
||||
.bold()
|
||||
.green()
|
||||
);
|
||||
}
|
||||
if cfg.verbose {
|
||||
println!("\nDLRS CONFIG:\n{cfg:#?}\n");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
let res = match cfg.mode {
|
||||
Mode::Thread => thread::dl(cfg, client, None).await,
|
||||
Mode::Board => board::dl(cfg, client).await,
|
||||
};
|
||||
|
||||
let (dl, fl) = match res {
|
||||
Ok((dl, fl)) => (dl, fl),
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!("Error during thread download: {e}").red().bold()
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
format!("A total of {dl} files downloaded and {fl} filtered")
|
||||
.truecolor(252, 156, 12)
|
||||
.bold()
|
||||
);
|
||||
}
|
||||
|
163
src/thread.rs
163
src/thread.rs
@ -1,13 +1,162 @@
|
||||
pub fn parse_url(url: &str) -> (String, String) {
|
||||
use core::fmt;
|
||||
use std::{error::Error, path::PathBuf, process::exit};
|
||||
|
||||
use crate::{http, Config, UA};
|
||||
|
||||
use colored::Colorize;
|
||||
use num::integer::gcd;
|
||||
use reqwest::{header::USER_AGENT, Client};
|
||||
use serde_json::Value;
|
||||
|
||||
/// Reason an attachment was rejected by the image filter.
#[derive(Debug)]
enum FilterCondition {
    /// The image exceeds the configured maximum resolution.
    MaxRes,
    /// The image is below the configured minimum resolution.
    MinRes,
    /// The image's aspect ratio is not in the configured list.
    AspectRatio,
    /// The image was not rejected.
    None,
}

impl fmt::Display for FilterCondition {
    /// Writes the human-readable reason shown in verbose output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let reason = match self {
            Self::MaxRes => "Image is larger than the set maximum resolution",
            // Previously said "maximum", which contradicted the variant.
            Self::MinRes => "Image is smaller than the set minimum resolution",
            Self::AspectRatio => "Image's aspect ratio doesn't match the listed ones",
            Self::None => "No reason",
        };

        f.write_str(reason)
    }
}
|
||||
|
||||
pub async fn dl(
|
||||
cfg: Config,
|
||||
client: Client,
|
||||
combo: Option<(String, String)>,
|
||||
) -> Result<(usize, usize), Box<dyn Error>> {
|
||||
let (json_url, board) = match combo {
|
||||
Some((json_url, board)) => (json_url, board),
|
||||
None => parse_url(cfg.target.clone()),
|
||||
};
|
||||
|
||||
println!("{}", format!("Thread JSON URL: {json_url}").blue().bold());
|
||||
|
||||
let (images, fl) = parse_json(cfg.clone(), &client, json_url, board).await?;
|
||||
let (dl, sk) = http::concurrent_dl(images).await?;
|
||||
|
||||
Ok((dl, fl + sk))
|
||||
}
|
||||
|
||||
fn parse_url(url: String) -> (String, String) {
|
||||
let url_split: Vec<&str> = url.split('/').collect();
|
||||
let thread_id = url_split.last().unwrap();
|
||||
let board_name = url_split.get(url_split.len() - 3).unwrap();
|
||||
let board = url_split.get(url_split.len() - 3).unwrap();
|
||||
|
||||
(
|
||||
format!(
|
||||
"https://a.4cdn.org/{}/thread/{}.json",
|
||||
board_name, thread_id
|
||||
),
|
||||
board_name.to_string(),
|
||||
format!("https://a.4cdn.org/{board}/thread/{thread_id}.json"),
|
||||
board.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn parse_json(
|
||||
cfg: Config,
|
||||
client: &Client,
|
||||
url: String,
|
||||
board: String,
|
||||
) -> Result<(Vec<(String, PathBuf, String)>, usize), Box<dyn Error>> {
|
||||
let mut filtered = 0;
|
||||
let mut images = Vec::new();
|
||||
|
||||
let res_txt = match client.get(url.clone()).header(USER_AGENT, UA).send().await {
|
||||
Ok(res) => res.text().await?,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!("Failed to request the thread JSON data from {url}: {e}")
|
||||
.red()
|
||||
.bold()
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let data: Value = match serde_json::from_str(&res_txt) {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"{}",
|
||||
format!("Failed to parse the raw data from {url}: {e}")
|
||||
.red()
|
||||
.bold()
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
for entry in data["posts"].as_array().unwrap() {
|
||||
// Reply without an attachment
|
||||
if !entry["tim"].is_i64() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (is_filtered, cond) = img_filter(cfg.clone(), entry);
|
||||
|
||||
if is_filtered {
|
||||
if cfg.verbose {
|
||||
println!("Image filtered: {cond}");
|
||||
}
|
||||
|
||||
filtered += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Format: (img url, output file, md5 hash)
|
||||
images.push(parse_img(cfg.clone(), entry, &board));
|
||||
}
|
||||
|
||||
Ok((images, filtered))
|
||||
}
|
||||
|
||||
/// Builds the download tuple `(image URL, output path, MD5 hash)` for a post.
///
/// The file name is the post's `tim` value followed by its `ext` value; it is
/// appended both to the image host URL for `board` and to `cfg.out_dir`.
///
/// # Panics
///
/// Panics if the post's `ext` or `md5` field is not a string.
fn parse_img(cfg: Config, entry: &Value, board: &str) -> (String, PathBuf, String) {
    // `as_str` yields the bare text; formatting the JSON value directly would
    // keep the surrounding quotes.
    let extension = entry["ext"].as_str().unwrap();
    let file_name = format!("{}{}", entry["tim"], extension);
    let md5 = entry["md5"].as_str().unwrap().to_owned();

    let url = format!("https://i.4cdn.org/{board}/{file_name}");
    let path = cfg.out_dir.join(&file_name);

    (url, path, md5)
}
|
||||
|
||||
fn img_filter(cfg: Config, entry: &Value) -> (bool, FilterCondition) {
|
||||
let img_w = entry["w"].as_u64().unwrap();
|
||||
let img_h = entry["h"].as_u64().unwrap();
|
||||
let aspect_ratio = img_aspect_ratio(img_w, img_h);
|
||||
|
||||
if let Some(min_res) = cfg.min_res {
|
||||
if min_res.0 > img_w || min_res.1 > img_h {
|
||||
return (true, FilterCondition::MinRes);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(max_res) = cfg.max_res {
|
||||
if max_res.0 < img_w || max_res.1 < img_h {
|
||||
return (true, FilterCondition::MaxRes);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(aspect_ratios) = &cfg.aspect_ratios {
|
||||
// Non-empty list of preferred aspect ratios
|
||||
if !aspect_ratios.contains(&aspect_ratio) {
|
||||
return (true, FilterCondition::AspectRatio);
|
||||
}
|
||||
}
|
||||
|
||||
(false, FilterCondition::None)
|
||||
}
|
||||
|
||||
fn img_aspect_ratio(w: u64, h: u64) -> (u32, u32) {
|
||||
let div = gcd(w, h);
|
||||
let simpl_w = (w / div) as u32;
|
||||
let simpl_h = (h / div) as u32;
|
||||
|
||||
(simpl_w, simpl_h)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user