feat: progress bar & slight attempt at improving efficiency
This commit is contained in:
parent
01de99663b
commit
2d7a05a732
60
Cargo.lock
generated
60
Cargo.lock
generated
@ -206,6 +206,19 @@ dependencies = [
|
|||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console"
|
||||||
|
version = "0.15.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
|
||||||
|
dependencies = [
|
||||||
|
"encode_unicode",
|
||||||
|
"lazy_static",
|
||||||
|
"libc",
|
||||||
|
"unicode-width",
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "core-foundation"
|
name = "core-foundation"
|
||||||
version = "0.9.4"
|
version = "0.9.4"
|
||||||
@ -250,6 +263,7 @@ dependencies = [
|
|||||||
"clap",
|
"clap",
|
||||||
"colored",
|
"colored",
|
||||||
"futures",
|
"futures",
|
||||||
|
"indicatif",
|
||||||
"lazy-regex",
|
"lazy-regex",
|
||||||
"md-5",
|
"md-5",
|
||||||
"num",
|
"num",
|
||||||
@ -258,6 +272,12 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encode_unicode"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encoding_rs"
|
name = "encoding_rs"
|
||||||
version = "0.8.34"
|
version = "0.8.34"
|
||||||
@ -604,6 +624,28 @@ dependencies = [
|
|||||||
"hashbrown",
|
"hashbrown",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indicatif"
|
||||||
|
version = "0.17.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
|
||||||
|
dependencies = [
|
||||||
|
"console",
|
||||||
|
"instant",
|
||||||
|
"number_prefix",
|
||||||
|
"portable-atomic",
|
||||||
|
"unicode-width",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "instant"
|
||||||
|
version = "0.1.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ipnet"
|
name = "ipnet"
|
||||||
version = "2.10.0"
|
version = "2.10.0"
|
||||||
@ -821,6 +863,12 @@ dependencies = [
|
|||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "number_prefix"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "object"
|
name = "object"
|
||||||
version = "0.36.4"
|
version = "0.36.4"
|
||||||
@ -927,6 +975,12 @@ version = "0.3.31"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
|
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "portable-atomic"
|
||||||
|
version = "1.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.86"
|
version = "1.0.86"
|
||||||
@ -1436,6 +1490,12 @@ dependencies = [
|
|||||||
"tinyvec",
|
"tinyvec",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-width"
|
||||||
|
version = "0.1.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "untrusted"
|
name = "untrusted"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
|
@ -9,6 +9,7 @@ base64 = "0.22.1"
|
|||||||
clap = { version = "4.5.18", features = ["derive"] }
|
clap = { version = "4.5.18", features = ["derive"] }
|
||||||
colored = "2.1.0"
|
colored = "2.1.0"
|
||||||
futures = "0.3.30"
|
futures = "0.3.30"
|
||||||
|
indicatif = "0.17.8"
|
||||||
lazy-regex = "3.3.0"
|
lazy-regex = "3.3.0"
|
||||||
md-5 = "0.10.6"
|
md-5 = "0.10.6"
|
||||||
num = "0.4.3"
|
num = "0.4.3"
|
||||||
|
98
src/http.rs
98
src/http.rs
@ -3,17 +3,40 @@ use std::{error::Error, path::PathBuf, sync::Arc, time::Duration};
|
|||||||
use base64::{engine::general_purpose, Engine};
|
use base64::{engine::general_purpose, Engine};
|
||||||
use colored::Colorize;
|
use colored::Colorize;
|
||||||
use futures::{lock::Mutex, stream, StreamExt};
|
use futures::{lock::Mutex, stream, StreamExt};
|
||||||
|
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
|
||||||
use md5::{Digest, Md5};
|
use md5::{Digest, Md5};
|
||||||
use reqwest::{header::USER_AGENT, Client};
|
use reqwest::{header::USER_AGENT, Client};
|
||||||
use tokio::{fs::File, io::AsyncWriteExt};
|
use tokio::{fs::File, io::AsyncWriteExt, time::timeout};
|
||||||
|
|
||||||
use crate::UA;
|
use crate::UA;
|
||||||
|
|
||||||
const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(5);
|
const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(30);
|
||||||
|
const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(60);
|
||||||
|
|
||||||
|
async fn write_file(path: &PathBuf, bytes: &[u8]) -> std::io::Result<usize> {
|
||||||
|
let mut file = File::create(path).await?;
|
||||||
|
let bw = file.write(bytes).await?;
|
||||||
|
file.flush().await?;
|
||||||
|
|
||||||
|
Ok(bw)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn concurrent_dl(
|
pub async fn concurrent_dl(
|
||||||
images: Vec<(String, PathBuf, String)>,
|
images: Vec<(String, PathBuf, String)>,
|
||||||
|
display_url: String,
|
||||||
) -> Result<(usize, usize), Box<dyn Error>> {
|
) -> Result<(usize, usize), Box<dyn Error>> {
|
||||||
|
let multi = MultiProgress::new();
|
||||||
|
|
||||||
|
let bar = multi.add(ProgressBar::new(images.len() as u64));
|
||||||
|
bar.set_style(
|
||||||
|
ProgressStyle::with_template("{spinner} {msg} [{wide_bar:.white/gray}] [{pos}/{len}]")
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
|
bar.set_message(display_url);
|
||||||
|
|
||||||
|
let error_bar = multi.add(ProgressBar::new(0));
|
||||||
|
error_bar.set_style(ProgressStyle::with_template("{wide_msg}").unwrap());
|
||||||
|
|
||||||
let dl_count = Arc::new(Mutex::new(0));
|
let dl_count = Arc::new(Mutex::new(0));
|
||||||
let sk_count = Arc::new(Mutex::new(0));
|
let sk_count = Arc::new(Mutex::new(0));
|
||||||
|
|
||||||
@ -27,56 +50,61 @@ pub async fn concurrent_dl(
|
|||||||
let client = client.clone();
|
let client = client.clone();
|
||||||
|
|
||||||
let (url, path, _expct_md5) = data;
|
let (url, path, _expct_md5) = data;
|
||||||
let send_fut = client.get(url).header(USER_AGENT, UA).send();
|
|
||||||
|
|
||||||
match send_fut.await {
|
let download_result = timeout(DOWNLOAD_TIMEOUT, async {
|
||||||
Ok(res) => match res.bytes().await {
|
let res = client.get(url).header(USER_AGENT, UA).send().await?;
|
||||||
Ok(bytes) => {
|
let bytes = res.bytes().await?;
|
||||||
let byte_count = bytes.len();
|
|
||||||
|
|
||||||
let mut hasher = Md5::new();
|
Ok::<_, reqwest::Error>(bytes)
|
||||||
hasher.update(&bytes);
|
})
|
||||||
let result = hasher.finalize();
|
.await;
|
||||||
let b64_md5 = general_purpose::STANDARD.encode(result);
|
|
||||||
|
|
||||||
// 4chan file attachment hash is always 24 character packed base64 encoded MD5. Truly a fucking state of art CRC.
|
match download_result {
|
||||||
|
Ok(Ok(bytes)) => {
|
||||||
|
let mut hasher = Md5::new();
|
||||||
|
hasher.update(&bytes);
|
||||||
|
let result = hasher.finalize();
|
||||||
|
let _b64_md5 = general_purpose::STANDARD.encode(result);
|
||||||
|
|
||||||
// TODO: Figure out how the MD5 should be converted before uncommenting the following filtering condition
|
// TODO: Figure out how the MD5 should be converted before uncommenting the following filtering condition
|
||||||
|
// if b64_md5 != *expct_md5 {
|
||||||
|
// error_bar.set_message(format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
|
||||||
|
// let mut sk_count = sk_count.lock().await;
|
||||||
|
// *sk_count += 1;
|
||||||
|
|
||||||
// if b64_md5 != *expct_md5 {
|
// return;
|
||||||
// eprintln!("{}", format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
|
// }
|
||||||
// let mut sk_count = sk_count.lock().await;
|
|
||||||
// *sk_count += 1;
|
|
||||||
|
|
||||||
// return;
|
let _n = write_file(path, &bytes).await.unwrap();
|
||||||
// }
|
let mut dl_count = dl_count.lock().await;
|
||||||
|
*dl_count += 1;
|
||||||
let mut file = File::create(path).await.unwrap();
|
}
|
||||||
file.write_all(&bytes).await.unwrap();
|
Err(_) => {
|
||||||
|
error_bar.set_message(format!(
|
||||||
let mut dl_count = dl_count.lock().await;
|
|
||||||
*dl_count += 1;
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"{}",
|
|
||||||
format!("{b64_md5}: {byte_count} bytes").truecolor(0, 209, 27)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Err(_) => eprintln!(
|
|
||||||
"{}",
|
"{}",
|
||||||
format!("Failed to convert request from {} to bytes", url)
|
format!("Failed to convert request from {} to bytes", url)
|
||||||
.red()
|
.red()
|
||||||
.bold()
|
.bold()
|
||||||
),
|
));
|
||||||
},
|
}
|
||||||
Err(_) => eprintln!("{}", format!("Failed to request {}", url).red().bold()),
|
Ok(Err(_)) => {
|
||||||
|
error_bar.set_message(format!(
|
||||||
|
"{}",
|
||||||
|
format!("Failed to request {}", url).red().bold()
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bar.inc(1);
|
||||||
}))
|
}))
|
||||||
.buffer_unordered(100)
|
.buffer_unordered(100)
|
||||||
.collect::<Vec<()>>();
|
.collect::<Vec<()>>();
|
||||||
|
|
||||||
futures.await;
|
futures.await;
|
||||||
|
|
||||||
|
bar.finish();
|
||||||
|
error_bar.finish();
|
||||||
|
|
||||||
let dl = *dl_count.lock().await;
|
let dl = *dl_count.lock().await;
|
||||||
let sk = *sk_count.lock().await;
|
let sk = *sk_count.lock().await;
|
||||||
|
|
||||||
|
@ -192,7 +192,7 @@ async fn main() {
|
|||||||
|
|
||||||
println!(
|
println!(
|
||||||
"{}",
|
"{}",
|
||||||
format!("A total of {dl} files downloaded and {fl} filtered")
|
format!("\nA total of {dl} files downloaded and {fl} filtered")
|
||||||
.truecolor(252, 156, 12)
|
.truecolor(252, 156, 12)
|
||||||
.bold()
|
.bold()
|
||||||
);
|
);
|
||||||
|
@ -37,10 +37,8 @@ pub async fn dl(
|
|||||||
None => parse_url(cfg.target.clone()),
|
None => parse_url(cfg.target.clone()),
|
||||||
};
|
};
|
||||||
|
|
||||||
println!("{}", format!("Thread JSON URL: {json_url}").blue().bold());
|
let (images, fl) = parse_json(cfg.clone(), &client, json_url.clone(), board).await?;
|
||||||
|
let (dl, sk) = http::concurrent_dl(images, json_url).await?;
|
||||||
let (images, fl) = parse_json(cfg.clone(), &client, json_url, board).await?;
|
|
||||||
let (dl, sk) = http::concurrent_dl(images).await?;
|
|
||||||
|
|
||||||
Ok((dl, fl + sk))
|
Ok((dl, fl + sk))
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user