libgenbulk
Owner: IIIlllIIIllI URL: git@github.com:nyangkosense/libgenbulk.git
fix args bug in main
Commit bc4b23aa1f859f77ebe5ff6f6ea14c9469afdda8 by SM <seb.michalk@gmail.com> on 2025-05-21 11:47:59 +0200
diff --git a/README.md b/README.md
index da80bee..3f522b0 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ The parser reads a Libgen SQL dump and outputs download URLs:
If you want a list of URLs containing specific Languages:
```bash
-./parser libgen_compact.sql links_english.txt english, german, italian
+./parser libgen_compact.sql links_english.txt --languages=english,german,italian
```
It'll show progress as it runs through the file and generates URLs like:
diff --git a/src/parser.zig b/src/parser.zig
index e4b68ad..601deac 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -12,19 +12,18 @@ pub fn main() !void {
defer std.process.argsFree(gpa, args);
if (args.len < 2) {
- std.debug.print("Usage: {s} <sql_file_path> [output_file_path] [language1, language2, ...] [md5_to_debug]\n", .{args[0]});
- std.debug.print("If output_file_path is not provided, results will be printed to stdout \n", .{});
- std.debug.print("If no languages are specified, all languages will be included \n", .{});
+ std.debug.print("Usage: {s} <sql_file_path> [output_file_path] [--languages=lang1,lang2,...] [--debug=md5]\n", .{args[0]});
+ std.debug.print("If output_file_path is not provided, results will be printed to stdout\n", .{});
+ std.debug.print("Example: {s} libgen.sql books.txt --languages=english,german,russian\n", .{args[0]});
return;
}
const file_path = args[1];
+
const output_to_file = args.len >= 3;
const output_path = if (output_to_file) args[2] else "";
- // Optional MD5 to debug - if provided, print all details for this record
- const debug_md5 = if (args.len >= 4) args[3] else "";
-
+ var debug_md5: []const u8 = "";
var languages = std.ArrayList([]const u8).init(gpa);
defer {
for (languages.items) |lang| {
@@ -33,26 +32,34 @@ pub fn main() !void {
languages.deinit();
}
- if (args.len >= 4) {
- var lang_iter = std.mem.split(u8, args[3], ",");
- while (lang_iter.next()) |lang| {
- if (lang.len > 0) {
- const normalized_lang = try gpa.dupe(u8, lang);
- for (0..normalized_lang.len) |i| {
- normalized_lang[i] = std.ascii.toLower(normalized_lang[i]);
+ for (args[3..]) |arg| {
+ if (std.mem.startsWith(u8, arg, "--languages=")) {
+ const langs_str = arg["--languages=".len..];
+ var lang_iter = std.mem.split(u8, langs_str, ",");
+ while (lang_iter.next()) |lang| {
+ if (lang.len > 0) {
+ const normalized_lang = try gpa.dupe(u8, lang);
+ for (0..normalized_lang.len) |i| {
+ normalized_lang[i] = std.ascii.toLower(normalized_lang[i]);
+ }
+ try languages.append(normalized_lang);
}
- try languages.append(normalized_lang);
}
+ } else if (std.mem.startsWith(u8, arg, "--debug=")) {
+ debug_md5 = arg["--debug=".len..];
}
+ }
+ if (languages.items.len > 0) {
std.debug.print("Filtering for languages: ", .{});
for (languages.items) |lang| {
std.debug.print("{s} ", .{lang});
}
std.debug.print("\n", .{});
+ } else {
+ std.debug.print("No language filters - including all languages\n", .{});
}
-
var output_file: ?fs.File = null;
if (output_to_file) {
output_file = try fs.cwd().createFile(output_path, .{});
@@ -203,7 +210,7 @@ fn processEntryTupleFromBuffer(tuple_buffer: *std.ArrayList(u8), output_file: ?f
const locator_raw = if (fields.items.len > 40) fields.items[40] else ""; // Locator is field 40
const local_raw = if (fields.items.len > 41) fields.items[41] else ""; // Local is field 41
const language_raw = if (fields.items.len > 12) fields.items[12] else "";
-
+
const language = cleanSQLString(language_raw, allocator) catch try allocator.dupe(u8, "");
defer allocator.free(language);