RFE: have a way to request compressed patches and rawdiffs

John Keeping john at keeping.me.uk
Mon May 1 13:00:31 CEST 2017


On Fri, Apr 28, 2017 at 04:46:34PM +0000, Konstantin Ryabitsev wrote:
> Since I'm on an RFE binge, here's another one. :)
> 
> Some of the rawdiffs we have been generating end up pretty large (e.g.
> linux-next to mainline rawdiff is around 80MB). We compress them using http
> protocol compression, but the reality is that most people would be getting
> these using wget or curl, which do not add the "accept-encoding: gzip"
> header and therefore get the uncompressed version.
> 
> It would be great to be able to tell cgit to generate and send compressed
> versions of raw output like patches or rawdiffs.

Here's an initial attempt at this. It needs splitting up into a series
of a few patches (I think there are about four hiding in here), but I'm
sending it now to see if this is roughly what's needed.

The idea is to support extensions on page names (for pages that opt in).
At the moment it's implemented for patch and rawdiff, with support for
gzip-compressing the response body.

For example, given the URL:

	https://git.zx2c4.com/cgit/patch/?id=8e9ddd21a50beb9fd660cf6cd6a583234924b932

with this change you can add ".gz" to the end of "patch":

	https://git.zx2c4.com/cgit/patch.gz/?id=8e9ddd21a50beb9fd660cf6cd6a583234924b932

and the content will be returned compressed with gzip.

I've only added gzip for now, but the setup is table-driven, so adding
more compression filters should be straightforward.

Signed-off-by: John Keeping <john at keeping.me.uk>
---
 cgit.c     | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 cgit.h     |  2 ++
 cmd.c      | 44 +++++++++++++++++++-------------------
 cmd.h      |  3 ++-
 filter.c   |  2 ++
 ui-diff.c  |  6 +++++-
 ui-patch.c | 10 +++++++--
 7 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/cgit.c b/cgit.c
index 1075753..8d25680 100644
--- a/cgit.c
+++ b/cgit.c
@@ -695,6 +695,46 @@ static inline void authenticate_cookie(void)
 	ctx.env.authenticated = cgit_close_filter(ctx.cfg.auth_filter);
 }
 
+static struct cgit_filter *create_gzip_filter(void)
+{
+	char **argv = xcalloc(3, sizeof(char *));
+	struct cgit_exec_filter *f = xmalloc(sizeof(*f));
+
+	argv[0] = xstrdup("gzip");
+	argv[1] = xstrdup("-n");
+	argv[2] = NULL;
+
+	cgit_exec_filter_init(f, argv[0], argv);
+	return &f->base;
+}
+
+struct cgit_page_compression {
+	const char *const ext;
+	const char *const mimetype;
+	struct cgit_filter *(*create)(void);
+};
+
+static const struct cgit_page_compression cgit_page_compressions[] = {
+	{ ".gz", "application/x-gzip", create_gzip_filter, },
+};
+
+static struct cgit_filter *get_page_compression(void)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(cgit_page_compressions); i++) {
+		const struct cgit_page_compression *c;
+
+		c = &cgit_page_compressions[i];
+		if (!strcmp(c->ext, ctx.qry.pageext)) {
+			ctx.page.mimetype = xstrdup(c->mimetype);
+			return c->create();
+		}
+	}
+
+	return NULL;
+}
+
 static void process_request(void)
 {
 	struct cgit_cmd *cmd;
@@ -738,6 +778,21 @@ static void process_request(void)
 		return;
 	}
 
+	if (ctx.qry.pageext) {
+		if (!cmd->want_compression) {
+			cgit_print_error_page(404, "Not found",
+					"Invalid request");
+			return;
+		}
+
+		ctx.page.body_filter = get_page_compression();
+		if (!ctx.page.body_filter) {
+			cgit_print_error_page(404, "Not found",
+					"Invalid request");
+			return;
+		}
+	}
+
 	if (ctx.repo && prepare_repo_cmd())
 		return;
 
@@ -1008,6 +1063,21 @@ static void cgit_parse_args(int argc, const char **argv)
 	}
 }
 
+static void split_page_ext(void)
+{
+	char *dot;
+
+	if (!ctx.qry.page)
+		return;
+
+	dot = strchr(ctx.qry.page, '.');
+	if (!dot)
+		return;
+
+	ctx.qry.pageext = xstrdup(dot);
+	*dot = '\0';
+}
+
 static int calc_ttl(void)
 {
 	if (!ctx.repo)
@@ -1075,6 +1145,8 @@ int cmd_main(int argc, const char **argv)
 		cgit_parse_url(ctx.qry.url);
 	}
 
+	split_page_ext();
+
 	/* Before we go any further, we set ctx.env.authenticated by checking to see
 	 * if the supplied cookie is valid. All cookies are valid if there is no
 	 * auth_filter. If there is an auth_filter, the filter decides. */
diff --git a/cgit.h b/cgit.h
index fbc6c6a..a04b03b 100644
--- a/cgit.h
+++ b/cgit.h
@@ -164,6 +164,7 @@ struct cgit_query {
 	char *raw;
 	char *repo;
 	char *page;
+	char *pageext;
 	char *search;
 	char *grep;
 	char *head;
@@ -280,6 +281,7 @@ struct cgit_page {
 	const char *title;
 	int status;
 	const char *statusmsg;
+	struct cgit_filter *body_filter;
 };
 
 struct cgit_environment {
diff --git a/cmd.c b/cmd.c
index d280e95..9cc5797 100644
--- a/cmd.c
+++ b/cmd.c
@@ -155,32 +155,32 @@ static void tree_fn(void)
 	cgit_print_tree(ctx.qry.sha1, ctx.qry.path);
 }
 
-#define def_cmd(name, want_repo, want_vpath, is_clone) \
-	{#name, name##_fn, want_repo, want_vpath, is_clone}
+#define def_cmd(name, want_repo, want_vpath, is_clone, want_compression) \
+	{#name, name##_fn, want_repo, want_vpath, is_clone, want_compression}
 
 struct cgit_cmd *cgit_get_cmd(void)
 {
 	static struct cgit_cmd cmds[] = {
-		def_cmd(HEAD, 1, 0, 1),
-		def_cmd(atom, 1, 0, 0),
-		def_cmd(about, 0, 0, 0),
-		def_cmd(blob, 1, 0, 0),
-		def_cmd(commit, 1, 1, 0),
-		def_cmd(diff, 1, 1, 0),
-		def_cmd(info, 1, 0, 1),
-		def_cmd(log, 1, 1, 0),
-		def_cmd(ls_cache, 0, 0, 0),
-		def_cmd(objects, 1, 0, 1),
-		def_cmd(patch, 1, 1, 0),
-		def_cmd(plain, 1, 0, 0),
-		def_cmd(rawdiff, 1, 1, 0),
-		def_cmd(refs, 1, 0, 0),
-		def_cmd(repolist, 0, 0, 0),
-		def_cmd(snapshot, 1, 0, 0),
-		def_cmd(stats, 1, 1, 0),
-		def_cmd(summary, 1, 0, 0),
-		def_cmd(tag, 1, 0, 0),
-		def_cmd(tree, 1, 1, 0),
+		def_cmd(HEAD, 1, 0, 1, 0),
+		def_cmd(atom, 1, 0, 0, 0),
+		def_cmd(about, 0, 0, 0, 0),
+		def_cmd(blob, 1, 0, 0, 0),
+		def_cmd(commit, 1, 1, 0, 0),
+		def_cmd(diff, 1, 1, 0, 0),
+		def_cmd(info, 1, 0, 1, 0),
+		def_cmd(log, 1, 1, 0, 0),
+		def_cmd(ls_cache, 0, 0, 0, 0),
+		def_cmd(objects, 1, 0, 1, 0),
+		def_cmd(patch, 1, 1, 0, 1),
+		def_cmd(plain, 1, 0, 0, 0),
+		def_cmd(rawdiff, 1, 1, 0, 1),
+		def_cmd(refs, 1, 0, 0, 0),
+		def_cmd(repolist, 0, 0, 0, 0),
+		def_cmd(snapshot, 1, 0, 0, 0),
+		def_cmd(stats, 1, 1, 0, 0),
+		def_cmd(summary, 1, 0, 0, 0),
+		def_cmd(tag, 1, 0, 0, 0),
+		def_cmd(tree, 1, 1, 0, 0),
 	};
 	int i;
 
diff --git a/cmd.h b/cmd.h
index 6249b1d..4d46ef9 100644
--- a/cmd.h
+++ b/cmd.h
@@ -8,7 +8,8 @@ struct cgit_cmd {
 	cgit_cmd_fn fn;
 	unsigned int want_repo:1,
 		want_vpath:1,
-		is_clone:1;
+		is_clone:1,
+		want_compression:1;
 };
 
 extern struct cgit_cmd *cgit_get_cmd(void);
diff --git a/filter.c b/filter.c
index 949c931..517add5 100644
--- a/filter.c
+++ b/filter.c
@@ -379,6 +379,8 @@ int cgit_close_filter(struct cgit_filter *filter)
 {
 	if (!filter)
 		return 0;
+
+	fflush(stdout);
 	return filter->close(filter);
 }
 
diff --git a/ui-diff.c b/ui-diff.c
index 173d351..1949fa1 100644
--- a/ui-diff.c
+++ b/ui-diff.c
@@ -447,8 +447,11 @@ void cgit_print_diff(const char *new_rev, const char *old_rev,
 		DIFF_OPT_SET(&diffopt, RECURSIVE);
 		diff_setup_done(&diffopt);
 
-		ctx.page.mimetype = "text/plain";
+		if (!ctx.qry.pageext)
+			ctx.page.mimetype = "text/plain";
 		cgit_print_http_headers();
+		cgit_open_filter(ctx.page.body_filter);
+
 		if (old_tree_sha1) {
 			diff_tree_sha1(old_tree_sha1, new_tree_sha1, "",
 				       &diffopt);
@@ -458,6 +461,7 @@ void cgit_print_diff(const char *new_rev, const char *old_rev,
 		diffcore_std(&diffopt);
 		diff_flush(&diffopt);
 
+		cgit_close_filter(ctx.page.body_filter);
 		return;
 	}
 
diff --git a/ui-patch.c b/ui-patch.c
index 6745b69..e24d2fa 100644
--- a/ui-patch.c
+++ b/ui-patch.c
@@ -64,8 +64,10 @@ void cgit_print_patch(const char *new_rev, const char *old_rev,
 			oid_to_hex(&new_rev_oid));
 	}
 
-	patchname = fmt("%s.patch", rev_range);
-	ctx.page.mimetype = "text/plain";
+	patchname = fmt("%s.patch%s", rev_range,
+			ctx.qry.pageext ? ctx.qry.pageext : "");
+	if (!ctx.qry.pageext)
+		ctx.page.mimetype = "text/plain";
 	ctx.page.filename = patchname;
 	cgit_print_http_headers();
 
@@ -75,6 +77,8 @@ void cgit_print_patch(const char *new_rev, const char *old_rev,
 			      "%s%n%n%w(0)%b";
 	}
 
+	cgit_open_filter(ctx.page.body_filter);
+
 	init_revisions(&rev, NULL);
 	rev.abbrev = DEFAULT_ABBREV;
 	rev.verbose_header = 1;
@@ -92,4 +96,6 @@ void cgit_print_patch(const char *new_rev, const char *old_rev,
 		log_tree_commit(&rev, commit);
 		printf("-- \ncgit %s\n\n", cgit_version);
 	}
+
+	cgit_close_filter(ctx.page.body_filter);
 }
-- 
2.12.2.648.g6730d8bc62.dirty



More information about the CGit mailing list