Home | History | Annotate | Download | only in mod_sed
      1  0     jyri /*
      2  0     jyri  * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
      3  0     jyri  * Use is subject to license terms.
      4  0     jyri  *
      5  0     jyri  * Licensed under the Apache License, Version 2.0 (the "License");
      6  0     jyri  * you may not use this file except in compliance with the License.
      7  0     jyri  * You may obtain a copy of the License at
      8  0     jyri  *  http://www.apache.org/licenses/LICENSE-2.0.
      9  0     jyri  *
     10  0     jyri  * Unless required by applicable law or agreed to in writing, software
     11  0     jyri  * distributed under the License is distributed on an "AS IS" BASIS,
     12  0     jyri  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     13  0     jyri  * or implied.
     14  0     jyri  * See the License for the specific language governing permissions and
     15  0     jyri  * limitations under the License.
     16  0     jyri  */
     17  0     jyri 
     18  0     jyri #include "httpd.h"
     19  0     jyri #include "http_config.h"
     20  0     jyri #include "http_log.h"
     21  0     jyri #include "apr_strings.h"
     22  0     jyri #include "apr_general.h"
     23  0     jyri #include "util_filter.h"
     24  0     jyri #include "apr_buckets.h"
     25  0     jyri #include "http_request.h"
     26  0     jyri #include "libsed.h"
     27  0     jyri 
     28  0     jyri static const char *sed_filter_name = "Sed";
     29  3  basantk #define MODSED_OUTBUF_SIZE 4000
     30  0     jyri 
     31  0     jyri typedef struct sed_expr_config
     32  0     jyri {
     33  0     jyri     sed_commands_t *sed_cmds;
     34  0     jyri     const char *last_error;
     35  0     jyri } sed_expr_config;
     36  0     jyri 
     37  0     jyri typedef struct sed_config
     38  0     jyri {
     39  0     jyri     sed_expr_config output;
     40  0     jyri     sed_expr_config input;
     41  0     jyri } sed_config;
     42  0     jyri 
     43  0     jyri /* Context for filter invocation for single HTTP request */
     44  0     jyri typedef struct sed_filter_ctxt
     45  0     jyri {
     46  0     jyri     sed_eval_t eval;
     47  0     jyri     request_rec *r;
     48  0     jyri     apr_bucket_brigade *bb;
     49  3  basantk     char *outbuf;
     50  3  basantk     char *curoutbuf;
     51  3  basantk     int bufsize;
     52  0     jyri } sed_filter_ctxt;
     53  0     jyri 
     54  0     jyri module AP_MODULE_DECLARE_DATA sed_module;
     55  0     jyri 
     56  0     jyri /* This function will be call back from libsed functions if there is any error
     57  0     jyri  * happend during execution of sed scripts
     58  0     jyri  */
     59  0     jyri static void log_sed_errf(void *data, const char *error)
     60  0     jyri {
     61  0     jyri     request_rec *r = (request_rec *) data;
     62  0     jyri     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, error);
     63  0     jyri }
     64  0     jyri 
     65  0     jyri /* This function will be call back from libsed functions if there is any
     66  0     jyri  * compilation error.
     67  0     jyri  */
     68  0     jyri static void sed_compile_errf(void *data, const char *error)
     69  0     jyri {
     70  0     jyri     sed_expr_config *sed_cfg = (sed_expr_config *) data;
     71  0     jyri     sed_cfg->last_error = error;
     72  0     jyri }
     73  0     jyri 
     74  3  basantk /*
     75  3  basantk  * flush_output_buffer
     76  3  basantk  * Flush the  output data (stored in ctx->outbuf)
     77  3  basantk  */
     78  3  basantk static void flush_output_buffer(sed_filter_ctxt *ctx, char* buf, int sz)
     79  3  basantk {
     80  3  basantk     int size = ctx->curoutbuf - ctx->outbuf;
     81  3  basantk     char *out;
     82  5  basantk     apr_bucket *b;
     83  3  basantk     if (size + sz <= 0)
     84  3  basantk         return;
     85  3  basantk     out = apr_palloc(ctx->r->pool, size + sz);
     86  3  basantk     if (size) {
     87  3  basantk         memcpy(out, ctx->outbuf, size);
     88  3  basantk     }
     89  3  basantk     if (buf && (sz > 0)) {
     90  3  basantk         memcpy(out + size, buf, sz);
     91  3  basantk     }
     92  3  basantk     /* Reset the output buffer position */
     93  3  basantk     ctx->curoutbuf = ctx->outbuf;
     94  5  basantk     b = apr_bucket_pool_create(out, size + sz, ctx->r->pool,
     95  5  basantk                                ctx->r->connection->bucket_alloc);
     96  3  basantk     APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
     97  3  basantk }
     98  3  basantk 
     99  0     jyri /* This is a call back function. When libsed wants to generate the output,
    100  3  basantk  * this function will be invoked.
    101  0     jyri  */
    102  0     jyri static void sed_write_output(void *dummy, char *buf, int sz)
    103  0     jyri {
    104  0     jyri     /* dummy is basically filter context. Context is passed during invocation
    105  0     jyri      * of sed_eval_buffer
    106  0     jyri      */
    107  0     jyri     sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
    108  3  basantk     if (((ctx->curoutbuf - ctx->outbuf) + sz) >= ctx->bufsize) {
    109  3  basantk         /* flush current buffer */
    110  3  basantk         flush_output_buffer(ctx, buf, sz);
    111  3  basantk     }
    112  3  basantk     else {
    113  3  basantk         memcpy(ctx->curoutbuf, buf, sz);
    114  3  basantk         ctx->curoutbuf += sz;
    115  3  basantk     }
    116  0     jyri }
    117  0     jyri 
    118  0     jyri /* Compile a sed expression. Compiled context is saved in sed_cfg->sed_cmds.
    119  0     jyri  * Memory required for compilation context is allocated from cmd->pool.
    120  0     jyri  */
    121  0     jyri static apr_status_t compile_sed_expr(sed_expr_config *sed_cfg,
    122  0     jyri                                      cmd_parms *cmd,
    123  0     jyri                                      const char *expr)
    124  0     jyri {
    125  0     jyri     apr_status_t status = APR_SUCCESS;
    126  0     jyri 
    127  0     jyri     if (!sed_cfg->sed_cmds) {
    128  0     jyri         sed_commands_t *sed_cmds;
    129  0     jyri         sed_cmds = apr_pcalloc(cmd->pool, sizeof(sed_commands_t));
    130  0     jyri         status = sed_init_commands(sed_cmds, sed_compile_errf, sed_cfg,
    131  0     jyri                                    cmd->pool);
    132  0     jyri         if (status != APR_SUCCESS) {
    133  0     jyri             sed_destroy_commands(sed_cmds);
    134  0     jyri             return status;
    135  0     jyri         }
    136  0     jyri         sed_cfg->sed_cmds = sed_cmds;
    137  0     jyri     }
    138  0     jyri     status = sed_compile_string(sed_cfg->sed_cmds, expr);
    139  0     jyri     if (status != APR_SUCCESS) {
    140  0     jyri         sed_destroy_commands(sed_cfg->sed_cmds);
    141  0     jyri         sed_cfg->sed_cmds = NULL;
    142  0     jyri     }
    143  0     jyri     return status;
    144  0     jyri }
    145  0     jyri 
    146  0     jyri /* sed eval cleanup function */
    147  0     jyri static apr_status_t sed_eval_cleanup(void *data)
    148  0     jyri {
    149  0     jyri     sed_eval_t *eval = (sed_eval_t *) data;
    150  0     jyri     sed_destroy_eval(eval);
    151  0     jyri     return APR_SUCCESS;
    152  0     jyri }
    153  0     jyri 
    154  3  basantk /* Initialize sed filter context. If successful then context is set in f->ctx
    155  3  basantk  */
    156  3  basantk static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg)
    157  3  basantk {
    158  3  basantk     apr_status_t status;
    159  3  basantk     sed_filter_ctxt* ctx;
    160  3  basantk     request_rec *r = f->r;
    161  3  basantk     /* Create the context. Call sed_init_eval. libsed will generated
    162  3  basantk      * output by calling sed_write_output and generates any error by
    163  3  basantk      * invoking log_sed_errf.
    164  3  basantk      */
    165  3  basantk     ctx = apr_pcalloc(r->pool, sizeof(sed_filter_ctxt));
    166  3  basantk     ctx->r = r;
    167  3  basantk     ctx->bb = NULL;
    168  3  basantk     status = sed_init_eval(&ctx->eval, sed_cfg->sed_cmds, log_sed_errf,
    169  3  basantk                            r, &sed_write_output, r->pool);
    170  3  basantk     if (status != APR_SUCCESS) {
    171  3  basantk         return status;
    172  3  basantk     }
    173  3  basantk     apr_pool_cleanup_register(r->pool, &ctx->eval, sed_eval_cleanup,
    174  3  basantk                               apr_pool_cleanup_null);
    175  3  basantk     ctx->bufsize = MODSED_OUTBUF_SIZE;
    176  3  basantk     ctx->outbuf = apr_palloc(r->pool, ctx->bufsize + 1);
    177  3  basantk     ctx->curoutbuf = ctx->outbuf;
    178  3  basantk     f->ctx = ctx;
    179  3  basantk     return APR_SUCCESS;
    180  3  basantk }
    181  3  basantk 
    182  0     jyri /* Entry function for Sed output filter */
    183  0     jyri static apr_status_t sed_response_filter(ap_filter_t *f,
    184  0     jyri                                         apr_bucket_brigade *bb)
    185  0     jyri {
    186  0     jyri     apr_bucket *b;
    187  0     jyri     apr_status_t status;
    188  0     jyri     sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
    189  0     jyri                                            &sed_module);
    190  0     jyri     sed_filter_ctxt *ctx = f->ctx;
    191  0     jyri     sed_expr_config *sed_cfg = &cfg->output;
    192  0     jyri 
    193  0     jyri     if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
    194  0     jyri         /* No sed expressions */
    195  0     jyri         ap_remove_output_filter(f);
    196  0     jyri         return ap_pass_brigade(f->next, bb);
    197  0     jyri     }
    198  0     jyri 
    199  0     jyri     if (ctx == NULL) {
    200  0     jyri 
    201  0     jyri         if (APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(bb))) {
    202  0     jyri             /* no need to run sed filter for Head requests */
    203  0     jyri             ap_remove_output_filter(f);
    204  0     jyri             return ap_pass_brigade(f->next, bb);
    205  0     jyri         }
    206  0     jyri 
    207  3  basantk         status = init_context(f, sed_cfg);
    208  3  basantk         if (status != APR_SUCCESS)
    209  3  basantk              return status;
    210  3  basantk         ctx = f->ctx;
    211  0     jyri     }
    212  0     jyri 
    213  0     jyri     ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
    214  0     jyri 
    215  0     jyri     /* Here is the main logic. Iterate through all the buckets, read the
    216  0     jyri      * content of the bucket, call sed_eval_buffer on the data.
    217  0     jyri      * sed_eval_buffer will read the data line by line, run filters on each
    218  0     jyri      * line. sed_eval_buffer will generates the output by calling
    219  0     jyri      * sed_write_output which will add the output to ctx->bb. At the end of
    220  0     jyri      * the loop, ctx->bb is passed to the next filter in chain. At the end of
    221  0     jyri      * the data, if new line is not found then sed_eval_buffer will store the
    222  0     jyri      * data in it's own buffer.
    223  0     jyri      *
    224  0     jyri      * Once eos bucket is found then sed_finalize_eval will flush the rest of
    225  0     jyri      * the data. If there is no new line in last line of data, new line is
    226  0     jyri      * appended (that is a solaris sed behavior). libsed's internal memory for
    227  0     jyri      * evaluation is allocated on request's pool so it will be cleared once
    228  0     jyri      * request is over.
    229  0     jyri      *
    230  0     jyri      * If flush bucket is found then append the the flush bucket to ctx->bb
    231  0     jyri      * and pass it to next filter. There may be some data which will still be
    232  0     jyri      * in sed's internal buffer which can't be flushed until new line
    233  0     jyri      * character is arrived.
    234  0     jyri      */
    235  0     jyri     for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb);) {
    236  0     jyri         const char *buf = NULL;
    237  0     jyri         apr_size_t bytes = 0;
    238  0     jyri         if (APR_BUCKET_IS_EOS(b)) {
    239  0     jyri             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    240  0     jyri             /* Now clean up the internal sed buffer */
    241  0     jyri             sed_finalize_eval(&ctx->eval, ctx);
    242  3  basantk             flush_output_buffer(ctx, NULL, 0);
    243  0     jyri             APR_BUCKET_REMOVE(b);
    244  0     jyri             /* Insert the eos bucket to ctx->bb brigade */
    245  0     jyri             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
    246  0     jyri             b = b1;
    247  0     jyri         }
    248  0     jyri         else if (APR_BUCKET_IS_FLUSH(b)) {
    249  0     jyri             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    250  0     jyri             APR_BUCKET_REMOVE(b);
    251  0     jyri             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
    252  0     jyri             status = ap_pass_brigade(f->next, ctx->bb);
    253  0     jyri             apr_brigade_cleanup(ctx->bb);
    254  0     jyri             if (status != APR_SUCCESS) {
    255  0     jyri                 return status;
    256  0     jyri             }
    257  0     jyri             b = b1;
    258  0     jyri         }
    259  0     jyri         else if (APR_BUCKET_IS_METADATA(b)) {
    260  0     jyri             b = APR_BUCKET_NEXT(b);
    261  0     jyri         }
    262  0     jyri         else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
    263  0     jyri                  == APR_SUCCESS) {
    264  0     jyri             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    265  0     jyri             status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
    266  0     jyri             if (status != APR_SUCCESS) {
    267  0     jyri                 return status;
    268  0     jyri             }
    269  3  basantk             flush_output_buffer(ctx, NULL, 0);
    270  0     jyri             APR_BUCKET_REMOVE(b);
    271  0     jyri             apr_bucket_delete(b);
    272  0     jyri             b = b1;
    273  0     jyri         }
    274  0     jyri         else {
    275  0     jyri             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    276  0     jyri             APR_BUCKET_REMOVE(b);
    277  0     jyri             b = b1;
    278  0     jyri         }
    279  0     jyri     }
    280  0     jyri     apr_brigade_cleanup(bb);
    281  0     jyri     return ap_pass_brigade(f->next, ctx->bb);
    282  0     jyri }
    283  0     jyri 
    284  0     jyri /* Entry function for Sed input filter */
    285  0     jyri static apr_status_t sed_request_filter(ap_filter_t *f,
    286  0     jyri                                        apr_bucket_brigade *bb,
    287  0     jyri                                        ap_input_mode_t mode,
    288  0     jyri                                        apr_read_type_e block,
    289  0     jyri                                        apr_off_t readbytes)
    290  0     jyri {
    291  0     jyri     sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
    292  0     jyri                                            &sed_module);
    293  0     jyri     sed_filter_ctxt *ctx = f->ctx;
    294  0     jyri     apr_status_t status;
    295  0     jyri     sed_expr_config *sed_cfg = &cfg->input;
    296  0     jyri 
    297  0     jyri     if (mode != AP_MODE_READBYTES) {
    298  0     jyri         return ap_get_brigade(f->next, bb, mode, block, readbytes);
    299  0     jyri     }
    300  0     jyri 
    301  0     jyri     if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
    302  0     jyri         /* No sed expression */
    303  0     jyri         return ap_get_brigade(f->next, bb, mode, block, readbytes);
    304  0     jyri     }
    305  0     jyri 
    306  0     jyri     if (!ctx) {
    307  0     jyri         if (!ap_is_initial_req(f->r)) {
    308  0     jyri             ap_remove_input_filter(f);
    309  0     jyri             /* XXX : Should we filter the sub requests too */
    310  0     jyri             return ap_get_brigade(f->next, bb, mode, block, readbytes);
    311  0     jyri         }
    312  3  basantk         status = init_context(f, sed_cfg);
    313  3  basantk         if (status != APR_SUCCESS)
    314  3  basantk              return status;
    315  3  basantk         ctx = f->ctx;
    316  0     jyri         ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
    317  0     jyri     }
    318  0     jyri 
    319  0     jyri     /* Here is the logic :
    320  0     jyri      * Read the readbytes data from next level fiter into bbinp. Loop through
    321  0     jyri      * the buckets in bbinp and read the data from buckets and invoke
    322  0     jyri      * sed_eval_buffer on the data. libsed will generate it's output using
    323  0     jyri      * sed_write_output which will add data in ctx->bb. Do it until it have
    324  0     jyri      * atleast one bucket bucket in ctx->bb. At the end of data eos bucket
    325  0     jyri      * should be there.
    326  0     jyri      *
    327  0     jyri      * Once eos bucket is seen, then invoke sed_finalize_eval to clear the
    328  0     jyri      * output. If the last byte of data is not a new line character then sed
    329  0     jyri      * will add a new line to the data that is default sed behaviour. Note
    330  0     jyri      * that using this filter with POST data, caller may not expect this
    331  0     jyri      * behaviour.
    332  0     jyri      *
    333  0     jyri      * If next level fiter generate the flush bucket, we can't do much about
    334  0     jyri      * it. If we want to return the flush bucket in brigade bb (to the caller)
    335  0     jyri      * the question is where to add it?
    336  0     jyri      */
    337  0     jyri     while (APR_BRIGADE_EMPTY(ctx->bb)) {
    338  0     jyri         apr_bucket_brigade *bbinp;
    339  0     jyri         apr_bucket *b;
    340  0     jyri 
    341  0     jyri         /* read the bytes from next level filter */
    342  0     jyri         bbinp = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
    343  0     jyri         status = ap_get_brigade(f->next, bbinp, mode, block, readbytes);
    344  0     jyri         if (status != APR_SUCCESS) {
    345  0     jyri             return status;
    346  0     jyri         }
    347  0     jyri         for (b = APR_BRIGADE_FIRST(bbinp); b != APR_BRIGADE_SENTINEL(bbinp);
    348  0     jyri              b = APR_BUCKET_NEXT(b)) {
    349  0     jyri             const char *buf = NULL;
    350  0     jyri             apr_size_t bytes;
    351  0     jyri 
    352  0     jyri             if (APR_BUCKET_IS_EOS(b)) {
    353  0     jyri                 /* eos bucket. Clear the internal sed buffers */
    354  0     jyri                 sed_finalize_eval(&ctx->eval, ctx);
    355  3  basantk                 flush_output_buffer(ctx, NULL, 0);
    356  0     jyri                 APR_BUCKET_REMOVE(b);
    357  0     jyri                 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
    358  0     jyri                 break;
    359  0     jyri             }
    360  0     jyri             else if (APR_BUCKET_IS_FLUSH(b)) {
    361  0     jyri                 /* What should we do with flush bucket */
    362  0     jyri                 continue;
    363  0     jyri             }
    364  0     jyri             if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
    365  0     jyri                      == APR_SUCCESS) {
    366  0     jyri                 status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
    367  0     jyri                 if (status != APR_SUCCESS)
    368  0     jyri                     return status;
    369  3  basantk                 flush_output_buffer(ctx, NULL, 0);
    370  0     jyri             }
    371  0     jyri         }
    372  0     jyri         apr_brigade_cleanup(bbinp);
    373  0     jyri         apr_brigade_destroy(bbinp);
    374  0     jyri     }
    375  0     jyri 
    376  0     jyri     if (!APR_BRIGADE_EMPTY(ctx->bb)) {
    377  0     jyri         apr_bucket_brigade *newbb = NULL;
    378  0     jyri         apr_bucket *b = NULL;
    379  0     jyri 
    380  0     jyri         /* This may return APR_INCOMPLETE which should be fine */
    381  0     jyri         apr_brigade_partition(ctx->bb, readbytes, &b);
    382  0     jyri 
    383  0     jyri         newbb = apr_brigade_split(ctx->bb, b);
    384  0     jyri         APR_BRIGADE_CONCAT(bb, ctx->bb);
    385  0     jyri         APR_BRIGADE_CONCAT(ctx->bb, newbb);
    386  0     jyri     }
    387  0     jyri     return APR_SUCCESS;
    388  0     jyri }
    389  0     jyri 
    390  0     jyri static const char *sed_add_expr(cmd_parms *cmd, void *cfg, const char *arg)
    391  0     jyri {
    392  0     jyri     int offset = (int) (long) cmd->info;
    393  0     jyri     sed_expr_config *sed_cfg =
    394  0     jyri                 (sed_expr_config *) (((char *) cfg) + offset);
    395  0     jyri     if (compile_sed_expr(sed_cfg, cmd, arg) != APR_SUCCESS) {
    396  0     jyri         return apr_psprintf(cmd->temp_pool,
    397  0     jyri                             "Failed to compile sed expression. %s",
    398  0     jyri                             sed_cfg->last_error);
    399  0     jyri     }
    400  0     jyri     return NULL;
    401  0     jyri }
    402  0     jyri 
    403  0     jyri static void *create_sed_dir_config(apr_pool_t *p, char *s)
    404  0     jyri {
    405  0     jyri     sed_config *cfg = apr_pcalloc(p, sizeof(sed_config));
    406  0     jyri     return cfg;
    407  0     jyri }
    408  0     jyri 
    409  0     jyri static const command_rec sed_filter_cmds[] = {
    410  0     jyri     AP_INIT_TAKE1("OutputSed", sed_add_expr,
    411  0     jyri                   (void *) APR_OFFSETOF(sed_config, output),
    412  0     jyri                   ACCESS_CONF,
    413  0     jyri                   "Sed regular expression for Response"),
    414  0     jyri     AP_INIT_TAKE1("InputSed", sed_add_expr,
    415  0     jyri                   (void *) APR_OFFSETOF(sed_config, input),
    416  0     jyri                   ACCESS_CONF,
    417  0     jyri                   "Sed regular expression for Request"),
    418  0     jyri     {NULL}
    419  0     jyri };
    420  0     jyri 
    421  0     jyri static void register_hooks(apr_pool_t *p)
    422  0     jyri {
    423  0     jyri     ap_register_output_filter(sed_filter_name, sed_response_filter, NULL,
    424  0     jyri                               AP_FTYPE_RESOURCE);
    425  0     jyri     ap_register_input_filter(sed_filter_name, sed_request_filter, NULL,
    426  0     jyri                              AP_FTYPE_RESOURCE);
    427  0     jyri }
    428  0     jyri 
    429  0     jyri module AP_MODULE_DECLARE_DATA sed_module = {
    430  0     jyri     STANDARD20_MODULE_STUFF,
    431  0     jyri     create_sed_dir_config,      /* dir config creater */
    432  0     jyri     NULL,                       /* dir merger --- default is to override */
    433  0     jyri     NULL,                       /* server config */
    434  0     jyri     NULL,                       /* merge server config */
    435  0     jyri     sed_filter_cmds,            /* command table */
    436  0     jyri     register_hooks              /* register hooks */
    437  0     jyri };
    438