Home | History | Annotate | Download | only in mod_sed
      1 /*
      2  * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
      3  * Use is subject to license terms.
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *  http://www.apache.org/licenses/LICENSE-2.0.
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     13  * or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 #include "httpd.h"
     19 #include "http_config.h"
     20 #include "http_log.h"
     21 #include "apr_strings.h"
     22 #include "apr_general.h"
     23 #include "util_filter.h"
     24 #include "apr_buckets.h"
     25 #include "http_request.h"
     26 #include "libsed.h"
     27 
     28 static const char *sed_filter_name = "Sed";
     29 #define MODSED_OUTBUF_SIZE 4000
     30 
     31 typedef struct sed_expr_config
     32 {
     33     sed_commands_t *sed_cmds;
     34     const char *last_error;
     35 } sed_expr_config;
     36 
     37 typedef struct sed_config
     38 {
     39     sed_expr_config output;
     40     sed_expr_config input;
     41 } sed_config;
     42 
     43 /* Context for filter invocation for single HTTP request */
     44 typedef struct sed_filter_ctxt
     45 {
     46     sed_eval_t eval;
     47     request_rec *r;
     48     apr_bucket_brigade *bb;
     49     char *outbuf;
     50     char *curoutbuf;
     51     int bufsize;
     52 } sed_filter_ctxt;
     53 
     54 module AP_MODULE_DECLARE_DATA sed_module;
     55 
     56 /* This function will be call back from libsed functions if there is any error
     57  * happend during execution of sed scripts
     58  */
     59 static void log_sed_errf(void *data, const char *error)
     60 {
     61     request_rec *r = (request_rec *) data;
     62     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, error);
     63 }
     64 
     65 /* This function will be call back from libsed functions if there is any
     66  * compilation error.
     67  */
     68 static void sed_compile_errf(void *data, const char *error)
     69 {
     70     sed_expr_config *sed_cfg = (sed_expr_config *) data;
     71     sed_cfg->last_error = error;
     72 }
     73 
     74 /*
     75  * flush_output_buffer
     76  * Flush the  output data (stored in ctx->outbuf)
     77  */
     78 static void flush_output_buffer(sed_filter_ctxt *ctx, char* buf, int sz)
     79 {
     80     int size = ctx->curoutbuf - ctx->outbuf;
     81     char *out;
     82     apr_bucket *b;
     83     if (size + sz <= 0)
     84         return;
     85     out = apr_palloc(ctx->r->pool, size + sz);
     86     if (size) {
     87         memcpy(out, ctx->outbuf, size);
     88     }
     89     if (buf && (sz > 0)) {
     90         memcpy(out + size, buf, sz);
     91     }
     92     /* Reset the output buffer position */
     93     ctx->curoutbuf = ctx->outbuf;
     94     b = apr_bucket_pool_create(out, size + sz, ctx->r->pool,
     95                                ctx->r->connection->bucket_alloc);
     96     APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
     97 }
     98 
     99 /* This is a call back function. When libsed wants to generate the output,
    100  * this function will be invoked.
    101  */
    102 static void sed_write_output(void *dummy, char *buf, int sz)
    103 {
    104     /* dummy is basically filter context. Context is passed during invocation
    105      * of sed_eval_buffer
    106      */
    107     sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
    108     if (((ctx->curoutbuf - ctx->outbuf) + sz) >= ctx->bufsize) {
    109         /* flush current buffer */
    110         flush_output_buffer(ctx, buf, sz);
    111     }
    112     else {
    113         memcpy(ctx->curoutbuf, buf, sz);
    114         ctx->curoutbuf += sz;
    115     }
    116 }
    117 
    118 /* Compile a sed expression. Compiled context is saved in sed_cfg->sed_cmds.
    119  * Memory required for compilation context is allocated from cmd->pool.
    120  */
    121 static apr_status_t compile_sed_expr(sed_expr_config *sed_cfg,
    122                                      cmd_parms *cmd,
    123                                      const char *expr)
    124 {
    125     apr_status_t status = APR_SUCCESS;
    126 
    127     if (!sed_cfg->sed_cmds) {
    128         sed_commands_t *sed_cmds;
    129         sed_cmds = apr_pcalloc(cmd->pool, sizeof(sed_commands_t));
    130         status = sed_init_commands(sed_cmds, sed_compile_errf, sed_cfg,
    131                                    cmd->pool);
    132         if (status != APR_SUCCESS) {
    133             sed_destroy_commands(sed_cmds);
    134             return status;
    135         }
    136         sed_cfg->sed_cmds = sed_cmds;
    137     }
    138     status = sed_compile_string(sed_cfg->sed_cmds, expr);
    139     if (status != APR_SUCCESS) {
    140         sed_destroy_commands(sed_cfg->sed_cmds);
    141         sed_cfg->sed_cmds = NULL;
    142     }
    143     return status;
    144 }
    145 
    146 /* sed eval cleanup function */
    147 static apr_status_t sed_eval_cleanup(void *data)
    148 {
    149     sed_eval_t *eval = (sed_eval_t *) data;
    150     sed_destroy_eval(eval);
    151     return APR_SUCCESS;
    152 }
    153 
    154 /* Initialize sed filter context. If successful then context is set in f->ctx
    155  */
    156 static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg)
    157 {
    158     apr_status_t status;
    159     sed_filter_ctxt* ctx;
    160     request_rec *r = f->r;
    161     /* Create the context. Call sed_init_eval. libsed will generated
    162      * output by calling sed_write_output and generates any error by
    163      * invoking log_sed_errf.
    164      */
    165     ctx = apr_pcalloc(r->pool, sizeof(sed_filter_ctxt));
    166     ctx->r = r;
    167     ctx->bb = NULL;
    168     status = sed_init_eval(&ctx->eval, sed_cfg->sed_cmds, log_sed_errf,
    169                            r, &sed_write_output, r->pool);
    170     if (status != APR_SUCCESS) {
    171         return status;
    172     }
    173     apr_pool_cleanup_register(r->pool, &ctx->eval, sed_eval_cleanup,
    174                               apr_pool_cleanup_null);
    175     ctx->bufsize = MODSED_OUTBUF_SIZE;
    176     ctx->outbuf = apr_palloc(r->pool, ctx->bufsize + 1);
    177     ctx->curoutbuf = ctx->outbuf;
    178     f->ctx = ctx;
    179     return APR_SUCCESS;
    180 }
    181 
    182 /* Entry function for Sed output filter */
    183 static apr_status_t sed_response_filter(ap_filter_t *f,
    184                                         apr_bucket_brigade *bb)
    185 {
    186     apr_bucket *b;
    187     apr_status_t status;
    188     sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
    189                                            &sed_module);
    190     sed_filter_ctxt *ctx = f->ctx;
    191     sed_expr_config *sed_cfg = &cfg->output;
    192 
    193     if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
    194         /* No sed expressions */
    195         ap_remove_output_filter(f);
    196         return ap_pass_brigade(f->next, bb);
    197     }
    198 
    199     if (ctx == NULL) {
    200 
    201         if (APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(bb))) {
    202             /* no need to run sed filter for Head requests */
    203             ap_remove_output_filter(f);
    204             return ap_pass_brigade(f->next, bb);
    205         }
    206 
    207         status = init_context(f, sed_cfg);
    208         if (status != APR_SUCCESS)
    209              return status;
    210         ctx = f->ctx;
    211     }
    212 
    213     ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
    214 
    215     /* Here is the main logic. Iterate through all the buckets, read the
    216      * content of the bucket, call sed_eval_buffer on the data.
    217      * sed_eval_buffer will read the data line by line, run filters on each
    218      * line. sed_eval_buffer will generates the output by calling
    219      * sed_write_output which will add the output to ctx->bb. At the end of
    220      * the loop, ctx->bb is passed to the next filter in chain. At the end of
    221      * the data, if new line is not found then sed_eval_buffer will store the
    222      * data in it's own buffer.
    223      *
    224      * Once eos bucket is found then sed_finalize_eval will flush the rest of
    225      * the data. If there is no new line in last line of data, new line is
    226      * appended (that is a solaris sed behavior). libsed's internal memory for
    227      * evaluation is allocated on request's pool so it will be cleared once
    228      * request is over.
    229      *
    230      * If flush bucket is found then append the the flush bucket to ctx->bb
    231      * and pass it to next filter. There may be some data which will still be
    232      * in sed's internal buffer which can't be flushed until new line
    233      * character is arrived.
    234      */
    235     for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb);) {
    236         const char *buf = NULL;
    237         apr_size_t bytes = 0;
    238         if (APR_BUCKET_IS_EOS(b)) {
    239             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    240             /* Now clean up the internal sed buffer */
    241             sed_finalize_eval(&ctx->eval, ctx);
    242             flush_output_buffer(ctx, NULL, 0);
    243             APR_BUCKET_REMOVE(b);
    244             /* Insert the eos bucket to ctx->bb brigade */
    245             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
    246             b = b1;
    247         }
    248         else if (APR_BUCKET_IS_FLUSH(b)) {
    249             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    250             APR_BUCKET_REMOVE(b);
    251             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
    252             status = ap_pass_brigade(f->next, ctx->bb);
    253             apr_brigade_cleanup(ctx->bb);
    254             if (status != APR_SUCCESS) {
    255                 return status;
    256             }
    257             b = b1;
    258         }
    259         else if (APR_BUCKET_IS_METADATA(b)) {
    260             b = APR_BUCKET_NEXT(b);
    261         }
    262         else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
    263                  == APR_SUCCESS) {
    264             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    265             status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
    266             if (status != APR_SUCCESS) {
    267                 return status;
    268             }
    269             flush_output_buffer(ctx, NULL, 0);
    270             APR_BUCKET_REMOVE(b);
    271             apr_bucket_delete(b);
    272             b = b1;
    273         }
    274         else {
    275             apr_bucket *b1 = APR_BUCKET_NEXT(b);
    276             APR_BUCKET_REMOVE(b);
    277             b = b1;
    278         }
    279     }
    280     apr_brigade_cleanup(bb);
    281     return ap_pass_brigade(f->next, ctx->bb);
    282 }
    283 
    284 /* Entry function for Sed input filter */
    285 static apr_status_t sed_request_filter(ap_filter_t *f,
    286                                        apr_bucket_brigade *bb,
    287                                        ap_input_mode_t mode,
    288                                        apr_read_type_e block,
    289                                        apr_off_t readbytes)
    290 {
    291     sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
    292                                            &sed_module);
    293     sed_filter_ctxt *ctx = f->ctx;
    294     apr_status_t status;
    295     sed_expr_config *sed_cfg = &cfg->input;
    296 
    297     if (mode != AP_MODE_READBYTES) {
    298         return ap_get_brigade(f->next, bb, mode, block, readbytes);
    299     }
    300 
    301     if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
    302         /* No sed expression */
    303         return ap_get_brigade(f->next, bb, mode, block, readbytes);
    304     }
    305 
    306     if (!ctx) {
    307         if (!ap_is_initial_req(f->r)) {
    308             ap_remove_input_filter(f);
    309             /* XXX : Should we filter the sub requests too */
    310             return ap_get_brigade(f->next, bb, mode, block, readbytes);
    311         }
    312         status = init_context(f, sed_cfg);
    313         if (status != APR_SUCCESS)
    314              return status;
    315         ctx = f->ctx;
    316         ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
    317     }
    318 
    319     /* Here is the logic :
    320      * Read the readbytes data from next level fiter into bbinp. Loop through
    321      * the buckets in bbinp and read the data from buckets and invoke
    322      * sed_eval_buffer on the data. libsed will generate it's output using
    323      * sed_write_output which will add data in ctx->bb. Do it until it have
    324      * atleast one bucket bucket in ctx->bb. At the end of data eos bucket
    325      * should be there.
    326      *
    327      * Once eos bucket is seen, then invoke sed_finalize_eval to clear the
    328      * output. If the last byte of data is not a new line character then sed
    329      * will add a new line to the data that is default sed behaviour. Note
    330      * that using this filter with POST data, caller may not expect this
    331      * behaviour.
    332      *
    333      * If next level fiter generate the flush bucket, we can't do much about
    334      * it. If we want to return the flush bucket in brigade bb (to the caller)
    335      * the question is where to add it?
    336      */
    337     while (APR_BRIGADE_EMPTY(ctx->bb)) {
    338         apr_bucket_brigade *bbinp;
    339         apr_bucket *b;
    340 
    341         /* read the bytes from next level filter */
    342         bbinp = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
    343         status = ap_get_brigade(f->next, bbinp, mode, block, readbytes);
    344         if (status != APR_SUCCESS) {
    345             return status;
    346         }
    347         for (b = APR_BRIGADE_FIRST(bbinp); b != APR_BRIGADE_SENTINEL(bbinp);
    348              b = APR_BUCKET_NEXT(b)) {
    349             const char *buf = NULL;
    350             apr_size_t bytes;
    351 
    352             if (APR_BUCKET_IS_EOS(b)) {
    353                 /* eos bucket. Clear the internal sed buffers */
    354                 sed_finalize_eval(&ctx->eval, ctx);
    355                 flush_output_buffer(ctx, NULL, 0);
    356                 APR_BUCKET_REMOVE(b);
    357                 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
    358                 break;
    359             }
    360             else if (APR_BUCKET_IS_FLUSH(b)) {
    361                 /* What should we do with flush bucket */
    362                 continue;
    363             }
    364             if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
    365                      == APR_SUCCESS) {
    366                 status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
    367                 if (status != APR_SUCCESS)
    368                     return status;
    369                 flush_output_buffer(ctx, NULL, 0);
    370             }
    371         }
    372         apr_brigade_cleanup(bbinp);
    373         apr_brigade_destroy(bbinp);
    374     }
    375 
    376     if (!APR_BRIGADE_EMPTY(ctx->bb)) {
    377         apr_bucket_brigade *newbb = NULL;
    378         apr_bucket *b = NULL;
    379 
    380         /* This may return APR_INCOMPLETE which should be fine */
    381         apr_brigade_partition(ctx->bb, readbytes, &b);
    382 
    383         newbb = apr_brigade_split(ctx->bb, b);
    384         APR_BRIGADE_CONCAT(bb, ctx->bb);
    385         APR_BRIGADE_CONCAT(ctx->bb, newbb);
    386     }
    387     return APR_SUCCESS;
    388 }
    389 
    390 static const char *sed_add_expr(cmd_parms *cmd, void *cfg, const char *arg)
    391 {
    392     int offset = (int) (long) cmd->info;
    393     sed_expr_config *sed_cfg =
    394                 (sed_expr_config *) (((char *) cfg) + offset);
    395     if (compile_sed_expr(sed_cfg, cmd, arg) != APR_SUCCESS) {
    396         return apr_psprintf(cmd->temp_pool,
    397                             "Failed to compile sed expression. %s",
    398                             sed_cfg->last_error);
    399     }
    400     return NULL;
    401 }
    402 
    403 static void *create_sed_dir_config(apr_pool_t *p, char *s)
    404 {
    405     sed_config *cfg = apr_pcalloc(p, sizeof(sed_config));
    406     return cfg;
    407 }
    408 
    409 static const command_rec sed_filter_cmds[] = {
    410     AP_INIT_TAKE1("OutputSed", sed_add_expr,
    411                   (void *) APR_OFFSETOF(sed_config, output),
    412                   ACCESS_CONF,
    413                   "Sed regular expression for Response"),
    414     AP_INIT_TAKE1("InputSed", sed_add_expr,
    415                   (void *) APR_OFFSETOF(sed_config, input),
    416                   ACCESS_CONF,
    417                   "Sed regular expression for Request"),
    418     {NULL}
    419 };
    420 
    421 static void register_hooks(apr_pool_t *p)
    422 {
    423     ap_register_output_filter(sed_filter_name, sed_response_filter, NULL,
    424                               AP_FTYPE_RESOURCE);
    425     ap_register_input_filter(sed_filter_name, sed_request_filter, NULL,
    426                              AP_FTYPE_RESOURCE);
    427 }
    428 
    429 module AP_MODULE_DECLARE_DATA sed_module = {
    430     STANDARD20_MODULE_STUFF,
    431     create_sed_dir_config,      /* dir config creater */
    432     NULL,                       /* dir merger --- default is to override */
    433     NULL,                       /* server config */
    434     NULL,                       /* merge server config */
    435     sed_filter_cmds,            /* command table */
    436     register_hooks              /* register hooks */
    437 };
    438