PHP Classes

File: phpcf-src/ext/phpcf.c

Recommend this page to a friend!
  Classes of Alex Krash   PHP Code formatter   phpcf-src/ext/phpcf.c   Download  
File: phpcf-src/ext/phpcf.c
Role: Auxiliary data
Content type: text/plain
Description: Auxiliary data
Class: PHP Code formatter
Reformat PHP code according to standard options
Author: By
Last change:
Date: 9 years ago
Size: 90,078 bytes
 

Contents

Class file image Download
/* $Id$ */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include <stdarg.h> #include "php.h" #include "php_ini.h" #include "ext/standard/php_smart_str.h" #include "ext/standard/info.h" #include "php_phpcf.h" #include "zend.h" #include "zend_language_scanner.h" #include "zend_language_scanner_defs.h" #include <zend_language_parser.h> /* internal classes: Phpcf */ static zend_class_entry *phpcf_class_entry; static zend_object_handlers php_phpcf_handlers; static phpcf_hashtable *rewrite_callbacks; static int le_phpcf; #define zendtext LANG_SCNG(yy_text) #define zendleng LANG_SCNG(yy_leng) /** * Internal functions */ #define FOREACH_FIELDS zval **entry; HashPosition pos; ulong num_key; uint str_key_len; char *string_key; #define FOREACH_BEGIN(arr) zend_hash_internal_pointer_reset_ex(arr, &pos); \ while (zend_hash_get_current_data_ex(arr, (void **)&entry, &pos) == SUCCESS) { \ switch (zend_hash_get_current_key_ex(arr, &string_key, &str_key_len, &num_key, 0, &pos)) { #define FOREACH_END(arr) \ } \ zend_hash_move_forward_ex(arr, &pos); \ } #define APPEND_WHITESPACE(text) _append_whitespace(text, TOKEN_HOOK_ARGS); /** * Hash table functions */ // Create new hash table static phpcf_hashtable *ht_init(int size) { phpcf_hashtable *ht = (phpcf_hashtable*)ecalloc(1, sizeof(phpcf_hashtable)); ht->len = 0; ht->cap = size; ht->entries = (_phpcf_hashtable_entry*)ecalloc(ht->cap, sizeof(_phpcf_hashtable_entry)); return ht; } /** * @TODO add as param to ht_init */ static phpcf_hashtable *ht_init_persistent(int size) { phpcf_hashtable *ht = (phpcf_hashtable*)calloc(1, sizeof(phpcf_hashtable)); ht->len = 0; ht->cap = size; ht->entries = (_phpcf_hashtable_entry*)calloc(1, ht->cap * sizeof(_phpcf_hashtable_entry)); return ht; } // Add value to hash table static void ht_add(phpcf_hashtable *ht, int key, void *val) { if (ht->len + 1 > ht->cap) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "PHPCF hash table capacity exceeded"); } int hash = key % (ht->cap - 1); while (ht->entries[hash].val) { /* if entries[hash].key == key, then we will just update the value */ if (ht->entries[hash].key == key) break; hash++; if (hash > ht->cap - 1) hash = 0; } if (ht->entries[hash].key != key) { ht->entries[hash].key = key; ht->len++; } ht->entries[hash].val = val; } // Retrieve value from hash table static void *ht_get(phpcf_hashtable *ht, int key) { int hash = key % (ht->cap - 1); while (ht->entries[hash].val && ht->entries[hash].key != key) { hash++; if (hash > ht->cap - 1) { hash = 0; } } return ht->entries[hash].val; } /** * Whether message sniffing is enabled */ static zend_bool is_sniff_enabled(Formatter *frm) { return frm->options->sniff; } /** * Gains number of char occurencies into str */ static int substr_count(char *str, char symbol) { void *p = (void*)str, *endp = p + strlen(str); int cnt = 0; while ((p = memchr(p, symbol, endp - p))) { cnt++; p++; } return cnt; } /* stokens = source tokens scur = source tokens current index scnt = source tokens count tokens = rewritten tokens cur = rewritten tokens current index buf = string buffer for writing */ static void _append_whitespace(char *whitespace, Formatter *frm, phpcf_token *stokens, int *scur, int scnt, phpcf_token *tokens, int *cur, char **buf) { phpcf_token *src = stokens + *scur, *new = tokens + (++(*cur)); new->type = T_WHITESPACE; strcpy(*buf, whitespace); new->val = *buf; *buf += strlen(whitespace) + 1; new->line = src->line; } static int _is_line_aligned(phpcf_token *tokens, int cnt, int line_pos, int line_length, int whitespace_length) { phpcf_token *tok; int i, is_str, prev_is_comment, other_line_length = 0; for (i = line_pos; i < cnt; i++) { tok = tokens + i; is_str = tok->type == T_CONSTANT_ENCAPSED_STRING || tok->type == T_ENCAPSED_AND_WHITESPACE; prev_is_comment = i > 0 && tok->type == T_WHITESPACE && tokens[i - 1].type == T_COMMENT; if (prev_is_comment || (!is_str && strstr(tok->val, "\n"))) break; other_line_length += strlen(tok->val); if (other_line_length == whitespace_length && tok->type == T_WHITESPACE) { // aligned by whitespace return 1; } else if (other_line_length == line_length && tokens[i - 1].type == T_WHITESPACE) { // aligned by tokens! return 1; } else if (other_line_length > line_length) { // no chance break; } } return 0; } static int _is_prev_line_aligned(phpcf_token *tokens, int cnt, int prev_line_pos, int line_length, int whitespace_length) { phpcf_token *tok; int i, is_str, prev_is_comment; for (i = prev_line_pos; i >= 0; i--) { // find beginning of line tok = tokens + i; is_str = tok->type == T_CONSTANT_ENCAPSED_STRING || tok->type == T_ENCAPSED_AND_WHITESPACE || tok->type == T_COMMENT; prev_is_comment = i > 0 && tok->type == T_WHITESPACE && tokens[i - 1].type == T_COMMENT; if (prev_is_comment || (!is_str && strstr(tok->val, "\n"))) break; } if (i < prev_line_pos) { return _is_line_aligned(tokens, cnt, i + 1, line_length, whitespace_length); } return 0; } static int _is_next_line_aligned(phpcf_token *tokens, int cnt, int next_line_pos, int line_length, int whitespace_length) { phpcf_token *tok; int i, is_str, prev_is_comment; for (i = next_line_pos; i < cnt; i++) { // find beginning of line tok = tokens + i; is_str = tok->type == T_CONSTANT_ENCAPSED_STRING || tok->type == T_ENCAPSED_AND_WHITESPACE; prev_is_comment = i > 0 && tok->type == T_WHITESPACE && tokens[i - 1].type == T_COMMENT; if (prev_is_comment || (!is_str && strstr(tok->val, "\n"))) break; } if (i > next_line_pos) { phpcf_token *tmp; while(i < cnt) { tmp = &tokens[i]; if (tmp->type == T_WHITESPACE) { break; } ++i; } return _is_line_aligned(tokens, cnt, i + 1, line_length, whitespace_length); } return 0; } static int _is_whitespace_string(char *str) { do { if (!isspace(*str)) return 0; } while(*(str++)); return 1; } static int _tokens_is_whitespace_aligned(phpcf_token *tokens, int cur, int cnt) { phpcf_token *tok = tokens + cur; int next_pos = cur + 1, whitespace_length, line_length, i, is_str, prev_is_comment, is_aligned; // either it's EOF or whitespace has tabs/newlines or it has length less than 2 if (next_pos >= cnt || tok->val[0] != ' ' || tok->val[1] != ' ') return 0; for (i = 0; tok->val[i]; i++) if (tok->val[i] != ' ') return 0; // search for beginning of line and count length of line before this whitespace whitespace_length = line_length = 0; for (i = next_pos; i >= 0; i--) { tok = tokens + i; is_str = tok->type == T_CONSTANT_ENCAPSED_STRING || tok->type == T_ENCAPSED_AND_WHITESPACE; prev_is_comment = i > 0 && tok->type == T_WHITESPACE && tokens[i - 1].type == T_COMMENT; if (prev_is_comment || (!is_str && strstr(tok->val, "\n"))) break; // this will skip the indent as well, we do not care line_length += strlen(tok->val); if (i != next_pos) whitespace_length += strlen(tok->val); } int prev, next; is_aligned = prev = next = 0; if (i > 0) is_aligned = prev =_is_prev_line_aligned(tokens, cnt, i - 1, line_length, whitespace_length); if (!is_aligned) is_aligned = next = _is_next_line_aligned(tokens, cnt, next_pos, line_length, whitespace_length); //printf("%d - %d \n", prev, next); return is_aligned; } static int _count_expr_length(phpcf_token *tokens, int cur, int cnt, int found_first_bracket, int max_length) { int length = 0, depth = found_first_bracket ? 1 : 0; int current_pos = cur + 1, i = 0; phpcf_token tok; for (i = current_pos; i < cnt; i++) { tok = tokens[i]; if (!found_first_bracket) { if (tok.type == '(' || tok.type == '[') found_first_bracket = 1; else continue; } // allow having long array definitions if user has decided that there should be an array in long form if (tok.type == T_WHITESPACE && strstr(tok.val, "\n")) { return max_length + 1; } length += strlen(tok.val); if (length > max_length) break; if (tok.type == '(') depth++; if (tok.type == ')') depth--; if (tok.type == '[') depth++; if (tok.type == ']') depth--; if (depth <= 0) break; } return length; } static void _print_indent_descr(smart_str *str, char *indent, char *buf, int len) { int i = 0; char tmp[100]; for (i = 0; i < len; i++) { if (buf[i] == ' ') { continue; } smart_str_appends(str, "'"); smart_str_appendl(str, buf, len); smart_str_appends(str, "'"); return; } if (!len) { smart_str_appends(str, "no indent"); } else if (len % (sizeof(indent) - 1) == 0) { sprintf(tmp, "indent level %ld", len / (sizeof(indent) - 1)); smart_str_appends(str, tmp); } else { sprintf(tmp, "indent of %ld spaces", len / (sizeof(indent) - 1)); smart_str_appends(str, tmp); } int d = 1; } /** * Smart string destructor */ static void issue_dtor(void *data) { char *issue = *(char **)data; if (issue) { efree(issue); } } /** * Sniff execution message */ static void sniff_message(Formatter *frm, int line, char *fmt, ...) { int size, ret; va_list ap; char *buffer; char *prepend = " "; if(!is_sniff_enabled(frm)) { return; } smart_str str = {0}; smart_str_appends(&str, prepend); va_start(ap, fmt); size = vspprintf(&buffer, 0, fmt, ap); smart_str_appends(&str, buffer); va_end(ap); sprintf(buffer, "on line %d", line); smart_str_appends(&str, buffer); smart_str_0(&str); efree(buffer); zend_hash_next_index_insert(frm->ctx->sniff, &str.c, sizeof(char *), NULL); } static void sniff_context_message(Formatter *frm, char *in, char *out, int origin, char *descr, int token_pos, phpcf_token *tokens, int cnt) { phpcf_token token, tok; int token_line, ln, col, i, outlen, inlen; char *contents, *tmpbuf, *outbuf, *inbuf; static char buffer[300]; char *indent = frm->options->indent; if (!is_sniff_enabled(frm)) { return; } if (strlen(in) && origin) { if (origin == EXEC_SEQ_ORIGIN_LEFT) { token_pos--; } else { token_pos++; } } if (token_pos < 0) token_pos = 0; else if (token_pos >= cnt) token_pos = cnt - 1; token = tokens[token_pos]; token_line = token.line; if (!substr_count(out, '\n') || substr_count(out, '\n') != substr_count(in, '\n')) { col = 1; for (i = token_pos - 1; i >= 0; i--) { tok = tokens[i]; contents = tok.val; if (strstr(contents, "\n")) { while (NULL != (tmpbuf = strstr(contents, "\n"))) contents = tmpbuf + 1; col += strlen(contents); break; } else { col += strlen(contents); } } if (!strlen(in) && origin == EXEC_SEQ_ORIGIN_RIGHT) { col += strlen(token.val); } smart_str str = {0}; sprintf(buffer, " Expected %c%s ", tolower(descr[0]), descr + 1); smart_str_appends(&str, buffer); if (NULL != (tmpbuf = strstr(out, "\n"))) { tmpbuf += 1; smart_str_appends(&str, "and "); _print_indent_descr(&str, indent, tmpbuf, strstr(tmpbuf, "\n") ? strstr(tmpbuf, "\n") - tmpbuf : strlen(tmpbuf)); } sprintf(buffer, " on line %d column %d", token_line, col); smart_str_appends(&str, buffer); smart_str_0(&str); zend_hash_next_index_insert(frm->ctx->sniff, &str.c, sizeof(char *), NULL); } else { i = 0; outbuf = out; inbuf = in; do { smart_str str = {0}; if (inbuf) { inlen = strstr(inbuf, "\n") ? strstr(inbuf, "\n") - inbuf : strlen(inbuf); } if (outbuf) { outlen = strstr(outbuf, "\n") ? strstr(outbuf, "\n") - outbuf : strlen(outbuf); } if (!inbuf || !outbuf || strncmp(inbuf, outbuf, outlen) || (inbuf[outlen] != '\n' && inbuf[outlen] != 0)) { smart_str_appends(&str, " Expected "); if (i == 0) { smart_str_appends(&str, "'"); if (outbuf) { smart_str_appendl(&str, outbuf, outlen); } smart_str_appends(&str, "', got '"); if (inbuf) { smart_str_appendl(&str, inbuf, inlen); } sprintf(buffer, "' at the end of line %d", token_line + i); smart_str_appends(&str, buffer); } else { if (outbuf) { _print_indent_descr(&str, indent, outbuf, outlen); } else { smart_str_appends(&str, "nothing"); } smart_str_appends(&str, ", got "); if (inbuf) { _print_indent_descr(&str, indent, inbuf, inlen); } else { smart_str_appends(&str, "nothing"); } sprintf(buffer, " on line %d", token_line + i); smart_str_appends(&str, buffer); } smart_str_0(&str); zend_hash_next_index_insert(frm->ctx->sniff, &str.c, sizeof(char *), NULL); } outbuf = strstr(outbuf, "\n"); if (outbuf) outbuf += 1; inbuf = strstr(inbuf, "\n"); if (inbuf) inbuf += 1; i++; } while (outbuf || inbuf); } } /** * Check, if in current block, before close_symbol there are any other symbols, * except spaces or comments */ int _is_body_empty(phpcf_token *stokens, int *scur, int scnt, int close_symbol) { phpcf_token *tok; int next_pos = *scur + 1, i; int found = 0; for (i = next_pos; i < scnt; i++) { tok = &stokens[i]; // found closing brace if(tok->type==close_symbol) { found = 1; break; } else if (SHOULD_IGNORE_TOKEN(tok->type)) { // it is blank symbol continue; } else { // something non-blank break; } } return found; } /** * Init map of tokens to their callbacks */ static void init_rewrite_callbacks(void) { rewrite_callbacks = ht_init_persistent(100); ht_add(rewrite_callbacks, T_OPEN_TAG, token_hook_open_tag); ht_add(rewrite_callbacks, T_CLOSE_TAG, token_hook_close_tag); ht_add(rewrite_callbacks, T_WHITESPACE, token_hook_whitespace); ht_add(rewrite_callbacks, T_FUNCTION, token_hook_function); ht_add(rewrite_callbacks, T_STRING, token_hook_tstring); ht_add(rewrite_callbacks, T_COMMENT, token_hook_comment); ht_add(rewrite_callbacks, T_VAR, token_hook_classdef); ht_add(rewrite_callbacks, T_PUBLIC, token_hook_classdef); ht_add(rewrite_callbacks, T_PROTECTED, token_hook_classdef); ht_add(rewrite_callbacks, T_PRIVATE, token_hook_classdef); ht_add(rewrite_callbacks, T_CONST, token_hook_classdef); ht_add(rewrite_callbacks, T_FINAL, token_hook_classdef); ht_add(rewrite_callbacks, T_ELSE, token_hook_else); ht_add(rewrite_callbacks, T_ELSEIF, token_hook_else); ht_add(rewrite_callbacks, T_INC, token_hook_increment); ht_add(rewrite_callbacks, T_DEC, token_hook_increment); ht_add(rewrite_callbacks, T_STATIC, token_hook_static); ht_add(rewrite_callbacks, T_START_HEREDOC, token_hook_heredoc); ht_add(rewrite_callbacks, T_OBJECT_OPERATOR, token_hook_binary); ht_add(rewrite_callbacks, T_BOOLEAN_AND, token_hook_binary); ht_add(rewrite_callbacks, T_BOOLEAN_OR, token_hook_binary); ht_add(rewrite_callbacks, T_LOGICAL_OR, token_hook_binary); ht_add(rewrite_callbacks, T_LOGICAL_AND, token_hook_binary); ht_add(rewrite_callbacks, T_VARIABLE, token_hook_variable); ht_add(rewrite_callbacks, '.', token_hook_binary); ht_add(rewrite_callbacks, '`', token_hook_str); ht_add(rewrite_callbacks, '"', token_hook_str); ht_add(rewrite_callbacks, '(', token_hook_open_brace); ht_add(rewrite_callbacks, '+', token_hook_check_unary); ht_add(rewrite_callbacks, '-', token_hook_check_unary); ht_add(rewrite_callbacks, '&', token_hook_check_unary); ht_add(rewrite_callbacks, ',', token_hook_comma); ht_add(rewrite_callbacks, '?', token_hook_ternary_begin); ht_add(rewrite_callbacks, '{', token_hook_curly_brace); ht_add(rewrite_callbacks, '[', token_hook_square_brace); ht_add(rewrite_callbacks, T_ARRAY, token_hook_array); } /** * Can line be changed within current formatting ctx */ static int can_change_lines(formatting_ctx *ctx, int line) { int retval = !zend_hash_num_elements(ctx->user_lines) || zend_hash_index_exists(ctx->user_lines, line); return retval; } /** * Hooks */ TOKEN_HOOK(token_hook_whitespace) { phpcf_token *src = stokens + *scur, *new = tokens + *cur; new->type = _tokens_is_whitespace_aligned(stokens, *scur, scnt) ? T_WHITESPACE_ALIGNED : T_WHITESPACE; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_array) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int is_normal_context = 0, next_pos = *scur + 1, i; new->type = src->type; new->val = src->val; new->line = src->line; for (i = next_pos; i < scnt; i++) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) { continue; } // it is array hint else if( tok.type == T_VARIABLE || tok.type == '&') { new->type = T_ARRAY_HINT; break; } else { break; } } } // hook opening brace for another type - {_EMPTY - opens block without content TOKEN_HOOK(token_hook_curly_brace) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int found = _is_body_empty(stokens, scur, scnt, '}'); if(found) { new->type = TOK_EMPTY(src->type); } else { new->type = src->type; } new->val = src->val; new->line = src->line; } /** * Rewrite T_VARIABLE to T_FUNCTION_NAME, when it is function call like * $foo() or Class::$call() */ TOKEN_HOOK(token_hook_variable) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = src->type, next_pos = *scur + 1, i; for (i = next_pos; i < scnt; i++) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) continue; // in situations like '++ $a' there would be variable on the right, so the token is positioned left if (tok.type == '(') { type = T_FUNCTION_NAME; } break; } new->type = type; new->val = src->val; new->line = src->line; } /** * rewrite [ to T_SHORT_ARRAY(_ML), if necessary */ TOKEN_HOOK(token_hook_square_brace) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int prev_pos, i; prev_pos = *scur; new->type = src->type; new->val = src->val; new->line = src->line; for(i=prev_pos -1; i>0; i--) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) { continue; } // it is dereferencing, ok else if( tok.type == ']' || tok.type == ')' || tok.type == T_CONSTANT_ENCAPSED_STRING || tok.type == T_VARIABLE) { break; } else { new->type = T_ARRAY_SHORT; break; } } // add multilines, if needed if (new->type == T_ARRAY_SHORT) { int length = _count_expr_length(stokens, *scur, scnt, 1, frm->options->max_line_length); if (length >= frm->options->max_line_length) { new->type = T_ARRAY_SHORT_ML; } } } int _is_long_comma(formatting_ctx *ctx, phpcf_token *tokens, int curr_pos, int cnt, int max_length, int remember_positions) { int len = 0, depth = 0, is_long, i; phpcf_token tok; // getting first right non-whitespace token length (only if it is not ) for (i = curr_pos + 1; i < cnt; i++) { tok = tokens[i]; if (tok.type == T_WHITESPACE) { // if there already is a new line after comma, we will keep it and remember its position if (strstr(tok.val, "\n")) { if (remember_positions) { ctx->last_long_position = curr_pos; } return 1; } continue; } if (tok.type == ')') break; len += strlen(tok.val); break; } // going backwards to see if there already is more than 120 symbols in line for (i = curr_pos; i > 0; i--) { // we can remember that which comma was long and // count the next long comma only starting from the previous long one if (i == ctx->last_long_position) { break; } tok = tokens[i]; // we should stop when line beginning is reached or in another special case described below if (strstr(tok.val, "\n")) break; if (tok.type == T_WHITESPACE) continue; if (tok.type == ',') len += 2; else if (tok.type == '(') depth--; else if (tok.type == ')') depth++; else len += strlen(tok.val); // found opening "(" before reaching max length, which means it is something like this: // array( 'something', some_func(argument100, argument200), 'something else' ) // ^ our brace ^ our comma // such comma must not be long even though the length before newline can be higher than max. // comma must be directly related to the array (or argument list), but in this case it does not if (depth < 0) return 0; if (depth == 0 && len >= max_length) break; } is_long = (len >= max_length); if (remember_positions && is_long) { ctx->last_long_position = curr_pos; } return is_long; } TOKEN_HOOK(token_hook_comma) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = src->type; if (_is_long_comma(frm->ctx, stokens, *scur, scnt, frm->options->max_line_length, 1)) { type = TOK_LONG(type); } new->type = type; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_open_brace) { phpcf_token *src = stokens + *scur, *new = tokens + *cur; int length = _count_expr_length(stokens, *scur, scnt, 1, frm->options->max_line_length); new->type = '('; if (length >= frm->options->max_line_length) { new->type = TOK_LONG(new->type); } int can_change_tokens = can_change_lines(frm->ctx, src->line); if (can_change_tokens && _is_body_empty(stokens, scur, scnt, ')')) { new->type = '('; new->type = TOK_EMPTY('('); } new->val = src->val; new->line = src->line; } /* text inside "strings" (and `strings`) is also tokenized in PHP by default: we do not care about these */ TOKEN_HOOK(token_hook_str) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int q = src->type, i; char *string_buf = *buf; new->type = src->type; new->val = src->val; new->line = src->line; for (;;) { src = stokens + (++*scur); if (src->type == q) { (*buf)++; **buf = 0; new = tokens + (++*cur); new->type = T_STRING_CONTENTS; new->val = string_buf; new->line = src->line; break; } strcpy(*buf, src->val); *buf += strlen(src->val); } new = tokens + (++*cur); new->type = src->type; new->val = src->val; new->line = src->line; } /* rewrite ?: as a single token */ TOKEN_HOOK(token_hook_ternary_begin) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; if (*scur + 1 < scnt && (stokens + *scur + 1)->type == ':') { (*scur)++; new->type = T_TERNARY_GLUED; new->val = "?:"; } else { new->type = src->type; new->val = src->val; } new->line = src->line; } /* text inside HEREDOCs is tokenized in PHP by default, which is not what we need at all */ TOKEN_HOOK(token_hook_heredoc) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int i; char *string_buf = *buf; new->type = src->type; new->val = src->val; new->line = src->line; for (;;) { src = stokens + (++*scur); if (src->type == T_END_HEREDOC) { (*buf)++; **buf = 0; new = tokens + (++*cur); new->type = T_HEREDOC_CONTENTS; new->val = string_buf; new->line = src->line; break; } strcpy(*buf, src->val); *buf += strlen(src->val); } new = tokens + (++*cur); new->type = src->type; new->val = src->val; new->line = src->line; } // interpret "static" in "static::HELLO" as normal text (T_STRING) instead of keyword (T_STATIC) TOKEN_HOOK(token_hook_static) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int is_normal_context = 0, next_pos = *scur + 1, i; for (i = next_pos; i < scnt; i++) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) continue; switch (tok.type) { case T_VARIABLE: // static var; protected static var; case T_PROTECTED: case T_PUBLIC: case T_PRIVATE: case T_FINAL: case T_VAR: case T_ABSTRACT: // static protected var; case T_FUNCTION: // static function() { ... } is_normal_context = 1; break; } break; } if (is_normal_context) { return token_hook_classdef(TOKEN_HOOK_ARGS); } new->type = T_STRING; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_function) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int next_pos = *scur + 1, length = 0, depth = 0, i = 0; int is_anonymous = 1, found_arguments_begin = 0, found_open_bracket = 0; int type = T_FUNCTION; for (i = next_pos; i < scnt; i++) { tok = stokens[i]; length += strlen(tok.val); if (!found_open_bracket) { if (SHOULD_IGNORE_TOKEN(tok.type)) continue; // function doSomethingGood( // ^ T_STRING ^ arguments begin here if (tok.type == T_STRING && !found_arguments_begin) is_anonymous = 0; if (tok.type == '(') found_arguments_begin = 1; if (tok.type == '{') { found_open_bracket = 1; depth = 1; } } else { if (tok.type == '{') depth++; if (tok.type == '}') depth--; if (depth <= 0) break; // if someone wants to put function declaration on several lines, so be it if (strstr(tok.val, "\n")) { length = frm->options->max_line_length; break; } } } if (is_anonymous) { if (length >= frm->options->max_line_length) { type = T_ANONFUNC_LONG; } else { type = T_ANONFUNC; } } new->type = type; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_close_tag) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int is_eof = 1, can_delete = 1, next_pos = *scur + 1, i = 0, curr_pos, prev_pos; int can_change_tokens = can_change_lines(frm->ctx, src->line); if (can_change_tokens) { if (next_pos < scnt) { for (i = next_pos; i < scnt; i++) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) continue; if (tok.type == T_INLINE_HTML && _is_whitespace_string(tok.val)) continue; is_eof = 0; break; } curr_pos = next_pos - 1; } else { curr_pos = scnt - 1; } if (is_eof) { prev_pos = curr_pos - 1; if (stokens[prev_pos].type == T_WHITESPACE) prev_pos--; if (stokens[prev_pos].type != ';' && stokens[prev_pos].type != '}') { sniff_message(frm, src->line, "Expected either ';' or '}' before closing tag for it to be safely deleted"); can_delete = 0; } } if (is_eof && can_delete) { sniff_message(frm, src->line, "No close tag allowed at the end of file"); (*cur) -= 1; return; } } new->type = src->type; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_open_tag) { phpcf_token *src = stokens + *scur, *new = tokens + *cur; int append_offset = 0; int can_change_tokens = can_change_lines(frm->ctx, src->line); new->type = src->type; new->line = src->line; if (can_change_tokens) new->val = "<?php"; if (!strncmp("<?php", src->val, 5)) { append_offset = 5; } else { append_offset = 2; // short open tag: "<?" } if (!can_change_tokens) { strncpy(*buf, src->val, append_offset); new->val = *buf; new->val[append_offset] = 0; *buf += append_offset + 1; } if (src->val[append_offset]) { APPEND_WHITESPACE(src->val + append_offset); } } TOKEN_HOOK(token_hook_check_unary) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int is_unary = 0, i; for (i = *scur - 1; i > 0; i--) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) continue; switch(tok.type) { case ']': case ')': case T_LNUMBER: case T_DNUMBER: case T_VARIABLE: case T_STRING: break; default: is_unary = 1; break; } break; } new->type = is_unary ? TOK_UNARY(src->type) : src->type; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_tstring) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = T_STRING, i; zval *names_callback = frm->string_filter; for (i = *scur + 1; i < scnt; i++) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) continue; if (tok.type == '(') type = T_FUNCTION_NAME; break; } // checking for non-ascii symbol presence int sizer = sizeof(src->val); int contains = 0; int j; for ( j=0; j<sizer; j++) { char sym = src->val[j]; // valid symbols for classes, functions etc. are a-zA-Z_ if ( ! ( (sym >= 'a' && sym <= 'z' ) || (sym >= 'A' && sym >= 'Z') || (sym >= '0' && sym <= '9') || sym == '_' ) ) { contains = 1; break; } } // value to return char *value_to_assign = src->val; // executing user-defined callback for string validation if (contains && names_callback && NULL != names_callback) { zval *retval_ptr; zval **argv[1]; zval *val; MAKE_STD_ZVAL(val); ZVAL_STRING(val, src->val, 1); argv[0]= &val; zval *local = names_callback; int call_res = call_user_function_ex(EG(function_table), NULL, names_callback, &retval_ptr, 1, argv, 0, NULL TSRMLS_CC); if(FAILURE == call_res) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Error invoking T_STRING callback"); } else if(IS_ARRAY == Z_TYPE_P(retval_ptr)) { // extracting data from ["newString", "debugMessage"] HashTable *ht = Z_ARRVAL_P(retval_ptr); zval **tmpzval = NULL; if( FAILURE == zend_hash_index_find(ht, 0, (void **) &tmpzval) ) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Error getting result string from callback result"); } else { value_to_assign = Z_STRVAL_PP(tmpzval); // try find debug message if ( SUCCESS == zend_hash_index_find(ht, 1, (void **) &tmpzval) && Z_STRLEN_PP(tmpzval) != 0 ) { sniff_message(frm, src->line, Z_STRVAL_PP(tmpzval)); } } } } new->type = type; new->val = value_to_assign; new->line = src->line; } TOKEN_HOOK(token_hook_comment) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = src->type, prev_pos, i = 0, l = 0; int can_change_tokens = can_change_lines(frm->ctx, src->line); if (can_change_tokens && src->val[0] == '#') { sniff_message(frm, src->line, "Comments starting with '#' are not allowed"); new->val = *buf; strcpy(new->val, "//"); strcat(new->val, src->val + 1); *buf += strlen(new->val) + 1; } else { new->val = *buf; strcpy(new->val, src->val); *buf += strlen(new->val) + 1; } if (src->val[0] == '#' || (src->val[0] == '/' && src->val[1] == '/')) { // if previous token is whitespace with line feed, then it means that this comment takes whole line prev_pos = *scur - 1; // otherwise it is appended to some expression like this one if (prev_pos > 1 && stokens[prev_pos].type == T_WHITESPACE && strstr(stokens[prev_pos].val, "\n")) { type = T_SINGLE_LINE_COMMENT_ALONE; } else { type = T_SINGLE_LINE_COMMENT; } while (new->val[i]) { if (new->val[i] == '\n') { new->val[i] = 0; break; } i++; } } new->type = type; new->line = src->line; if (type == T_COMMENT) return; // do not touch multiline comments APPEND_WHITESPACE("\n"); } TOKEN_HOOK(token_hook_classdef) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = src->type, next_pos = *scur + 1, i; int is_property_def = 0; int is_newline = 0; // whether or not definition is on a new line for (i = next_pos; i < scnt; i++) { tok = stokens[i]; // look for whitespace, comment, variable or constant name after, e.g. "private" // if nothing was found, then it is end of property def (or real property def did not begin) if (tok.type == T_VARIABLE || tok.type == T_STRING) is_property_def = 1; else if (tok.type != T_COMMENT && tok.type != T_WHITESPACE) break; if (strstr(tok.val, "\n")) is_newline = 1; } if (is_property_def && is_newline) type = TOK_NL(type); new->type = type; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_binary) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = src->type, next_pos = *scur + 1, prev_pos = *scur - 1; if (stokens[next_pos].type == T_WHITESPACE && strstr(stokens[next_pos].val, "\n")) { type = TOK_NL(type); } else { // searching for comment or whitespace with \n while(prev_pos > 0) { // non-comments and whitespaces does not cause new line if (!SHOULD_IGNORE_TOKEN(stokens[prev_pos].type)) { break; } if( strstr(stokens[prev_pos].val, "\n") ) { type = TOK_NL(type); break; } prev_pos--; } } new->type = type; new->val = src->val; new->line = src->line; } TOKEN_HOOK(token_hook_else) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = src->type, prev_pos = *scur - 1, has_block_before = 0, i; for (i = prev_pos; i > 0; i--) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) continue; if (tok.type == '}') has_block_before = 1; break; } if (!has_block_before) type = TOK_INLINE(type); new->type = type; new->val = src->val; new->line = src->line; } // T_INC => T_INC_LEFT || T_INC_RIGHT ( T_INC_LEFT in case "++$a", T_INC_RIGHT in case "$a++") TOKEN_HOOK(token_hook_increment) { phpcf_token *src = stokens + *scur, *new = tokens + *cur, tok; int type = src->type, next_pos = *scur + 1, is_left = 0, i; for (i = next_pos; i < scnt; i++) { tok = stokens[i]; if (SHOULD_IGNORE_TOKEN(tok.type)) continue; // in situations like '++ $a' there would be variable on the right, so the token is positioned left if (tok.type == T_VARIABLE) is_left = 1; break; } type = is_left ? TOK_LEFT(type) : TOK_RIGHT(type); new->type = type; new->val = src->val; new->line = src->line; } static int phpcf_tokenize(int source_len, char *buf, phpcf_token *tokens) { zval token; zval *keyword; int token_type; zend_bool destroy; int token_line = 1, i = -1; void *p, *endp; char *cur_buf = buf; ZVAL_NULL(&token); while ((token_type = lex_scan(&token TSRMLS_CC))) { i++; destroy = 1; switch (token_type) { case T_CLOSE_TAG: if (zendtext[zendleng - 1] != '>') { CG(zend_lineno)++; } case T_OPEN_TAG: case T_OPEN_TAG_WITH_ECHO: case T_WHITESPACE: case T_COMMENT: case T_DOC_COMMENT: destroy = 0; break; } if (token_type >= 256) { if (token_type == T_END_HEREDOC) { if (CG(increment_lineno)) { token_line = ++CG(zend_lineno); CG(increment_lineno) = 0; } } strncpy(cur_buf, (const char*)zendtext, zendleng); tokens[i].val = cur_buf; cur_buf[zendleng] = 0; cur_buf += zendleng + 2; tokens[i].type = token_type; tokens[i].line = token_line; p = (void*)zendtext; endp = p + zendleng; // token_line += substr_count(zendtext, "\n") while ((p = memchr(p, '\n', endp - p))) { token_line++; p++; } } else { tokens[i].type = token_type; strncpy(cur_buf, (const char*)zendtext, zendleng); tokens[i].val = cur_buf; cur_buf[zendleng] = 0; cur_buf += zendleng + 2; tokens[i].line = token_line; } if (destroy && Z_TYPE(token) != IS_NULL) { zval_dtor(&token); } ZVAL_NULL(&token); if (token_type == T_HALT_COMPILER) { break; } } return i + 1; } static void _append_indent(char *buf, int indent_count, char *indent) { int i, indent_length = strlen(indent); if (indent_count > 20) { indent_count = 20; // protection from buffer overflow } buf += strlen(buf); for (i = 0; i < indent_count; i++) { strcat(buf, indent); buf += indent_length; } } static void _apply_exec_sequence(phpcf_exec_seq **exec_sequence, int seq_len, int line, phpcf_token *tokens, int tokens_cnt, int *indent, smart_str *result, Formatter *frm) { static char out[100]; formatting_ctx *ctx = frm->ctx; phpcf_exec_seq *cur; phpcf_exec_rule *rule = NULL, *cur_rule = NULL, *erule; int i, tok_idx, origin; char *in = ""; char *indent_seq = frm->options->indent; out[0] = 0; for (i = 0; i < seq_len; i++) { cur = exec_sequence[i]; #if 0 if (cur->type == EXEC_SEQ_TYPE_EXEC_RULE) { printf("Rule (indent level %d):\n", *indent); erule = &(cur->seq_data.rule); if (erule->descr) printf(" descr: %s\n", erule->descr); if (erule->ex_indent) printf(" ex indent: %d\n", erule->ex_indent); if (erule->ex) printf(" ex: %d\n", erule->ex); } else { printf("Text: '%s'\n", cur->seq_data.tok->val); } #endif if (cur->type == EXEC_SEQ_TYPE_TOKEN) { in = cur->seq_data.tok->val; continue; } cur_rule = &(cur->seq_data.rule); if (!rule) { rule = cur_rule; tok_idx = cur->tok_idx; origin = cur->origin; } *indent += cur_rule->ex_indent; if (cur_rule->ex && cur_rule->ex < rule->ex) { rule = cur_rule; // rules with lower value have higher priority tok_idx = cur->tok_idx; origin = cur->origin; } } int can_change_tokens = can_change_lines(ctx, line); if (!can_change_tokens || !rule || !rule->ex || rule->ex == PHPCF_EX_DO_NOT_TOUCH_ANYTHING) { smart_str_appends(result, in); return; } switch(rule->ex) { case PHPCF_EX_CHECK_NL: case PHPCF_EX_CHECK_NL_STRONG: // allow 2 new lines if (substr_count(in, '\n') >= 2) { strcat(out, "\n\n"); _append_indent(out, *indent, indent_seq); } else { strcat(out, "\n"); _append_indent(out, *indent, indent_seq); } break; case PHPCF_EX_DELETE_SPACES: case PHPCF_EX_DELETE_SPACES_STRONG: out[0] = 0; break; case PHPCF_EX_SHRINK_SPACES: case PHPCF_EX_SHRINK_SPACES_STRONG: out[0] = ' '; out[1] = 0; break; case PHPCF_EX_SHRINK_NLS_2: strcat(out, "\n\n"); _append_indent(out, *indent, indent_seq); break; case PHPCF_EX_SHRINK_NLS: case PHPCF_EX_SHRINK_NLS_STRONG: strcat(out, "\n"); _append_indent(out, *indent, indent_seq); break; case PHPCF_EX_NL_OR_SPACE: if (substr_count(in, '\n') > 0) { strcat(out, "\n"); _append_indent(out, *indent, indent_seq); } else { out[0] = ' '; out[1] = 0; } break; } if (strcmp(in, out)) { sniff_context_message(frm, in, out, origin, rule->descr, tok_idx, tokens, tokens_cnt); } smart_str_appends(result, out); } static char *phpcf_exec(phpcf_exec_seq *exec, int exec_len, phpcf_token *tokens, int tokens_cnt, Formatter *frm) { int i, indent = 0, seq_len = 0; smart_str result = {0}; phpcf_exec_seq *exec_sequence[10]; phpcf_token *tok; smart_str_appends(&result, ""); for (i = 0; i < exec_len; i++) { if (exec[i].type == EXEC_SEQ_TYPE_TOKEN) { tok = exec[i].seq_data.tok; if (tok->type == T_WHITESPACE) { exec_sequence[seq_len++] = exec + i; } else { if (seq_len) { _apply_exec_sequence(exec_sequence, seq_len, tok->line, tokens, tokens_cnt, &indent, &result, frm); } smart_str_appends(&result, tok->val); seq_len = 0; } } else { exec_sequence[seq_len++] = exec + i; } } if (seq_len) { _apply_exec_sequence(exec_sequence, seq_len, tok->line, tokens, tokens_cnt, &indent, &result, frm); } smart_str_0(&result); return result.c; } static char *_get_context_name(int idx, phpcf_hashtable *indexes) { return (char*)ht_get(indexes, idx); } static void _fsm_print_stack(phpcf_fsm_stack *stack, phpcf_hashtable *table) { int i = 0; for (i = 0; i < stack->len; i++) { fprintf(stderr, "%s%s", _get_context_name(stack->contexts[i], table), i == stack->len - 1 ? "" : " / "); } } static char *get_token_type_name(int token_type) { if (token_type <= 256) return ""; switch (token_type) { case T_REQUIRE_ONCE: return "T_REQUIRE_ONCE"; case T_REQUIRE: return "T_REQUIRE"; case T_EVAL: return "T_EVAL"; case T_INCLUDE_ONCE: return "T_INCLUDE_ONCE"; case T_INCLUDE: return "T_INCLUDE"; case T_LOGICAL_OR: return "T_LOGICAL_OR"; case T_LOGICAL_XOR: return "T_LOGICAL_XOR"; case T_LOGICAL_AND: return "T_LOGICAL_AND"; case T_PRINT: return "T_PRINT"; case T_SR_EQUAL: return "T_SR_EQUAL"; case T_SL_EQUAL: return "T_SL_EQUAL"; case T_XOR_EQUAL: return "T_XOR_EQUAL"; case T_OR_EQUAL: return "T_OR_EQUAL"; case T_AND_EQUAL: return "T_AND_EQUAL"; case T_MOD_EQUAL: return "T_MOD_EQUAL"; case T_CONCAT_EQUAL: return "T_CONCAT_EQUAL"; case T_DIV_EQUAL: return "T_DIV_EQUAL"; case T_MUL_EQUAL: return "T_MUL_EQUAL"; case T_MINUS_EQUAL: return "T_MINUS_EQUAL"; case T_PLUS_EQUAL: return "T_PLUS_EQUAL"; case T_BOOLEAN_OR: return "T_BOOLEAN_OR"; case T_BOOLEAN_AND: return "T_BOOLEAN_AND"; case T_IS_NOT_IDENTICAL: return "T_IS_NOT_IDENTICAL"; case T_IS_IDENTICAL: return "T_IS_IDENTICAL"; case T_IS_NOT_EQUAL: return "T_IS_NOT_EQUAL"; case T_IS_EQUAL: return "T_IS_EQUAL"; case T_IS_GREATER_OR_EQUAL: return "T_IS_GREATER_OR_EQUAL"; case T_IS_SMALLER_OR_EQUAL: return "T_IS_SMALLER_OR_EQUAL"; case T_SR: return "T_SR"; case T_SL: return "T_SL"; case T_INSTANCEOF: return "T_INSTANCEOF"; case T_UNSET_CAST: return "T_UNSET_CAST"; case T_BOOL_CAST: return "T_BOOL_CAST"; case T_OBJECT_CAST: return "T_OBJECT_CAST"; case T_ARRAY_CAST: return "T_ARRAY_CAST"; case T_STRING_CAST: return "T_STRING_CAST"; case T_DOUBLE_CAST: return "T_DOUBLE_CAST"; case T_INT_CAST: return "T_INT_CAST"; case T_DEC: return "T_DEC"; case T_INC: return "T_INC"; case T_CLONE: return "T_CLONE"; case T_NEW: return "T_NEW"; case T_EXIT: return "T_EXIT"; case T_IF: return "T_IF"; case T_ELSEIF: return "T_ELSEIF"; case T_ELSE: return "T_ELSE"; case T_ENDIF: return "T_ENDIF"; case T_LNUMBER: return "T_LNUMBER"; case T_DNUMBER: return "T_DNUMBER"; case T_STRING: return "T_STRING"; case T_STRING_VARNAME: return "T_STRING_VARNAME"; case T_VARIABLE: return "T_VARIABLE"; case T_NUM_STRING: return "T_NUM_STRING"; case T_INLINE_HTML: return "T_INLINE_HTML"; case T_CHARACTER: return "T_CHARACTER"; case T_BAD_CHARACTER: return "T_BAD_CHARACTER"; case T_ENCAPSED_AND_WHITESPACE: return "T_ENCAPSED_AND_WHITESPACE"; case T_CONSTANT_ENCAPSED_STRING: return "T_CONSTANT_ENCAPSED_STRING"; case T_ECHO: return "T_ECHO"; case T_DO: return "T_DO"; case T_WHILE: return "T_WHILE"; case T_ENDWHILE: return "T_ENDWHILE"; case T_FOR: return "T_FOR"; case T_ENDFOR: return "T_ENDFOR"; case T_FOREACH: return "T_FOREACH"; case T_ENDFOREACH: return "T_ENDFOREACH"; case T_DECLARE: return "T_DECLARE"; case T_ENDDECLARE: return "T_ENDDECLARE"; case T_AS: return "T_AS"; case T_SWITCH: return "T_SWITCH"; case T_ENDSWITCH: return "T_ENDSWITCH"; case T_CASE: return "T_CASE"; case T_DEFAULT: return "T_DEFAULT"; case T_BREAK: return "T_BREAK"; case T_CONTINUE: return "T_CONTINUE"; case T_GOTO: return "T_GOTO"; case T_FUNCTION: return "T_FUNCTION"; case T_CONST: return "T_CONST"; case T_RETURN: return "T_RETURN"; case T_TRY: return "T_TRY"; case T_CATCH: return "T_CATCH"; case T_THROW: return "T_THROW"; case T_USE: return "T_USE"; case T_GLOBAL: return "T_GLOBAL"; case T_PUBLIC: return "T_PUBLIC"; case T_PROTECTED: return "T_PROTECTED"; case T_PRIVATE: return "T_PRIVATE"; case T_FINAL: return "T_FINAL"; case T_ABSTRACT: return "T_ABSTRACT"; case T_STATIC: return "T_STATIC"; case T_VAR: return "T_VAR"; case T_UNSET: return "T_UNSET"; case T_ISSET: return "T_ISSET"; case T_EMPTY: return "T_EMPTY"; case T_HALT_COMPILER: return "T_HALT_COMPILER"; case T_CLASS: return "T_CLASS"; case T_INTERFACE: return "T_INTERFACE"; case T_EXTENDS: return "T_EXTENDS"; case T_IMPLEMENTS: return "T_IMPLEMENTS"; case T_OBJECT_OPERATOR: return "T_OBJECT_OPERATOR"; case T_DOUBLE_ARROW: return "T_DOUBLE_ARROW"; case T_LIST: return "T_LIST"; case T_ARRAY: return "T_ARRAY"; case T_CLASS_C: return "T_CLASS_C"; case T_METHOD_C: return "T_METHOD_C"; case T_FUNC_C: return "T_FUNC_C"; case T_LINE: return "T_LINE"; case T_FILE: return "T_FILE"; case T_COMMENT: return "T_COMMENT"; case T_DOC_COMMENT: return "T_DOC_COMMENT"; case T_OPEN_TAG: return "T_OPEN_TAG"; case T_OPEN_TAG_WITH_ECHO: return "T_OPEN_TAG_WITH_ECHO"; case T_CLOSE_TAG: return "T_CLOSE_TAG"; case T_WHITESPACE: return "T_WHITESPACE"; case T_START_HEREDOC: return "T_START_HEREDOC"; case T_END_HEREDOC: return "T_END_HEREDOC"; case T_DOLLAR_OPEN_CURLY_BRACES: return "T_DOLLAR_OPEN_CURLY_BRACES"; case T_CURLY_OPEN: return "T_CURLY_OPEN"; case T_PAAMAYIM_NEKUDOTAYIM: return "T_DOUBLE_COLON"; case T_NAMESPACE: return "T_NAMESPACE"; case T_NS_C: return "T_NS_C"; case T_DIR: return "T_DIR"; case T_NS_SEPARATOR: return "T_NS_SEPARATOR"; // custom tokens by phpcf: case T_STRING_CONTENTS: return "T_STRING_CONTENTS"; case T_HEREDOC_CONTENTS: return "T_HEREDOC_CONTENTS"; case T_SINGLE_LINE_COMMENT: return "T_SINGLE_LINE_COMMENT"; case T_SINGLE_LINE_COMMENT_ALONE: return "T_SINGLE_LINE_COMMENT_ALONE"; case T_ANONFUNC: return "T_ANONFUNC"; case T_ANONFUNC_LONG: return "T_ANONFUNC_LONG"; case T_WHITESPACE_ALIGNED: return "T_WHITESPACE_ALIGNED"; case T_FUNCTION_NAME: return "T_FUNCTION_NAME"; case T_ARRAY_SHORT: return "T_ARRAY_SHORT"; case T_ARRAY_SHORT_ML: return "T_ARRAY_SHORT_ML"; case T_ARRAY_HINT: return "T_ARRAY_HINT"; } return "UNKNOWN"; } static void _print_whitespace_val(char *val) { char buf[100]; // '\n' => "\n", '\t' => "\t" int i = 0, j = 0, l = strlen(val); int final_len = l + substr_count(val, '\n') + substr_count(val, '\t'); if (final_len > sizeof(buf) - 1) { fprintf(stderr, "%s", val); return; } for(i = 0; i <= l; i++) { if (val[i] == '\n') { buf[j++] = '\\'; buf[j++] = 'n'; } else if (val[i] == '\t') { buf[j++] = '\\'; buf[j++] = 't'; } else if (val[i] == ' ') { buf[j++] = '.'; } else { buf[j++] = val[i]; } if (!val[i]) break; } fprintf(stderr, "%-30s", buf); } static void _print_debug_line(phpcf_fsm_stack *stack, phpcf_token *tok, phpcf_hashtable *table) { fprintf(stderr, "%27s", get_token_type_name(tok->type & 0xFFFF)); if (TOK_IS_LONG(tok->type)) fprintf(stderr, "_LONG "); else if (TOK_IS_EMPTY(tok->type)) fprintf(stderr, "_EMPTY "); else if (TOK_IS_NL(tok->type)) fprintf(stderr, "_NL "); else if (TOK_IS_UNARY(tok->type)) fprintf(stderr, "_UNARY "); else if (TOK_IS_LEFT(tok->type)) fprintf(stderr, "_LEFT "); else if (TOK_IS_RIGHT(tok->type)) fprintf(stderr, "_RIGHT "); else if (TOK_IS_INLINE(tok->type)) fprintf(stderr, "_INLINE"); else fprintf(stderr, " "); _print_whitespace_val(tok->val); fprintf(stderr, " "); _fsm_print_stack(stack,table); fprintf(stderr, "\n"); } static int _fsm_perform_transition(phpcf_fsm_stack *stack, int type, int ctx) { #if 0 printf("FSM perform transition %d with ctx %d\n", type, ctx); #endif switch (type) { case CTX_RULE_TYPE_FLAT: stack->contexts[stack->len - 1] = ctx; break; case CTX_RULE_TYPE_PUSH: if (stack->len + 1 > sizeof(stack->contexts) / sizeof(stack->contexts[0])) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "PHPCF FSM stack overflow"); } stack->contexts[stack->len] = ctx; stack->len++; break; case CTX_RULE_TYPE_POP: if (ctx > 0) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "PHPCF FSM incorrect rule for pop: %d", ctx); } stack->len += ctx; if (stack->len < 1) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "PHPCF FSM stack underflow"); } break; default: zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "PHPCF FSM unknown rule type: %d", type); break; } return stack->contexts[stack->len - 1]; } /* * performs context switch for tok, returns current context */ static int phpcf_fsm_transit(phpcf_fsm_stack *stack, phpcf_token *tok, Formatter *frm) { phpcf_hashtable *tok_rules; phpcf_ctx_rule *rule; formatting_ctx *ctx = frm->ctx; if (tok->type == T_WHITESPACE) { return stack->contexts[stack->len - 1]; } if (ctx->delayed_rule_type) { if (frm->options->debug){ fprintf(stderr, "Found delayed rule: "); switch (ctx->delayed_rule_type) { case CTX_RULE_TYPE_FLAT: fprintf(stderr, "%s", _get_context_name(ctx->delayed_rule_ctx, frm->ctx_registry->inverted_indexes)); break; case CTX_RULE_TYPE_PUSH: fprintf(stderr, "array(%s)", _get_context_name(ctx->delayed_rule_ctx,frm->ctx_registry->inverted_indexes)); break; case CTX_RULE_TYPE_POP: fprintf(stderr, "%d", ctx->delayed_rule_ctx); break; } fprintf(stderr, " in state %s\n", _get_context_name(stack->contexts[stack->len - 1], frm->ctx_registry->inverted_indexes)); } _fsm_perform_transition(stack, ctx->delayed_rule_type, ctx->delayed_rule_ctx); ctx->delayed_rule_type = ctx->delayed_rule_ctx = 0; } tok_rules = ht_get(ctx->rules, stack->contexts[stack->len - 1]); if (!tok_rules) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Inconsistent context rules: no rules for context %d", stack->contexts[stack->len - 1]); } rule = ht_get(tok_rules, tok->type); if (!rule) rule = ht_get(tok_rules, 0); if (rule) { if (rule->type == CTX_RULE_TYPE_REPLACE) { _fsm_perform_transition(stack, rule->now_type, rule->now_ctx); _fsm_perform_transition(stack, rule->replace_type, rule->replace_ctx); } else if (rule->type == CTX_RULE_TYPE_DELAYED) { ctx->delayed_rule_type = rule->delayed_type; ctx->delayed_rule_ctx = rule->delayed_ctx; _fsm_perform_transition(stack, rule->now_type, rule->now_ctx); } else { _fsm_perform_transition(stack, rule->type, rule->now_ctx); } } return stack->contexts[stack->len - 1]; } static int phpcf_process(Formatter *frm, phpcf_token *tokens, int tokens_cnt, phpcf_exec_seq *exec, int *exec_len) { int i = 0, j = 0, cur_ctx; phpcf_token *tok; phpcf_fsm_stack stack; phpcf_hashtable *ctx_control_rules; phpcf_exec_rule *erule; phpcf_control_rule *rule; /* init FSM */ frm->ctx->delayed_rule_type = frm->ctx->delayed_rule_ctx = 0; stack.len = 1; stack.contexts[0] = frm->ctx_registry->first_ctx; for (i = 0; i < tokens_cnt; i++) { tok = tokens + i; cur_ctx = phpcf_fsm_transit(&stack, tok, frm); if (frm->options->debug) { _print_debug_line(&stack, tok,frm->ctx_registry->inverted_indexes); } ctx_control_rules = (phpcf_hashtable*)ht_get(frm->format_rules, tok->type); if (!ctx_control_rules) { // unknown token, leaving as-is exec[j].type = EXEC_SEQ_TYPE_TOKEN; exec[j].tok_idx = i; exec[j].seq_data.tok = tok; j++; continue; } // look for control rule for current context, otherwise get rule for default context rule = (phpcf_control_rule*)ht_get(ctx_control_rules, cur_ctx); if (!rule) { rule = (phpcf_control_rule*)ht_get(ctx_control_rules, 0); if (!rule) { // no default rule, leaving as-is exec[j].type = EXEC_SEQ_TYPE_TOKEN; exec[j].tok_idx = i; exec[j].seq_data.tok = tok; j++; continue; } } if ((rule->ex_left || rule->ex_indent_left) && (i == 0 || (tok - 1)->type != T_WHITESPACE_ALIGNED)) { exec[j].type = EXEC_SEQ_TYPE_EXEC_RULE; exec[j].origin = EXEC_SEQ_ORIGIN_LEFT; exec[j].tok_idx = i; exec[j].seq_data.rule.descr = rule->descr_left; exec[j].seq_data.rule.ex_indent = rule->ex_indent_left; exec[j].seq_data.rule.ex = rule->ex_left; j++; } exec[j].type = EXEC_SEQ_TYPE_TOKEN; exec[j].tok_idx = i; exec[j].seq_data.tok = tok; j++; if ((rule->ex_right || rule->ex_indent_right) && (i == tokens_cnt - 1 || (tok + 1)->type != T_WHITESPACE_ALIGNED)) { exec[j].type = EXEC_SEQ_TYPE_EXEC_RULE; exec[j].origin = EXEC_SEQ_ORIGIN_RIGHT; exec[j].tok_idx = i; exec[j].seq_data.rule.descr = rule->descr_right; exec[j].seq_data.rule.ex_indent = rule->ex_indent_right; exec[j].seq_data.rule.ex = rule->ex_right; j++; } } if (frm->ctx->delayed_rule_type) { cur_ctx = _fsm_perform_transition(&stack, frm->ctx->delayed_rule_type, frm->ctx->delayed_rule_ctx); } #if 0 for (i = 0; i < j; i++) { if (exec[i].type == EXEC_SEQ_TYPE_EXEC_RULE) { erule = &(exec[i].seq_data.rule); if (erule->ex_indent) printf(" ex indent: %d", erule->ex_indent); if (erule->ex || 1) printf(" ex: %d\n", erule->ex); } else { printf("Text: '%s'\n", exec[i].seq_data.tok->val); } } #endif if (cur_ctx != _get_context_idx("CTX_DEFAULT", frm->ctx_registry) && cur_ctx != _get_context_idx("CTX_PHP", frm->ctx_registry)) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "%s", "Internal formatter error, invalid final context"); } *exec_len = j; return 0; } static char *phpcf_format(Formatter *frm, char *source, int source_len) { phpcf_token *source_tokens, *tokens, tok; char *buf, *result, *rewrite_tokens_buf; int i, cnt, len = 0, l = 0, exec_len = 0, type; // maximum number of elements in exec per token is 3 phpcf_exec_seq *exec; zval source_z; zend_lex_state original_lex_state; ZVAL_STRINGL(&source_z, source, source_len, 1); zend_save_lexical_state(&original_lex_state TSRMLS_CC); if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) { zend_restore_lexical_state(&original_lex_state TSRMLS_CC); return estrdup(""); } LANG_SCNG(yy_state) = yycINITIAL; source_tokens = (phpcf_token*)emalloc(sizeof(phpcf_token) * source_len); buf = (char*)emalloc(source_len * 3); cnt = phpcf_tokenize(source_len, buf, source_tokens); zend_restore_lexical_state(&original_lex_state TSRMLS_CC); zval_dtor(&source_z); // 'rewrite token functions' do not split tokens to more than 2 new tokens tokens = (phpcf_token*)emalloc(sizeof(phpcf_token) * cnt * 2); l = source_len * 2; for (i = 0; i < cnt; i++) { type = source_tokens[i].type; if (type == T_OPEN_TAG) { l += 4; // '<?' can be replaced with '<?php' } else if (type == T_COMMENT) { l += 1; // '#' can be replaced with '//' } } rewrite_tokens_buf = (char*)emalloc(sizeof(phpcf_token) * l); cnt = phpcf_rewrite_tokens(frm, source_tokens, cnt, tokens, rewrite_tokens_buf); exec = (phpcf_exec_seq*)ecalloc(3 * cnt, sizeof(phpcf_exec_seq)); phpcf_process(frm, tokens, cnt, exec, &exec_len); result = phpcf_exec(exec, exec_len, tokens, cnt, frm); efree(exec); efree(source_tokens); efree(tokens); efree(rewrite_tokens_buf); efree(buf); return result; } /** * Destroy all data, associated with formatter */ static void phpcf_formatter_dtor(void *object TSRMLS_DC) { Formatter *formatter = (Formatter *)object; efree(formatter->format_rules); efree(formatter->ctx_rules); efree(formatter->options); efree(formatter->fsm_stack); efree(object); } /* }}} */ /** * Get token code by it's name */ static int _get_token_idx(char *name) { zval tmp; int res, len = strlen(name); char buf[100]; if (name[0] != 'T') { res = name[0]; if (!strcmp(name + 1, "_NL")) res = TOK_NL(res); else if (!strcmp(name + 1, "_LONG")) res = TOK_LONG(res); else if (!strcmp(name + 1, "_UNARY")) res = TOK_UNARY(res); else if (!strcmp(name + 1, "_EMPTY")) res = TOK_EMPTY(res); return res; } if (!strcmp(name, "T_ARRAY_SHORT")) { return T_ARRAY_SHORT; } else if (!strcmp(name, "T_ARRAY_SHORT_ML")) { return T_ARRAY_SHORT_ML; } if (!strcmp(name + len - 5, "_LEFT")) len -= 5; else if (!strcmp(name + len - 6, "_RIGHT")) len -= 6; else if (!strcmp(name + len - 3, "_NL")) len -= 3; else if (!strcmp(name + len - 7, "_INLINE")) len -= 7; else if (!strcmp(name + len - 6, "_EMPTY")) len -= 6; if (len > sizeof(buf) - 1) { zend_error(E_ERROR, "Buffer overflow for token name %s", name); } strncpy(buf, name, len); buf[len] = 0; if (!zend_get_constant(buf, len, &tmp)) { zend_error(E_ERROR, "Could not parse constant %s", buf); } res = Z_LVAL(tmp); if (!strcmp(name + len, "_LEFT")) res = TOK_LEFT(res); else if (!strcmp(name + len, "_RIGHT")) res = TOK_RIGHT(res); else if (!strcmp(name + len, "_NL")) res = TOK_NL(res); else if (!strcmp(name + len, "_INLINE")) res = TOK_INLINE(res); else if (!strcmp(name + len, "_EMPTY")) res = TOK_EMPTY(res); return res; } // Creates new context registry static phpcf_context_registry *new_context_registry() { phpcf_context_registry *registry = (phpcf_context_registry*)ecalloc(1, sizeof(phpcf_context_registry)); registry->counter = 0; MAKE_STD_ZVAL(registry->indexes); array_init(registry->indexes); return registry; } /** * Retrieve idx for context, named by name from registry */ static int _get_context_idx(char *name, phpcf_context_registry *registry) { int i = 0; zval **res; HashTable *ht = Z_ARRVAL_P(registry->indexes); /** * @todo make it good */ #if 0 FOREACH_FIELDS; FOREACH_BEGIN(Z_ARRVAL_P(registry->indexes)); case HASH_KEY_IS_STRING: if (strncmp(string_key, "CTX_", 4)) printf("Corrupted string key: %s\n", string_key); break; FOREACH_END(Z_ARRVAL_P(registry->indexes)); for (i = 0; i < registry->inverted_indexes->cap; i++) { if (!registry->inverted_indexes->entries[i].val) { continue; } if (strncmp(registry->inverted_indexes->entries[i].val, "CTX_", 4)) { printf("Added context %s\n", name); printf("Context %d value %s\n", registry->inverted_indexes->entries[i].key, registry->inverted_indexes->entries[i].val); zend_error(E_ERROR, "Memory corruption"); } } #endif if (zend_hash_exists(ht, name, strlen(name) + 1)) { zend_hash_find(ht, name, strlen(name) + 1, (void**)&res); return Z_LVAL_PP(res); } registry->counter++; add_assoc_long(registry->indexes, name, registry->counter); ht_add(registry->inverted_indexes, registry->counter, estrdup(name)); return registry->counter; } /* Here is a consise description of PHP version rule actions $rule_action = 'CTX_OTHER_THING'; // replace stack top with CTX_OTHER_THING $rule_action = ['CTX_OTHER_THING']; // perform push(CTX_OTHER_THING) $rule_action = -2; // perform pop() 2 times $rule_action = ['NOW' => N, 'NEXT' => M]; // execute N now, changing current context and execute M before next token */ static phpcf_ctx_rule _parse_context_rule_action(zval *rule_action, phpcf_context_registry *registry) { phpcf_ctx_rule result, tmp, tmp2; zval **res; HashTable *ht; switch (Z_TYPE_P(rule_action)) { case IS_STRING: result.type = CTX_RULE_TYPE_FLAT; result.now_ctx = _get_context_idx(Z_STRVAL_P(rule_action), registry); break; case IS_LONG: result.type = CTX_RULE_TYPE_POP; result.now_ctx = Z_LVAL_P(rule_action); break; case IS_ARRAY: ht = Z_ARRVAL_P(rule_action); // replace rule if (zend_hash_exists(ht, "REPLACE", 8)) { result.type = CTX_RULE_TYPE_REPLACE; zval **replace_def; zend_hash_find(ht, "REPLACE", 8, (void**)&replace_def); if (IS_ARRAY != Z_TYPE_PP(replace_def)) { zend_error(E_ERROR, "Not array passed for REPLACE definition"); } HashTable *replace_ht; replace_ht = Z_ARRVAL_PP(replace_def); zval **from, **to; if (FAILURE == zend_hash_index_find(replace_ht, 0, (void**)&from) || FAILURE == zend_hash_index_find(replace_ht, 1, (void**)&to)) { zend_error(E_ERROR, "No array indexes 0,1 in REPLACE definition"); } tmp = _parse_context_rule_action(*from, registry); result.now_type = tmp.type; result.now_ctx = tmp.now_ctx; tmp = _parse_context_rule_action(*to, registry); result.replace_type = tmp.type; result.replace_ctx = tmp.now_ctx; } // common rule, as array - add it to stack else if (!zend_hash_exists(ht, "NOW", 4)) { zend_hash_index_find(ht, 0, (void**)&res); result.type = CTX_RULE_TYPE_PUSH; result.now_ctx = _get_context_idx(Z_STRVAL_PP(res),registry); } // delayed rule else { result.type = CTX_RULE_TYPE_DELAYED; zend_hash_find(ht, "NOW", 4, (void**)&res); tmp = _parse_context_rule_action(*res, registry); result.now_type = tmp.type; result.now_ctx = tmp.now_ctx; zend_hash_find(ht, "NEXT", 5, (void**)&res); tmp = _parse_context_rule_action(*res, registry); result.delayed_type = tmp.type; result.delayed_ctx = tmp.now_ctx; } break; } return result; } /* $token_rules = array( '?' => array('CTX_TERNARY_BEGIN'), 'T_OBJECT_OPERATOR $' => array('CTX_INLINE_BRACE_BEGIN'), ... ); */ static phpcf_hashtable* _parse_token_context_rules(zval *ztoken_rules, phpcf_context_registry *registry) { char tok_name[100]; HashTable *token_rules = Z_ARRVAL_P(ztoken_rules); int i, l, ht_size = 0, last_idx = 0; phpcf_ctx_rule rule, *rulep; phpcf_hashtable *ht; FOREACH_FIELDS; FOREACH_BEGIN(token_rules); case HASH_KEY_IS_STRING: ht_size += substr_count(string_key, ' ') + 1; break; case HASH_KEY_IS_LONG: ht_size += 1; break; FOREACH_END(token_rules); ht = ht_init(ht_size * 2); FOREACH_BEGIN(token_rules); case HASH_KEY_IS_STRING: rule = _parse_context_rule_action(*entry, registry); l = strlen(string_key); last_idx = 0; for (i = 0; i < l; i++) { if (string_key[i] == ' ') { strncpy(tok_name, string_key + last_idx, i - last_idx); tok_name[i - last_idx] = 0; last_idx = i + 1; rulep = (phpcf_ctx_rule*)emalloc(sizeof(phpcf_ctx_rule)); memcpy(rulep, &rule, sizeof(phpcf_ctx_rule)); ht_add(ht, _get_token_idx(tok_name), rulep); } } strncpy(tok_name, string_key + last_idx, i - last_idx); tok_name[i - last_idx] = 0; rulep = (phpcf_ctx_rule*)emalloc(sizeof(phpcf_ctx_rule)); memcpy(rulep, &rule, sizeof(phpcf_ctx_rule)); ht_add(ht, _get_token_idx(tok_name), rulep); break; case HASH_KEY_IS_LONG: rule = _parse_context_rule_action(*entry,registry); if (num_key == 1) { rulep = (phpcf_ctx_rule*)emalloc(sizeof(phpcf_ctx_rule)); memcpy(rulep, &rule, sizeof(phpcf_ctx_rule)); ht_add(ht, 0, rulep); } else { zend_error(E_WARNING, "Unknown key in tokens in context rules: %lu", num_key); } break; FOREACH_END(token_rules); return ht; } /* $context_rules = array( 'CTX_LONG_FIRST_NL CTX_LONG_EXPR_NL' => <token_rule>, ... ); */ static phpcf_hashtable* parse_context_rules(HashTable *context_rules, phpcf_context_registry *registry) { long cnt = 0; int last_idx, i, l, ht_size = 0; phpcf_hashtable *ht, *rule; char ctx_name[100]; FOREACH_FIELDS; FOREACH_BEGIN(context_rules); case HASH_KEY_IS_STRING: ht_size += substr_count(string_key, ' ') + 1; break; case HASH_KEY_IS_LONG: ht_size += 1; break; FOREACH_END(context_rules); ht = ht_init(ht_size * 2); registry->inverted_indexes = ht_init(ht_size * 2); FOREACH_BEGIN(context_rules); case HASH_KEY_IS_STRING: rule = _parse_token_context_rules(*entry, registry); l = strlen(string_key); last_idx = 0; for (i = 0; i < l; i++) { if (string_key[i] == ' ') { strncpy(ctx_name, string_key + last_idx, i - last_idx); ctx_name[i - last_idx] = 0; last_idx = i + 1; ht_add(ht, _get_context_idx(ctx_name, registry), rule); } } strncpy(ctx_name, string_key + last_idx, i - last_idx); ctx_name[i - last_idx] = 0; ht_add(ht, _get_context_idx(ctx_name, registry), rule); break; case HASH_KEY_IS_LONG: if (num_key != 0) { zend_error(E_WARNING, "Incorrect key in context rules: %lu", num_key); } else { registry->first_ctx = _get_context_idx(Z_STRVAL_PP(entry), registry); } break; FOREACH_END(context_rules); return ht; } static phpcf_control_rule* _parse_token_control_rules(zval *zrules) { phpcf_control_rule *return_val = (phpcf_control_rule*)ecalloc(1, sizeof(phpcf_control_rule)); HashTable *rules = Z_ARRVAL_P(zrules); zval **res; FOREACH_FIELDS; if (zend_hash_index_find(rules, PHPCF_KEY_DESCR_LEFT, (void**)&res) == SUCCESS) { return_val->descr_left = estrdup(Z_STRVAL_PP(res)); } if (zend_hash_index_find(rules, PHPCF_KEY_LEFT, (void**)&res) == SUCCESS) { if (Z_TYPE_PP(res) == IS_ARRAY) { FOREACH_BEGIN(Z_ARRVAL_PP(res)); case HASH_KEY_IS_LONG: if (Z_LVAL_PP(entry) == PHPCF_EX_DECREASE_INDENT) { return_val->ex_indent_left--; } else if (Z_LVAL_PP(entry) == PHPCF_EX_INCREASE_INDENT) { return_val->ex_indent_left++; } else { return_val->ex_left = Z_LVAL_PP(entry); } break; FOREACH_END(Z_ARRVAL_PP(res)); } else { return_val->ex_left = Z_LVAL_PP(res); } } if (zend_hash_index_find(rules, PHPCF_KEY_DESCR_RIGHT, (void**)&res) == SUCCESS) { return_val->descr_right = estrdup(Z_STRVAL_PP(res)); } if (zend_hash_index_find(rules, PHPCF_KEY_RIGHT, (void**)&res) == SUCCESS) { if (Z_TYPE_PP(res) == IS_ARRAY) { FOREACH_BEGIN(Z_ARRVAL_PP(res)); case HASH_KEY_IS_LONG: if (Z_LVAL_PP(entry) == PHPCF_EX_DECREASE_INDENT) { return_val->ex_indent_right--; } else if (Z_LVAL_PP(entry) == PHPCF_EX_INCREASE_INDENT) { return_val->ex_indent_right++; } else { return_val->ex_right = Z_LVAL_PP(entry); } break; FOREACH_END(Z_ARRVAL_PP(res)); } else { return_val->ex_right = Z_LVAL_PP(res); } } return return_val; } static phpcf_hashtable* _parse_context_controls(zval *ztoken_rules, phpcf_context_registry *registry) { HashTable *token_rules = Z_ARRVAL_P(ztoken_rules); phpcf_control_rule *rulep; phpcf_hashtable *ht; int ht_size = 0, l, i, last_idx; char ctx_name[100]; FOREACH_FIELDS; FOREACH_BEGIN(token_rules); case HASH_KEY_IS_STRING: ht_size += substr_count(string_key, ' ') + 1; break; case HASH_KEY_IS_LONG: ht_size += 1; break; FOREACH_END(token_rules); ht = ht_init(ht_size * 2); FOREACH_BEGIN(token_rules); case HASH_KEY_IS_STRING: rulep = _parse_token_control_rules(*entry); l = strlen(string_key); last_idx = 0; for (i = 0; i < l; i++) { if (string_key[i] == ' ') { strncpy(ctx_name, string_key + last_idx, i - last_idx); ctx_name[i - last_idx] = 0; last_idx = i + 1; ht_add(ht, _get_context_idx(ctx_name, registry), rulep); } } strncpy(ctx_name, string_key + last_idx, i - last_idx); ctx_name[i - last_idx] = 0; ht_add(ht, _get_context_idx(ctx_name, registry), rulep); break; case HASH_KEY_IS_LONG: ht_add(ht, 0, _parse_token_control_rules(*entry)); break; FOREACH_END(token_rules); return ht; } /* $controls = array( '{' => ..., ', ,_LONG' => ..., ... ); */ static phpcf_hashtable* parse_controls(HashTable *controls, phpcf_context_registry *registry) { phpcf_hashtable *ht, *token_controls; phpcf_control_rule *rule; int ht_size = 0, l, i, j, last_idx; char tok_name[100]; FOREACH_FIELDS; FOREACH_BEGIN(controls); case HASH_KEY_IS_STRING: ht_size += substr_count(string_key, ' ') + 1; break; case HASH_KEY_IS_LONG: ht_size += 1; break; FOREACH_END(controls); ht = ht_init(ht_size * 2); FOREACH_BEGIN(controls); case HASH_KEY_IS_STRING: token_controls = _parse_context_controls(*entry, registry); l = strlen(string_key); last_idx = 0; for (i = 0; i < l; i++) { if (string_key[i] == ' ') { strncpy(tok_name, string_key + last_idx, i - last_idx); tok_name[i - last_idx] = 0; last_idx = i + 1; ht_add(ht, _get_token_idx(tok_name), token_controls); } } strncpy(tok_name, string_key + last_idx, i - last_idx); tok_name[i - last_idx] = 0; j = _get_token_idx(tok_name); ht_add(ht, j, token_controls); break; FOREACH_END(controls); #if 0 for (i = 0; i < ht->cap; i++) { if (!ht->entries[i].val) continue; l = ht->entries[i].key; if (l < 256) { printf("token %c\n", l); } else if (l < 65536) { printf("token %s\n", get_token_type_name(l)); } else { printf("token %d\n", l); } token_controls = (phpcf_hashtable*)ht->entries[i].val; for (j = 0; j < token_controls->cap; j++) { rule = (control_rule*)token_controls->entries[j].val; if (!rule) continue; l = token_controls->entries[j].key; printf(" context: %d\n", l); if (rule->descr_left) printf(" rule descr left: %s\n", rule->descr_left); if (rule->ex_left) printf(" ex left: %d\n", rule->ex_left); if (rule->ex_indent_left) printf(" ex indent left: %d\n", rule->ex_indent_left); if (rule->descr_right) printf(" rule descr right: %s\n", rule->descr_right); if (rule->ex_right) printf(" ex right: %d\n", rule->ex_right); if (rule->ex_indent_right) printf(" ex indent right: %d\n", rule->ex_indent_right); } } #endif return ht; } static void php_phpcf_obj_dtor(Formatter *c TSRMLS_DC) /* {{{ */ { efree(c->ctx_rules); efree(c->format_rules); if (c->options) { efree(c->options); } if (c->fsm_stack) { efree(c->fsm_stack); } if (c->string_filter) { efree(c->string_filter); } zend_object_std_dtor(&c->std TSRMLS_CC); efree(c); } /* }}} */ static zend_object_value php_phpcf_new(zend_class_entry *ce TSRMLS_DC) /* {{{ */ { Formatter *c; formatting_ctx *ctx; phpcf_options *opts; zend_object_value retval; c = ecalloc(1, sizeof(*c)); zend_object_std_init(&c->std, ce TSRMLS_CC); ctx = ecalloc(1, sizeof(*ctx)); ctx->last_long_position = -1; ALLOC_HASHTABLE(ctx->user_lines); zend_hash_init(ctx->user_lines, 0, NULL, ZVAL_PTR_DTOR, 0); ALLOC_HASHTABLE(ctx->sniff); zend_hash_init(ctx->sniff, 0, NULL,issue_dtor, 1); c->ctx = ctx; opts = ecalloc(1, sizeof(*opts)); opts->indent = PHPCF_INDENT; opts->max_line_length = 120; opts->debug = 0; opts->sniff = 0; c->options = opts; ALLOC_HASHTABLE(c->std.properties); zend_hash_init(c->std.properties, 0, NULL, ZVAL_PTR_DTOR, 0); object_properties_init(&c->std, ce); retval.handle = zend_objects_store_put(c, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t)php_phpcf_obj_dtor, NULL TSRMLS_CC); retval.handlers = &php_phpcf_handlers; return retval; } /* }}} */ /* rewrite `source_tokens` and write new ones to `tokens` using `buf` as buffer for strings returns number of new tokens */ static int phpcf_rewrite_tokens(Formatter *frm, phpcf_token *source_tokens, int source_tokens_cnt, phpcf_token *tokens, char *buf) { phpcf_hashtable *tbl = rewrite_callbacks; phpcf_token tok; int cnt = 0, i = 0; void (*hook_func)(Formatter*, phpcf_token*, int*, int, phpcf_token*, int*, char**); frm->ctx->last_long_position = -1; for (i = 0; i < source_tokens_cnt; i++) { tok = source_tokens[i]; // glue sequential whitespace tokens together if (tok.type == T_WHITESPACE && cnt > 0 && tokens[cnt - 1].type == T_WHITESPACE) { strcat(tokens[cnt - 1].val, tok.val); buf += strlen(tok.val) + 1; continue; } hook_func = ht_get(rewrite_callbacks, tok.type); if (hook_func) { hook_func(frm, source_tokens, &i, source_tokens_cnt, tokens, &cnt, &buf); } else { tokens[cnt].type = tok.type; strcpy(buf, tok.val); tokens[cnt].val = buf; tokens[cnt].line = tok.line; buf += strlen(tok.val) + 1; } cnt++; } return cnt; } /** * PHP interface */ /* {{{ arginfo */ ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_get_version, 0, 0, 1) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_construct, 0, 0, 2) ZEND_ARG_INFO(0, ctx_rules) ZEND_ARG_INFO(0, format_rules) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_format, 0, 0, 1) ZEND_ARG_INFO(0, content) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_set_string_filter, 0, 0, 1) ZEND_ARG_INFO(0, callback) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_set_max_line_length, 0, 0, 1) ZEND_ARG_INFO(0, line_length) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_set_tab_sequence, 0, 0, 1) ZEND_ARG_INFO(0, tab_sequence) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_set_sniff_messages, 0, 0, 1) ZEND_ARG_INFO(0, sniff) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_set_debug_enabled, 0, 0, 1) ZEND_ARG_INFO(0, debug_enabled) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_phpcf_get_issues, 0, 0, 0) ZEND_END_ARG_INFO() /* }}} */ /* {{{ proto bool new \Phpcf\Impl\Extension(array ctx, array format) */ PHP_METHOD(phpcf, __construct) { zval *ctx_rules, *format_rules; phpcf_hashtable *parsed_ctx_rules, *parsed_format_rules; phpcf_context_registry *registry; Formatter *FormatterImpl; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "aa", &ctx_rules, &format_rules) == FAILURE) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "%s", "Expect context and formatting rules being arrays"); return; } registry = new_context_registry(); parsed_ctx_rules = parse_context_rules(Z_ARRVAL_P(ctx_rules), registry); parsed_format_rules = parse_controls(Z_ARRVAL_P(format_rules), registry); FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); FormatterImpl->ctx_rules = parsed_ctx_rules; FormatterImpl->format_rules = parsed_format_rules; FormatterImpl->ctx_registry = registry; FormatterImpl->ctx->rules = FormatterImpl->ctx_rules; // @TODO is it OK? } /* }}} */ /* {{{ proto string Formatter->format(string body) */ PHP_METHOD(phpcf, format) { Formatter *FormatterImpl; char *source = NULL, *result; int source_len; zval *lines; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz", &source, &source_len, &lines) == FAILURE) { return; } FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); if (Z_TYPE_P(lines) == IS_ARRAY) { FormatterImpl->ctx->user_lines = Z_ARRVAL_P(lines); } else { zend_hash_clean(FormatterImpl->ctx->user_lines); } zend_hash_clean(FormatterImpl->ctx->sniff); result = phpcf_format(FormatterImpl, source, source_len); RETVAL_STRING(result, 0); } /* }}} */ /* {{{ proto void Formatter->setStringFilter(filter) */ PHP_METHOD(phpcf, setStringFilter) { // callback, passed to the function zval **callback; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Z", &callback) == FAILURE) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "%s", "Non-callable callback given"); } Formatter *FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); // non-callable and not null (to reset existing) if (!zend_is_callable(*callback, IS_CALLABLE_CHECK_NO_ACCESS, NULL TSRMLS_CC) && IS_NULL != Z_TYPE_PP(callback)) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Non callable and not null string given"); RETURN_FALSE; } else { if (IS_NULL == Z_TYPE_PP(callback)) { FormatterImpl->string_filter = NULL; } else { FormatterImpl->string_filter = *callback; SEPARATE_ZVAL(&FormatterImpl->string_filter); Z_ADDREF_P(FormatterImpl->string_filter); } } RETURN_TRUE; } /* }}} */ /* {{{ proto void Formatter->setMaxLineLength(width) */ PHP_METHOD(phpcf, setMaxLineLength) { long line_length; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &line_length) == FAILURE) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "%s", "Invalid max line length given"); } if (line_length <= 0) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Negative max line length given"); } Formatter *FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); FormatterImpl->options->max_line_length = line_length; } /* }}} */ /* {{{ proto void Formatter->setMaxLineLength(width) */ PHP_METHOD(phpcf, setTabSequence) { char *sequence; int sequence_len; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &sequence, &sequence_len) == FAILURE) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Invalid sequence given"); } if (sequence_len == 0) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Empty sequence given"); } Formatter *FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); FormatterImpl->options->indent = sequence; } /* }}} */ /* {{{ proto void Formatter->setSniffMessages(flag) */ PHP_METHOD(phpcf, setSniffMessages) { zend_bool sniff = 0; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "b", &sniff) == FAILURE) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Invalid sniff flag given"); } Formatter *FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); FormatterImpl->options->sniff = sniff; } /* }}} */ /* {{{ proto void Formatter->setDebugEnabled(flag) */ PHP_METHOD(phpcf, setDebugEnabled) { zend_bool debug = 0; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "b", &debug) == FAILURE) { zend_throw_exception_ex(zend_exception_get_default(TSRMLS_C), 0 TSRMLS_CC, "Invalid debug flag given"); } Formatter *FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); FormatterImpl->options->debug = debug; } /* }}} */ /* {{{ proto void Formatter->getIssues() */ PHP_METHOD(phpcf, getIssues) { HashPosition pointer; HashTable *issue_ht; Formatter *FormatterImpl = (Formatter*)zend_object_store_get_object(getThis() TSRMLS_CC); issue_ht = FormatterImpl->ctx->sniff; array_init_size(return_value, issue_ht->nNumOfElements); char **data; zval *issue; for (zend_hash_internal_pointer_reset_ex(issue_ht, &pointer); zend_hash_get_current_data_ex(issue_ht, (void **) &data, &pointer) == SUCCESS; zend_hash_move_forward_ex(issue_ht, &pointer)) { add_next_index_string(return_value, *data, 1); } } /* }}} */ /* {{{ phpcf_functions[] * * Every user visible function must have an entry in phpcf_functions[]. */ const zend_function_entry phpcf_functions[] = { PHP_ME(phpcf, __construct, arginfo_phpcf_construct, ZEND_ACC_PUBLIC) PHP_ME(phpcf, format, arginfo_phpcf_format, ZEND_ACC_PUBLIC) PHP_ME(phpcf, setStringFilter, arginfo_phpcf_set_string_filter, ZEND_ACC_PUBLIC) PHP_ME(phpcf, setMaxLineLength, arginfo_phpcf_set_max_line_length, ZEND_ACC_PUBLIC) PHP_ME(phpcf, setDebugEnabled, arginfo_phpcf_set_debug_enabled, ZEND_ACC_PUBLIC) PHP_ME(phpcf, setTabSequence, arginfo_phpcf_set_tab_sequence, ZEND_ACC_PUBLIC) PHP_ME(phpcf, setSniffMessages, arginfo_phpcf_set_sniff_messages, ZEND_ACC_PUBLIC) PHP_ME(phpcf, getIssues, arginfo_phpcf_get_issues, ZEND_ACC_PUBLIC) PHP_FE(phpcf_get_version, arginfo_phpcf_get_version) {NULL, NULL, NULL} }; /* }}} */ /* {{{ phpcf_module_entry */ zend_module_entry phpcf_module_entry = { STANDARD_MODULE_HEADER, "phpcf", phpcf_functions, PHP_MINIT(phpcf), NULL, NULL, NULL, NULL, PHPCF_VERSION_STRING, STANDARD_MODULE_PROPERTIES }; /* }}} */ #ifdef COMPILE_DL_PHPCF ZEND_GET_MODULE(phpcf) #endif /** * Get version method */ PHP_FUNCTION(phpcf_get_version) { RETURN_STRING(PHPCF_VERSION_STRING, 1); } /** * phpinfo() section */ PHP_MINFO_FUNCTION(phpcf) /* {{{ */ { php_info_print_table_start(); php_info_print_table_header(2, "Version", PHPCF_VERSION_STRING); php_info_print_table_end(); } /* }}} */ ZEND_RSRC_DTOR_FUNC(php_phpcf_dtor) { if (rsrc->ptr) { Formatter *formatter = (Formatter *)rsrc->ptr; php_phpcf_obj_dtor(formatter); rsrc->ptr = NULL; } } PHP_MINIT_FUNCTION(phpcf) /* {{{ */ { zend_class_entry ce; le_phpcf = zend_register_list_destructors_ex(NULL, php_phpcf_dtor, "Formatter instance", module_number); memcpy(&php_phpcf_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); php_phpcf_handlers.clone_obj = NULL; INIT_NS_CLASS_ENTRY(ce,"Phpcf\\Impl", "Extension", phpcf_functions); phpcf_class_entry = zend_register_internal_class(&ce TSRMLS_CC); phpcf_class_entry->create_object = php_phpcf_new; phpcf_class_entry->ce_flags |= ZEND_ACC_FINAL_CLASS; init_rewrite_callbacks(); return SUCCESS; }; /* }}} */ PHP_RINIT_FUNCTION(phpcf) /* {{{ */ { return SUCCESS; } /* }}} */