/* Copyright (c) 2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---
 *
 * Author: falmeida@google.com (Filipe Almeida)
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "statemachine.h"

/* So we can support both C and C++ compilers, we use the CAST() macro instead
 * of using C style casts or static_cast<>() directly.
 *
 * CAST(type, expression) converts expression to type: it expands to
 * static_cast<type>(expression) when __cplusplus is defined and to a plain
 * C style cast otherwise. Both expansions are fully parenthesized.
 */
#ifdef __cplusplus
  #define CAST(type, expression) (static_cast<type>(expression))
#else
  #define CAST(type, expression) ((type)(expression))
#endif

#ifdef __cplusplus
namespace HTMLPARSER_NAMESPACE {
#endif

/* Number of distinct values an 8-bit character can take (2^8).
 * NOTE(review): no code visible in this file references this macro; it
 * presumably matches the width of a transition table row - confirm before
 * relying on it or removing it.
 */
#define MAX_CHAR_8BIT 256

/* Attaches the lookup tables to a statemachine definition.
 *
 * def              - definition to fill in, must not be NULL.
 * transition_table - table indexed first by state and then by input
 *                    character, must not be NULL.
 * state_names      - table of state names used when building error messages,
 *                    may be NULL if no names are available.
 *
 * Both tables are stored by reference, nothing is copied.
 */
void statemachine_definition_populate(statemachine_definition *def,
                                      const int* const* transition_table,
                                      const char* const* state_names)
{
  assert(def != NULL);
  assert(transition_table != NULL);

  def->state_names = state_names;
  def->transition_table = transition_table;
}

/* Adds the callback for the event in_state, which is called when the
 * statemachine is in state st.
 *
 * This event is called every time the statemachine is in the specified
 * state, for every character in the input stream, even if the state remains
 * the same.
 *
 * This event is the last event to be called and is fired after both events
 * exit_state and enter_state.
 *
 * def  - definition to register the callback on, must not be NULL.
 * st   - state the callback is attached to, must be in [0, num_states).
 * func - callback to store; it replaces any callback previously registered
 *        for this state and event.
 */
void statemachine_in_state(statemachine_definition *def, int st,
                           state_event_function func)
{
    assert(def != NULL);
    /* A negative index would write before the start of the callback array. */
    assert(st >= 0);
    assert(st < def->num_states);
    def->in_state_events[st] = func;
}

/* Adds the callback for the event enter_state, which is called when the
 * statemachine enters state st.
 *
 * This event is fired after the event exit_state but before the event
 * in_state.
 *
 * def  - definition to register the callback on, must not be NULL.
 * st   - state the callback is attached to, must be in [0, num_states).
 * func - callback to store; it replaces any callback previously registered
 *        for this state and event.
 */
void statemachine_enter_state(statemachine_definition *def, int st,
                              state_event_function func)
{
    assert(def != NULL);
    /* A negative index would write before the start of the callback array. */
    assert(st >= 0);
    assert(st < def->num_states);
    def->enter_state_events[st] = func;
}

/* Adds the callback for the event exit_state, which is called when the
 * statemachine exits from state st.
 *
 * This is the first event to be called and is fired before both the events
 * enter_state and in_state.
 *
 * def  - definition to register the callback on, must not be NULL.
 * st   - state the callback is attached to, must be in [0, num_states).
 * func - callback to store; it replaces any callback previously registered
 *        for this state and event.
 */
void statemachine_exit_state(statemachine_definition *def, int st,
                             state_event_function func)
{
    assert(def != NULL);
    /* A negative index would write before the start of the callback array. */
    assert(st >= 0);
    assert(st < def->num_states);
    def->exit_state_events[st] = func;
}

/* Initializes a new statemachine definition with a defined number of states.
 *
 * Returns NULL if initialization fails.
 *
 * Initialization failure is fatal, and if this function fails it may not
 * deallocate all previsouly allocated memory.
 */
statemachine_definition *statemachine_definition_new(int states)
{
    statemachine_definition *def;
    def = CAST(statemachine_definition *,
               malloc(sizeof(statemachine_definition)));
    if (def == NULL)
      return NULL;

    def->in_state_events = CAST(state_event_function *,
                                calloc(states, sizeof(state_event_function)));
    if (def->in_state_events == NULL)
      return NULL;

    def->enter_state_events =CAST(state_event_function *,
                                   calloc(states,
                                          sizeof(state_event_function)));
    if (def->enter_state_events == NULL)
      return NULL;

    def->exit_state_events = CAST(state_event_function *,
                                  calloc(states, sizeof(state_event_function)));
    if (def->exit_state_events == NULL)
      return NULL;

    def->num_states = states;
    def->state_names = NULL;
    return def;
}

/* Releases a statemachine definition object.
 *
 * Frees the three per-state callback tables and then the definition itself.
 * The transition table and the state names are only referenced by the
 * definition and are not freed here. def must not be NULL.
 */
void statemachine_definition_delete(statemachine_definition *def)
{
    assert(def != NULL);

    /* The tables are independent of each other, only def has to go last. */
    free(def->exit_state_events);
    free(def->enter_state_events);
    free(def->in_state_events);

    free(def);
}

/* Accessor for the state the statemachine is currently in.
 *
 * ctx is dereferenced without being checked.
 */
int statemachine_get_state(statemachine_ctx *ctx)
{
  return ctx->current_state;
}

/* Sets the current state.
 *
 * It calls the exit event for the old state and the enter event for the state
 * we intend to move into. Neither event is fired if the statemachine is
 * already in the requested state, and the in_state event is never fired from
 * here.
 *
 * Since this state change was not initiated by a character in the input stream
 * we pass a null char to the event functions.
 *
 * ctx   - statemachine to update, must not be NULL and must have a definition.
 * state - state to move into, must be in [0, num_states).
 */
void statemachine_set_state(statemachine_ctx *ctx, int state)
{

  statemachine_definition *def;

  assert(ctx != NULL);
  assert(ctx->definition != NULL);

  def = ctx->definition;

  /* A negative state would index before the start of the callback arrays. */
  assert(state >= 0);
  assert(state < def->num_states);

  ctx->next_state = state;

  if (ctx->current_state != ctx->next_state) {
    if (def->exit_state_events[ctx->current_state]) {
       def->exit_state_events[ctx->current_state](ctx,
                                                 ctx->current_state,
                                                 '\0',
                                                 ctx->next_state);
    }

    /* ctx->next_state is read again on purpose: the exit callback receives
     * ctx and may have modified it. */
    if (def->enter_state_events[ctx->next_state]) {
       def->enter_state_events[ctx->next_state](ctx,
                                               ctx->current_state,
                                               '\0',
                                               ctx->next_state);
    }
  }

  /* The requested state always wins, whatever the callbacks did. */
  ctx->current_state = state;
}

/* Puts the statemachine back into its initial condition.
 *
 * After this call:
 *   - the current and the next state are both the default state (0),
 *   - recording is stopped and the record buffer holds an empty string,
 *   - the position in the input is line 1, column 1.
 *
 * The definition, the user pointer and the error message are left untouched.
 */
void statemachine_reset(statemachine_ctx *ctx)
{
  /* State. */
  ctx->next_state = 0;
  ctx->current_state = 0;

  /* Recording. */
  ctx->recording = 0;
  ctx->record_pos = 0;
  ctx->record_buffer[0] = '\0';

  /* Position in the input stream. */
  ctx->column_number = 1;
  ctx->line_number = 1;
}

/* Initializes a new statemachine. Receives a statemachine definition object
 * that should have been initialized with statemachine_definition_new() and a
 * user reference to be used by the caller.
 *
 * The user reference is used by the caller to store any instance specific data
 * the caller may need and is typically used to propagate context information
 * to the event callbacks. The user pointer can just be set to NULL if the
 * caller doesn't need it.
 *
 * The context is allocated zero-filled so that the fields
 * statemachine_reset() does not touch (such as the error message and the
 * current character) never hold indeterminate values.
 *
 * Returns the new statemachine, to be released with statemachine_delete(), or
 * NULL if the allocation fails, in which case nothing is left allocated.
 */
statemachine_ctx *statemachine_new(statemachine_definition *def,
                                   void *user)
{
    statemachine_ctx *ctx;
    assert(def != NULL);
    ctx = CAST(statemachine_ctx *, calloc(1, sizeof(statemachine_ctx)));
    if (ctx == NULL)
      return NULL;

    statemachine_reset(ctx);

    ctx->definition = def;
    ctx->user = user;

    return ctx;
}

/* Creates a new statemachine whose context is a copy of src.
 *
 * The statemachine definition and the user pointer have to be supplied by the
 * caller because the statemachine does not own them; the new context simply
 * stores the two pointers it is given (a shallow copy).
 *
 * Returns the duplicate, or NULL if the new statemachine could not be
 * created.
 */
statemachine_ctx *statemachine_duplicate(statemachine_ctx *src,
                                         statemachine_definition *def,
                                         void *user)
{
    statemachine_ctx *clone;

    assert(src != NULL);

    clone = statemachine_new(def, user);
    if (clone != NULL)
      statemachine_copy(clone, src, def, user);

    return clone;
}

/* Overwrites the statemachine dst with the context of the statemachine src.
 *
 * The whole context is copied byte for byte and then the definition and the
 * user pointer of dst are replaced with the ones supplied by the caller,
 * since these references are not owned by the statemachine itself.
 */
void statemachine_copy(statemachine_ctx *dst,
                       statemachine_ctx *src,
                       statemachine_definition *def,
                       void *user)
{
    memcpy(dst, src, sizeof(*dst));

    /* The two borrowed references come from the caller, not from src. */
    dst->user = user;
    dst->definition = def;
}

/* Releases a statemachine object.
 *
 * Only the context itself is freed; the definition and the user pointer it
 * refers to are left alone. ctx must not be NULL.
 */
void statemachine_delete(statemachine_ctx *ctx)
{
    assert(ctx != NULL);

    free(ctx);
}

/* Begins recording the input stream into the internal record buffer.
 *
 * Whatever was recorded before is discarded: the buffer is emptied and the
 * write position goes back to the start. The current input character is
 * included in the recording.
 */
void statemachine_start_record(statemachine_ctx *ctx)
{
    assert(ctx != NULL);

    ctx->recording = 1;
    ctx->record_pos = 0;
    ctx->record_buffer[0] = '\0';
}

/* Ends the recording of the input stream.
 *
 * The last input character is not included in the recording. The recorded
 * text is NUL terminated at the current write position and a pointer to the
 * internal buffer holding it is returned. The statemachine must be recording
 * when this is called.
 */
const char *statemachine_stop_record(statemachine_ctx *ctx)
{
    char *recorded;

    assert(ctx != NULL);
    assert(ctx->recording);

    recorded = ctx->record_buffer;
    recorded[ctx->record_pos] = '\0';
    ctx->recording = 0;

    return recorded;
}

/* Returns a pointer to the record string buffer.
 */
const char *statemachine_record_buffer(statemachine_ctx *ctx)
{
    /* The buffer lives inside ctx, so the pointer is only usable for as long
     * as ctx itself. */
    return &ctx->record_buffer[0];
}

/* Writes a printable representation of the character schr into output.
 *
 * The encoding is:
 *   - a single quote becomes \' and a backslash becomes \\,
 *   - any other character in the ASCII range 32..126 is copied as is,
 *   - newline, carriage return and tab become \n, \r and \t,
 *   - everything else becomes \x followed by two lowercase hex digits.
 *
 * schr   - character to encode.
 * output - destination buffer.
 * len    - size of the destination buffer in bytes.
 *
 * The result is always NUL terminated and truncated to at most len - 1
 * characters. Nothing is written if output is NULL or len is 0.
 */
void statemachine_encode_char(char schr, char *output, size_t len)
{
  /* Work on the unsigned value so that bytes above 127 are never negative. */
  unsigned char chr = schr;
  const char *escape = NULL;

  /* Without room for the terminator there is nothing that can be stored. */
  if (output == NULL || len == 0)
    return;

  switch (chr) {
    case '\'': escape = "\\'";  break;
    case '\\': escape = "\\\\"; break;
    case '\n': escape = "\\n";  break;
    case '\r': escape = "\\r";  break;
    case '\t': escape = "\\t";  break;
    default:                    break;
  }

  if (escape != NULL) {
    snprintf(output, len, "%s", escape);

  /* Like isprint() but not dependent on locale. */
  } else if (chr >= 32 && chr <= 126) {
    snprintf(output, len, "%c", chr);
  } else {
    unsigned int code = chr;
    snprintf(output, len, "\\x%.2x", code);
  }
}

/* Builds the error message for a failed transition.
 *
 * Called from statemachine_parse when the transition table yields an error
 * for the current character. The message names the offending character in
 * its encoded form and, when the definition has a table of state names, the
 * state the statemachine was in.
 */
static void statemachine_set_transition_error_message(statemachine_ctx *ctx)
{
  char escaped[10];
  const char* const* names;

  statemachine_encode_char(ctx->current_char, escaped, sizeof(escaped));

  names = ctx->definition->state_names;

  /* Without state names only the character can be reported. */
  if (!names) {
    snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
             "Unexpected character '%s'", escaped);
    return;
  }

  snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
           "Unexpected character '%s' in state '%s'",
           escaped,
           names[ctx->current_state]);
}

/* Parses the input html stream and returns the finishing state.
 *
 * For each of the size characters in str the next state is looked up in the
 * transition table and then, in this order:
 *   - the exit_state event of the current state and the enter_state event of
 *     the next state are fired, if the state changes,
 *   - the in_state event of the next state is fired,
 *   - the character is appended to the record buffer if recording is active
 *     and there is room left,
 *   - the current state, line number and column number are updated.
 *
 * Returns STATEMACHINE_ERROR if unable to parse the input, that is if size is
 * negative or if the transition table reports an error for a character; the
 * error message of ctx is set in both cases. If statemachine_parse() is called
 * after an error situation was encountered the behaviour is unspecified.
 */
/* TODO(falmeida): change int size to size_t size */
int statemachine_parse(statemachine_ctx *ctx, const char *str, int size)
{
    int i;
    const int* const* state_table;
    statemachine_definition *def;

    /* Validate ctx before anything is read through it. */
    assert(ctx != NULL &&
           ctx->definition != NULL &&
           ctx->definition->transition_table != NULL);
    /* str is only dereferenced when there is at least one character. */
    assert(str != NULL || size <= 0);

    if (size < 0) {
        snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR, "%s",
                 "Negative size in statemachine_parse().");
        return STATEMACHINE_ERROR;
    }

    def = ctx->definition;
    state_table = def->transition_table;

    for (i = 0; i < size; i++) {
        ctx->current_char = *str;
        ctx->next_state =
            state_table[ctx->current_state][CAST(unsigned char, *str)];
        if (ctx->next_state == STATEMACHINE_ERROR) {
            statemachine_set_transition_error_message(ctx);
            return STATEMACHINE_ERROR;
        }

        if (ctx->current_state != ctx->next_state) {
            if (def->exit_state_events[ctx->current_state])
                def->exit_state_events[ctx->current_state](ctx,
                                                           ctx->current_state,
                                                           *str,
                                                           ctx->next_state);
        }
        /* The states are compared a second time on purpose: the exit callback
         * receives ctx and may have changed ctx->next_state. */
        if (ctx->current_state != ctx->next_state) {
            if (def->enter_state_events[ctx->next_state])
                def->enter_state_events[ctx->next_state](ctx,
                                                         ctx->current_state,
                                                         *str,
                                                         ctx->next_state);
        }

        if (def->in_state_events[ctx->next_state])
            def->in_state_events[ctx->next_state](ctx,
                                                  ctx->current_state,
                                                  *str,
                                                  ctx->next_state);

        /* We need two bytes left so we can NULL terminate the string. */
        if (ctx->recording &&
            STATEMACHINE_RECORD_BUFFER_SIZE - 1 > ctx->record_pos) {
            ctx->record_buffer[ctx->record_pos++] = *str;
            ctx->record_buffer[ctx->record_pos] = '\0';
        }

/* TODO(falmeida): Should clarify the contract here, since an event can change
 * ctx->next_state and we need this functionality */

        ctx->current_state = ctx->next_state;
        ctx->column_number++;

        /* A newline starts a new line, so the column count starts over. */
        if (*str == '\n') {
          ctx->line_number++;
          ctx->column_number = 1;
        }
        str++;
    }

    return ctx->current_state;
}

#ifdef __cplusplus
}  /* namespace HTMLPARSER_NAMESPACE */
#endif
