403Webshell
Server IP : 172.67.216.182  /  Your IP : 162.158.106.35
Web Server : Apache
System : Linux krdc-ubuntu-s-2vcpu-4gb-amd-blr1-01.localdomain 5.15.0-142-generic #152-Ubuntu SMP Mon May 19 10:54:31 UTC 2025 x86_64
User : www ( 1000)
PHP Version : 7.4.33
Disable Function : passthru,exec,system,putenv,chroot,chgrp,chown,shell_exec,popen,proc_open,pcntl_exec,ini_alter,ini_restore,dl,openlog,syslog,readlink,symlink,popepassthru,pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,imap_open,apache_setenv
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : OFF  |  Sudo : ON  |  Pkexec : ON
Directory :  /www/server/mysql/src/sql/

Upload File : current_dir [ Writeable ] document_root [ Writeable ]

Current File : /www/server/mysql/src/sql/ha_ndbcluster.cc
/* Copyright (c) 2004, 2023, Oracle and/or its affiliates.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License, version 2.0,
  as published by the Free Software Foundation.

  This program is also distributed with certain software (including
  but not limited to OpenSSL) that is licensed under separate terms,
  as designated in a particular file or component or in included license
  documentation.  The authors of MySQL hereby grant you an additional
  permission to link the program and your derivative works with the
  separately licensed software that they have included with MySQL.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License, version 2.0, for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/**
  @file

  @brief
  This file defines the NDB Cluster handler: the interface between
  MySQL and NDB Cluster
*/

#include "ha_ndbcluster_glue.h"
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
#include <ndbapi/NdbIndexStat.hpp>
#include <ndbapi/NdbInterpretedCode.hpp>
#include "../storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
#include "../storage/ndb/src/ndbapi/NdbQueryOperation.hpp"

#include "ha_ndbcluster_binlog.h"
#include "ha_ndbcluster_push.h"
#include "ha_ndbcluster_cond.h"
#include "ha_ndbcluster_tables.h"
#include "ha_ndbcluster_connection.h"
#include "ndb_thd.h"
#include "ndb_table_guard.h"
#include "ndb_global_schema_lock.h"
#include "ndb_global_schema_lock_guard.h"
#include "abstract_query_plan.h"
#include "partition_info.h"
#include "ndb_dist_priv_util.h"
#include "ha_ndb_index_stat.h"

#include <mysql/plugin.h>
#include <ndb_version.h>
#include <ndb_global.h>
#include "ndb_mi.h"
#include "ndb_conflict.h"
#include "ndb_anyvalue.h"
#include "ndb_binlog_extra_row_info.h"
#include "ndb_event_data.h"
#include "ndb_schema_dist.h"
#include "ndb_component.h"
#include "ndb_util_thread.h"
#include "ndb_local_connection.h"
#include "ndb_local_schema.h"
#include "ndb_tdc.h"
#include "ndb_log.h"
#include "ndb_name_util.h"
#include "../storage/ndb/src/common/util/parse_mask.hpp"
#include "../storage/ndb/include/util/SparseBitmask.hpp"
#include "m_ctype.h"

using std::min;
using std::max;

// ndb interface initialization/cleanup
extern "C" void ndb_init_internal();
extern "C" void ndb_end_internal();

static const int DEFAULT_PARALLELISM= 0;
static const ha_rows DEFAULT_AUTO_PREFETCH= 32;
static const ulong ONE_YEAR_IN_SECONDS= (ulong) 3600L*24L*365L;

ulong opt_ndb_extra_logging;
static ulong opt_ndb_wait_connected;
ulong opt_ndb_wait_setup;
static ulong opt_ndb_cache_check_time;
static uint opt_ndb_cluster_connection_pool;
static uint opt_ndb_recv_thread_activation_threshold;
static char* opt_ndb_recv_thread_cpu_mask;
static char* opt_ndb_index_stat_option;
static char* opt_ndb_connectstring;
static uint opt_ndb_nodeid;

static MYSQL_THDVAR_UINT(
  autoincrement_prefetch_sz,         /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specify number of autoincrement values that are prefetched.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  1,                                 /* min */
  65535,                             /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_BOOL(
  force_send,                        /* name */
  PLUGIN_VAR_OPCMDARG,
  "Force send of buffers to ndb immediately without waiting for "
  "other threads.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_exact_count,                   /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use exact records count during query planning and for fast "
  "select count(*), disable for faster queries.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_transactions,                  /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use transactions for large inserts, if enabled then large "
  "inserts will be split into several smaller transactions",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static MYSQL_THDVAR_BOOL(
  use_copying_alter_table,           /* name */
  PLUGIN_VAR_OPCMDARG,
  "Force ndbcluster to always copy tables at alter table (should "
  "only be used if on-line alter table fails).",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


static MYSQL_THDVAR_UINT(
  optimized_node_selection,          /* name */
  PLUGIN_VAR_OPCMDARG,
  "Select nodes for transactions in a more optimal way.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  3,                                 /* default */
  0,                                 /* min */
  3,                                 /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_ULONG(
  batch_size,                        /* name */
  PLUGIN_VAR_RQCMDARG,
  "Batch size in bytes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  32768,                             /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_THDVAR_ULONG(
  optimization_delay,                /* name */
  PLUGIN_VAR_RQCMDARG,
  "For optimize table, specifies the delay in milliseconds "
  "for each batch of rows sent.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  10,                                /* default */
  0,                                 /* min */
  100000,                            /* max */
  0                                  /* block */
);

#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_INDEX_STAT_ENABLE FALSE
#else
#define DEFAULT_NDB_INDEX_STAT_ENABLE TRUE
#endif

static MYSQL_THDVAR_BOOL(
  index_stat_enable,                 /* name */
  PLUGIN_VAR_OPCMDARG,
  "Use ndb index statistics in query optimization.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_INDEX_STAT_ENABLE      /* default */
);


static MYSQL_THDVAR_BOOL(
  table_no_logging,                  /* name */
  PLUGIN_VAR_NOCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);


static MYSQL_THDVAR_BOOL(
  table_temporary,                   /* name */
  PLUGIN_VAR_NOCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  FALSE                              /* default */
);

static MYSQL_THDVAR_UINT(
  blob_read_batch_bytes,             /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies the bytesize large Blob reads "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_UINT(
  blob_write_batch_bytes,            /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specifies the bytesize large Blob writes "
  "should be batched into.  0 == No limit.",
  NULL,                              /* check func */
  NULL,                              /* update func */
  65536,                             /* default */
  0,                                 /* min */
  UINT_MAX,                          /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_UINT(
  deferred_constraints,              /* name */
  PLUGIN_VAR_RQCMDARG,
  "Specified that constraints should be checked deferred (when supported)",
  NULL,                              /* check func */
  NULL,                              /* update func */
  0,                                 /* default */
  0,                                 /* min */
  1,                                 /* max */
  0                                  /* block */
);

static MYSQL_THDVAR_BOOL(
  show_foreign_key_mock_tables,          /* name */
  PLUGIN_VAR_OPCMDARG,
  "Show the mock tables which is used to support foreign_key_checks= 0. "
  "Extra info warnings are shown when creating and dropping the tables. "
  "The real table name is show in SHOW CREATE TABLE",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);

#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
#define DEFAULT_NDB_JOIN_PUSHDOWN FALSE
#else
#define DEFAULT_NDB_JOIN_PUSHDOWN TRUE
#endif

static MYSQL_THDVAR_BOOL(
  join_pushdown,                     /* name */
  PLUGIN_VAR_OPCMDARG,
  "Enable pushing down of join to datanodes",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  DEFAULT_NDB_JOIN_PUSHDOWN          /* default */
);

static MYSQL_THDVAR_BOOL(
  log_exclusive_reads,               /* name */
  PLUGIN_VAR_OPCMDARG,
  "Log primary key reads with exclusive locks "
  "to allow conflict resolution based on read conflicts",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


/*
  Required in index_stat.cc but available only from here
  thanks to use of top level anonymous structs.
*/
bool ndb_index_stat_get_enable(THD *thd)
{
  const bool value = THDVAR(thd, index_stat_enable);
  return value;
}

bool ndb_show_foreign_key_mock_tables(THD* thd)
{
  const bool value = THDVAR(thd, show_foreign_key_mock_tables);
  return value;
}

bool ndb_log_exclusive_reads(THD *thd)
{
  const bool value = THDVAR(thd, log_exclusive_reads);
  return value;
}

static int ndbcluster_end(handlerton *hton, ha_panic_function flag);
static bool ndbcluster_show_status(handlerton *hton, THD*,
                                   stat_print_fn *,
                                   enum ha_stat_type);

static int ndbcluster_get_tablespace(THD* thd,
                                     LEX_CSTRING db_name,
                                     LEX_CSTRING table_name,
                                     LEX_CSTRING *tablespace_name);
static int ndbcluster_alter_tablespace(handlerton *hton,
                                       THD* thd, 
                                       st_alter_tablespace *info);
static int ndbcluster_fill_files_table(handlerton *hton,
                                       THD *thd, 
                                       TABLE_LIST *tables, 
                                       Item *cond);

#if MYSQL_VERSION_ID >= 50501
/**
   Used to fill in INFORMATION_SCHEMA* tables.

   @param hton handle to the handlerton structure
   @param thd the thread/connection descriptor
   @param[in,out] tables the information schema table that is filled up
   @param cond used for conditional pushdown to storage engine
   @param schema_table_idx the table id that distinguishes the type of table

   @return Operation status
 */
static int
ndbcluster_fill_is_table(handlerton *hton, THD *thd, TABLE_LIST *tables,
                         Item *cond, enum enum_schema_tables schema_table_idx)
{
  if (schema_table_idx == SCH_FILES)
    return  ndbcluster_fill_files_table(hton, thd, tables, cond);
  return 0;
}
#endif

static handler *ndbcluster_create_handler(handlerton *hton,
                                          TABLE_SHARE *table,
                                          MEM_ROOT *mem_root)
{
  return new (mem_root) ha_ndbcluster(hton, table);
}

static uint
ndbcluster_partition_flags()
{
  return (HA_CAN_UPDATE_PARTITION_KEY |
          HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION);
}

uint ha_ndbcluster::alter_flags(uint flags) const
{
  const uint f=
    HA_PARTITION_FUNCTION_SUPPORTED |
    0;

  if (flags & Alter_info::ALTER_DROP_PARTITION)
    return 0;

  return f;
}

#define NDB_AUTO_INCREMENT_RETRIES 100
#define BATCH_FLUSH_SIZE (32768)

#define ERR_PRINT(err) \
  DBUG_PRINT("error", ("%d  message: %s", err.code, err.message))

#define ERR_RETURN(err)                  \
{                                        \
  const NdbError& tmp= err;              \
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
}

#define ERR_BREAK(err, code)             \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
  break;                                 \
}

#define ERR_SET(err, code)               \
{                                        \
  const NdbError& tmp= err;              \
  code= ndb_to_mysql_error(&tmp);        \
}
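
/*
  Illustrative sketch (editor's example; 'trans' and 'tab' are assumed NdbApi
  objects, not defined here): the ERR_* helpers above are meant to be used
  right after a failing NdbApi call inside a DBUG_ENTER'ed function, mapping
  the NdbError to a MySQL error code via ndb_to_mysql_error():

    NdbScanOperation *op= trans->getNdbScanOperation(tab);
    if (op == NULL)
      ERR_RETURN(trans->getNdbError());
*/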

static int ndbcluster_inited= 0;

/* 
   Indicator and CONDVAR used to delay client and slave
   connections until Ndb has Binlog setup
   (bug#46955)
*/
int ndb_setup_complete= 0;
native_cond_t COND_ndb_setup_complete; // Signal with ndbcluster_mutex

extern Ndb* g_ndb;

/// Handler synchronization
native_mutex_t ndbcluster_mutex;

/// Table lock handling
HASH ndbcluster_open_tables;
HASH ndbcluster_dropped_tables;

static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
                                my_bool);

static void modify_shared_stats(NDB_SHARE *share,
                                Ndb_local_table_statistics *local_stat);

static int ndb_get_table_statistics(THD *thd, ha_ndbcluster*, bool, Ndb*,
                                    const NdbRecord *, struct Ndb_statistics *,
                                    uint part_id= ~(uint)0);

static ulong multi_range_fixed_size(int num_ranges);

static ulong multi_range_max_entry(NDB_INDEX_TYPE keytype, ulong reclength);

THD *injector_thd= 0;

/* Status variables shown with 'show status like 'Ndb%' */

struct st_ndb_status g_ndb_status;

const char *g_ndb_status_index_stat_status = "";
long g_ndb_status_index_stat_cache_query = 0;
long g_ndb_status_index_stat_cache_clean = 0;

long long g_event_data_count = 0;
long long g_event_nondata_count = 0;
long long g_event_bytes_count = 0;

static long long g_slave_api_client_stats[Ndb::NumClientStatistics];

static long long g_server_api_client_stats[Ndb::NumClientStatistics];

void
update_slave_api_stats(Ndb* ndb)
{
  for (Uint32 i=0; i < Ndb::NumClientStatistics; i++)
    g_slave_api_client_stats[i] = ndb->getClientStat(i);
}

st_ndb_slave_state g_ndb_slave_state;

static int check_slave_state(THD* thd)
{
  DBUG_ENTER("check_slave_state");

#ifdef HAVE_NDB_BINLOG
  if (!thd->slave_thread)
    DBUG_RETURN(0);

  const Uint32 runId = ndb_mi_get_slave_run_id();
  DBUG_PRINT("info", ("Slave SQL thread run id is %u",
                      runId));
  if (unlikely(runId != g_ndb_slave_state.sql_run_id))
  {
    DBUG_PRINT("info", ("Slave run id changed from %u, "
                        "treating as Slave restart",
                        g_ndb_slave_state.sql_run_id));
    g_ndb_slave_state.sql_run_id = runId;

    g_ndb_slave_state.atStartSlave();

    /* Always try to load the Max Replicated Epoch info
     * first.
     * Could be made optional if it's a problem
     */
    {
      /*
         Load highest replicated epoch from a local
         MySQLD from the cluster.
      */
      DBUG_PRINT("info", ("Loading applied epoch information from %s",
                          NDB_APPLY_TABLE));
      NdbError ndb_error;
      Uint64 highestAppliedEpoch = 0;
      do
      {
        Ndb* ndb= check_ndb_in_thd(thd);
        NDBDICT* dict= ndb->getDictionary();
        NdbTransaction* trans= NULL;
        ndb->setDatabaseName(NDB_REP_DB);
        Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);

        const NDBTAB* ndbtab= ndbtab_g.get_table();
        if (unlikely(ndbtab == NULL))
        {
          ndb_error = dict->getNdbError();
          break;
        }

        trans= ndb->startTransaction();
        if (unlikely(trans == NULL))
        {
          ndb_error = ndb->getNdbError();
          break;
        }

        do
        {
          NdbScanOperation* sop = trans->getNdbScanOperation(ndbtab);
          if (unlikely(sop == NULL))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          const Uint32 server_id_col_num = 0;
          const Uint32 epoch_col_num = 1;
          NdbRecAttr* server_id_ra = 0;
          NdbRecAttr* epoch_ra = 0;

          if (unlikely((sop->readTuples(NdbOperation::LM_CommittedRead) != 0)   ||
                       ((server_id_ra = sop->getValue(server_id_col_num)) == NULL)  ||
                       ((epoch_ra = sop->getValue(epoch_col_num)) == NULL)))
          {
            ndb_error = sop->getNdbError();
            break;
          }

          if (trans->execute(NdbTransaction::Commit))
          {
            ndb_error = trans->getNdbError();
            break;
          }

          int rc = 0;
          while (0 == (rc= sop->nextResult(true)))
          {
            Uint32 serverid = server_id_ra->u_32_value();
            Uint64 epoch = epoch_ra->u_64_value();

            if ((serverid == ::server_id) ||
                (ndb_mi_get_ignore_server_id(serverid)))
            {
              highestAppliedEpoch = MAX(epoch, highestAppliedEpoch);
            }
          }

          if (rc != 1)
          {
            ndb_error = sop->getNdbError();
            break;
          }
        } while (0);

        trans->close();
      } while(0);

      if (ndb_error.code != 0)
      {
        sql_print_warning("NDB Slave : Could not determine maximum replicated epoch from %s.%s "
                          "at Slave start, error %u %s",
                          NDB_REP_DB,
                          NDB_APPLY_TABLE,
                          ndb_error.code, ndb_error.message);
      }

      /*
        Set Global status variable to the Highest Applied Epoch from
        the Cluster DB.
        If none was found, this will be zero.
      */
      g_ndb_slave_state.max_rep_epoch = highestAppliedEpoch;
      sql_print_information("NDB Slave : MaxReplicatedEpoch set to %llu (%u/%u) at Slave start",
                            g_ndb_slave_state.max_rep_epoch,
                            (Uint32)(g_ndb_slave_state.max_rep_epoch >> 32),
                            (Uint32)(g_ndb_slave_state.max_rep_epoch & 0xffffffff));
    } // Load highest replicated epoch
  } // New Slave SQL thread run id
#endif

  DBUG_RETURN(0);
}


static int update_status_variables(Thd_ndb *thd_ndb,
                                   st_ndb_status *ns,
                                   Ndb_cluster_connection *c)
{
  ns->connected_port= c->get_connected_port();
  ns->connected_host= c->get_connected_host();
  if (ns->cluster_node_id != (int) c->node_id())
  {
    ns->cluster_node_id= c->node_id();
    if (&g_ndb_status == ns && g_ndb_cluster_connection == c)
      sql_print_information("NDB: NodeID is %lu, management server '%s:%lu'",
                            ns->cluster_node_id, ns->connected_host,
                            ns->connected_port);
  }
  ns->number_of_replicas= 0;
  {
    int n= c->get_no_ready();
    ns->number_of_ready_data_nodes= n > 0 ?  n : 0;
  }
  ns->number_of_data_nodes= c->no_db_nodes();
  ns->connect_count= c->get_connect_count();
  ns->last_commit_epoch_server= ndb_get_latest_trans_gci();
  if (thd_ndb)
  {
    ns->execute_count= thd_ndb->m_execute_count;
    ns->scan_count= thd_ndb->m_scan_count;
    ns->pruned_scan_count= thd_ndb->m_pruned_scan_count;
    ns->sorted_scan_count= thd_ndb->m_sorted_scan_count;
    ns->pushed_queries_defined= thd_ndb->m_pushed_queries_defined;
    ns->pushed_queries_dropped= thd_ndb->m_pushed_queries_dropped;
    ns->pushed_queries_executed= thd_ndb->m_pushed_queries_executed;
    ns->pushed_reads= thd_ndb->m_pushed_reads;
    ns->last_commit_epoch_session = thd_ndb->m_last_commit_epoch_session;
    for (int i= 0; i < MAX_NDB_NODES; i++)
    {
      ns->transaction_no_hint_count[i]= thd_ndb->m_transaction_no_hint_count[i];
      ns->transaction_hint_count[i]= thd_ndb->m_transaction_hint_count[i];
    }
    for (int i=0; i < Ndb::NumClientStatistics; i++)
    {
      ns->api_client_stats[i] = thd_ndb->ndb->getClientStat(i);
    }
    ns->schema_locks_count= thd_ndb->schema_locks_count;
  }
  return 0;
}

/* Helper macro for definitions of NdbApi status variables */

#define NDBAPI_COUNTERS(NAME_SUFFIX, ARRAY_LOCATION)                    \
  {"api_wait_exec_complete_count" NAME_SUFFIX,                          \
   (char*) ARRAY_LOCATION[ Ndb::WaitExecCompleteCount ],                \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_wait_scan_result_count" NAME_SUFFIX,                            \
   (char*) ARRAY_LOCATION[ Ndb::WaitScanResultCount ],                  \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_wait_meta_request_count" NAME_SUFFIX,                           \
   (char*) ARRAY_LOCATION[ Ndb::WaitMetaRequestCount ],                 \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_wait_nanos_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::WaitNanosCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_bytes_sent_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::BytesSentCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_bytes_received_count" NAME_SUFFIX,                              \
   (char*) ARRAY_LOCATION[ Ndb::BytesRecvdCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_start_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransStartCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_commit_count" NAME_SUFFIX,                                \
   (char*) ARRAY_LOCATION[ Ndb::TransCommitCount ],                     \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_abort_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransAbortCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_close_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::TransCloseCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_pk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::PkOpCount ],                            \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_uk_op_count" NAME_SUFFIX,                                       \
   (char*) ARRAY_LOCATION[ Ndb::UkOpCount ],                            \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_table_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::TableScanCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_range_scan_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::RangeScanCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_pruned_scan_count" NAME_SUFFIX,                                 \
   (char*) ARRAY_LOCATION[ Ndb::PrunedScanCount ],                      \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_scan_batch_count" NAME_SUFFIX,                                  \
   (char*) ARRAY_LOCATION[ Ndb::ScanBatchCount ],                       \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_read_row_count" NAME_SUFFIX,                                    \
   (char*) ARRAY_LOCATION[ Ndb::ReadRowCount ],                         \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_trans_local_read_row_count" NAME_SUFFIX,                        \
   (char*) ARRAY_LOCATION[ Ndb::TransLocalReadRowCount ],               \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_adaptive_send_forced_count" NAME_SUFFIX,                        \
   (char *) ARRAY_LOCATION[ Ndb::ForcedSendsCount ],                    \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_adaptive_send_unforced_count" NAME_SUFFIX,                      \
   (char *) ARRAY_LOCATION[ Ndb::UnforcedSendsCount ],                  \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},                                   \
  {"api_adaptive_send_deferred_count" NAME_SUFFIX,                      \
   (char *) ARRAY_LOCATION[ Ndb::DeferredSendsCount ],                  \
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL}

SHOW_VAR ndb_status_variables_dynamic[]= {
  {"cluster_node_id",     (char*) &g_ndb_status.cluster_node_id,      SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"config_from_host",    (char*) &g_ndb_status.connected_host,       SHOW_CHAR_PTR, SHOW_SCOPE_GLOBAL},
  {"config_from_port",    (char*) &g_ndb_status.connected_port,       SHOW_LONG, SHOW_SCOPE_GLOBAL},
//{"number_of_replicas",  (char*) &g_ndb_status.number_of_replicas,   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"number_of_data_nodes",(char*) &g_ndb_status.number_of_data_nodes, SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"number_of_ready_data_nodes",
   (char*) &g_ndb_status.number_of_ready_data_nodes,                  SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"connect_count",      (char*) &g_ndb_status.connect_count,         SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"execute_count",      (char*) &g_ndb_status.execute_count,         SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"scan_count",         (char*) &g_ndb_status.scan_count,            SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pruned_scan_count",  (char*) &g_ndb_status.pruned_scan_count,     SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"schema_locks_count", (char*) &g_ndb_status.schema_locks_count,    SHOW_LONG, SHOW_SCOPE_GLOBAL},
  NDBAPI_COUNTERS("_session", &g_ndb_status.api_client_stats),
  {"sorted_scan_count",  (char*) &g_ndb_status.sorted_scan_count,     SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_queries_defined", (char*) &g_ndb_status.pushed_queries_defined, 
   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_queries_dropped", (char*) &g_ndb_status.pushed_queries_dropped,
   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_queries_executed", (char*) &g_ndb_status.pushed_queries_executed,
   SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"pushed_reads",       (char*) &g_ndb_status.pushed_reads,          SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"last_commit_epoch_server", 
                         (char*) &g_ndb_status.last_commit_epoch_server,
                                                                      SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"last_commit_epoch_session", 
                         (char*) &g_ndb_status.last_commit_epoch_session, 
                                                                      SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};


SHOW_VAR ndb_status_injector_variables[]= {
  {"api_event_data_count_injector",     (char*) &g_event_data_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_nondata_count_injector",  (char*) &g_event_nondata_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_bytes_count_injector",    (char*) &g_event_bytes_count, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};

SHOW_VAR ndb_status_slave_variables[]= {
  NDBAPI_COUNTERS("_slave", &g_slave_api_client_stats),
  {"slave_max_replicated_epoch", (char*) &g_ndb_slave_state.max_rep_epoch, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};

SHOW_VAR ndb_status_server_client_stat_variables[]= {
  NDBAPI_COUNTERS("", &g_server_api_client_stats),
  {"api_event_data_count",     
   (char*) &g_server_api_client_stats[ Ndb::DataEventsRecvdCount ], 
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_nondata_count",  
   (char*) &g_server_api_client_stats[ Ndb::NonDataEventsRecvdCount ], 
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {"api_event_bytes_count",    
   (char*) &g_server_api_client_stats[ Ndb::EventBytesRecvdCount ], 
   SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};

static int show_ndb_server_api_stats(THD *thd, SHOW_VAR *var, char *buff)
{
  /* This function is called when SHOW STATUS / INFO_SCHEMA wants
   * to see one of our status vars
   * We use this opportunity to :
   *  1) Update the globals with current values
   *  2) Return an array of var definitions, pointing to
   *     the updated globals
   */
  ndb_get_connection_stats((Uint64*) &g_server_api_client_stats[0]);

  var->type= SHOW_ARRAY;
  var->value= (char*) ndb_status_server_client_stat_variables;
  var->scope= SHOW_SCOPE_GLOBAL;

  return 0;
}

SHOW_VAR ndb_status_index_stat_variables[]= {
  {"status",          (char*) &g_ndb_status_index_stat_status, SHOW_CHAR_PTR, SHOW_SCOPE_GLOBAL},
  {"cache_query",     (char*) &g_ndb_status_index_stat_cache_query, SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {"cache_clean",     (char*) &g_ndb_status_index_stat_cache_clean, SHOW_LONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};


/*
  Error handling functions
*/

/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */

int ndb_to_mysql_error(const NdbError *ndberr)
{
  /* read the mysql mapped error code */
  int error= ndberr->mysql_code;

  switch (error)
  {
    /* errors for which we do not add warnings, just return mapped error code
    */
  case HA_ERR_NO_SUCH_TABLE:
  case HA_ERR_KEY_NOT_FOUND:
    return error;

    /* Mapping missing, go with the ndb error code */
  case -1:
  case 0:
    /* Never map to errors below HA_ERR_FIRST */
    if (ndberr->code < HA_ERR_FIRST)
      error= HA_ERR_INTERNAL_ERROR;
    else
      error= ndberr->code;
    break;
    /* Mapping exists, go with the mapped code */
  default:
    break;
  }

  {
    /*
      Push the NDB error message as warning
      - Used to be able to use SHOW WARNINGS to get more info
        on what the error is
      - Used by replication to see if the error was temporary
    */
    if (ndberr->status == NdbError::TemporaryError)
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG),
                          ndberr->code, ndberr->message, "NDB");
    else
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
                          ndberr->code, ndberr->message, "NDB");
  }
  return error;
}

ulong opt_ndb_slave_conflict_role;

#ifdef HAVE_NDB_BINLOG

static int
handle_conflict_op_error(NdbTransaction* trans,
                         const NdbError& err,
                         const NdbOperation* op);

static int
handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
                    const char* tab_name,
                    bool table_has_blobs,
                    const char* handling_type,
                    const NdbRecord* key_rec,
                    const NdbRecord* data_rec,
                    const uchar* old_row,
                    const uchar* new_row,
                    enum_conflicting_op_type op_type,
                    enum_conflict_cause conflict_cause,
                    const NdbError& conflict_error,
                    NdbTransaction* conflict_trans,
                    const MY_BITMAP *write_set,
                    Uint64 transaction_id);
#endif

static const Uint32 error_op_after_refresh_op = 920;

static inline
int
check_completed_operations_pre_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                                      const NdbOperation *first,
                                      const NdbOperation *last,
                                      uint *ignore_count)
{
  uint ignores= 0;
  DBUG_ENTER("check_completed_operations_pre_commit");

  if (unlikely(first == 0))
  {
    assert(last == 0);
    DBUG_RETURN(0);
  }

  /*
    Check that all errors are "accepted" errors
    or exceptions to report
  */
#ifdef HAVE_NDB_BINLOG
  const NdbOperation* lastUserOp = trans->getLastDefinedOperation();
#endif
  while (true)
  {
    const NdbError &err= first->getNdbError();
    const bool op_has_conflict_detection = (first->getCustomData() != NULL);
    if (!op_has_conflict_detection)
    {
      assert(err.code != (int) error_op_after_refresh_op);

      /* 'Normal path' - ignore key (not) present, others are errors */
      if (err.classification != NdbError::NoError &&
          err.classification != NdbError::ConstraintViolation &&
          err.classification != NdbError::NoDataFound)
      {
        /* Non ignored error, report it */
        DBUG_PRINT("info", ("err.code == %u", err.code));
        DBUG_RETURN(err.code);
      }
    }
#ifdef HAVE_NDB_BINLOG
    else
    {
      /*
         Op with conflict detection, use special error handling method
       */

      if (err.classification != NdbError::NoError)
      {
        int res = handle_conflict_op_error(trans,
                                           err,
                                           first);
        if (res != 0)
          DBUG_RETURN(res);
      }
    } // if (!op_has_conflict_detection)
#endif
    if (err.classification != NdbError::NoError)
      ignores++;

    if (first == last)
      break;

    first= trans->getNextCompletedOperation(first);
  }
  if (ignore_count)
    *ignore_count= ignores;
#ifdef HAVE_NDB_BINLOG
  /*
     Conflict detection related error handling above may have defined
     new operations on the transaction.  If so, execute them now
  */
  if (trans->getLastDefinedOperation() != lastUserOp)
  {
    const NdbOperation* last_conflict_op = trans->getLastDefinedOperation();

    NdbError nonMaskedError;
    assert(nonMaskedError.code == 0);

    if (trans->execute(NdbTransaction::NoCommit,
                       NdbOperation::AO_IgnoreError,
                       thd_ndb->m_force_send))
    {
      /* Transaction execute failed, even with IgnoreError... */
      nonMaskedError = trans->getNdbError();
      assert(nonMaskedError.code != 0);
    }
    else if (trans->getNdbError().code)
    {
      /* Check the result codes of the operations we added */
      const NdbOperation* conflict_op = NULL;
      do
      {
        conflict_op = trans->getNextCompletedOperation(conflict_op);
        assert(conflict_op != NULL);
        /* We will ignore 920 which represents a refreshOp or other op
         * arriving after a refreshOp
         */
        const NdbError& err = conflict_op->getNdbError();
        if ((err.code != 0) &&
            (err.code != (int) error_op_after_refresh_op))
        {
          /* Found a real error, break out and handle it */
          nonMaskedError = err;
          break;
        }
      } while (conflict_op != last_conflict_op);
    }

    /* Handle errors with extra conflict handling operations */
    if (nonMaskedError.code != 0)
    {
      if (nonMaskedError.status == NdbError::TemporaryError)
      {
        /* Slave will roll back and retry entire transaction. */
        ERR_RETURN(nonMaskedError);
      }
      else
      {
        char msg[FN_REFLEN];
        my_snprintf(msg, sizeof(msg), "Executing extra operations for "
                    "conflict handling hit Ndb error %d '%s'",
                    nonMaskedError.code, nonMaskedError.message);
        push_warning_printf(current_thd, Sql_condition::SL_ERROR,
                            ER_EXCEPTIONS_WRITE_ERROR,
                            ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
        /* Slave will stop replication. */
        DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
      }
    }
  }
#endif
  DBUG_RETURN(0);
}

static inline
int
check_completed_operations(Thd_ndb *thd_ndb, NdbTransaction *trans,
                           const NdbOperation *first,
                           const NdbOperation *last,
                           uint *ignore_count)
{
  uint ignores= 0;
  DBUG_ENTER("check_completed_operations");

  if (unlikely(first == 0))
  {
    assert(last == 0);
    DBUG_RETURN(0);
  }

  /*
    Check that all errors are "accepted" errors
  */
  while (true)
  {
    const NdbError &err= first->getNdbError();
    if (err.classification != NdbError::NoError &&
        err.classification != NdbError::ConstraintViolation &&
        err.classification != NdbError::NoDataFound)
    {
#ifdef HAVE_NDB_BINLOG
      /* All conflict detection etc should be done before commit */
      assert((err.code != (int) error_conflict_fn_violation) &&
             (err.code != (int) error_op_after_refresh_op));
#endif
      DBUG_RETURN(err.code);
    }
    if (err.classification != NdbError::NoError)
      ignores++;

    if (first == last)
      break;

    first= trans->getNextCompletedOperation(first);
  }
  if (ignore_count)
    *ignore_count= ignores;
  DBUG_RETURN(0);
}

void
ha_ndbcluster::release_completed_operations(NdbTransaction *trans)
{
  /**
   * mysqld reads/write blobs fully,
   *   which means that it does not keep blobs
   *   open/active over execute, which means
   *   that it should be safe to release anything completed here
   *
   *   i.e don't check for blobs, but just go ahead and release
   */
  trans->releaseCompletedOperations();
  trans->releaseCompletedQueries();
}


static inline
int
execute_no_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
                  bool ignore_no_key,
                  uint *ignore_count = 0)
{
  DBUG_ENTER("execute_no_commit");
  ha_ndbcluster::release_completed_operations(trans);
  const NdbOperation *first= trans->getFirstDefinedOperation();
  const NdbOperation *last= trans->getLastDefinedOperation();
  thd_ndb->m_execute_count++;
  thd_ndb->m_unsent_bytes= 0;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  int rc= 0;
  do
  {
    if (trans->execute(NdbTransaction::NoCommit,
                       NdbOperation::AO_IgnoreError,
                       thd_ndb->m_force_send))
    {
      rc= -1;
      break;
    }
    if (!ignore_no_key || trans->getNdbError().code == 0)
    {
      rc= trans->getNdbError().code;
      break;
    }

    rc = check_completed_operations_pre_commit(thd_ndb, trans,
                                               first, last,
                                               ignore_count);
  } while (0);

  if (unlikely(thd_ndb->is_slave_thread() &&
               rc != 0))
  {
    g_ndb_slave_state.atTransactionAbort();
  }

  DBUG_PRINT("info", ("execute_no_commit rc is %d", rc));
  DBUG_RETURN(rc);
}


static inline
int
execute_commit(Thd_ndb *thd_ndb, NdbTransaction *trans,
               int force_send, int ignore_error, uint *ignore_count = 0)
{
  DBUG_ENTER("execute_commit");
  NdbOperation::AbortOption ao= NdbOperation::AO_IgnoreError;
  if (thd_ndb->m_unsent_bytes && !ignore_error)
  {
    /*
      We have unsent bytes and cannot ignore error.  Calling execute
      with NdbOperation::AO_IgnoreError will result in possible commit
      of a transaction although there is an error.
    */
    ao= NdbOperation::AbortOnError;
  }
  const NdbOperation *first= trans->getFirstDefinedOperation();
  const NdbOperation *last= trans->getLastDefinedOperation();
  thd_ndb->m_execute_count++;
  thd_ndb->m_unsent_bytes= 0;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  int rc= 0;
  do
  {
    if (trans->execute(NdbTransaction::Commit, ao, force_send))
    {
      rc= -1;
      break;
    }

    if (!ignore_error || trans->getNdbError().code == 0)
    {
      rc= trans->getNdbError().code;
      break;
    }

    rc= check_completed_operations(thd_ndb, trans, first, last,
                                   ignore_count);
  } while (0);

  if (likely(rc == 0))
  {
    /* Committed ok, update session GCI, if it's available
     * (Not available for reads, empty transactions etc...)
     */
    Uint64 reportedGCI;
    if (trans->getGCI(&reportedGCI) == 0 &&
        reportedGCI != 0)
    {
      assert(reportedGCI >= thd_ndb->m_last_commit_epoch_session);
      thd_ndb->m_last_commit_epoch_session = reportedGCI;
    }
  }

  if (thd_ndb->is_slave_thread())
  {
    if (likely(rc == 0))
    {
      /* Success */
      g_ndb_slave_state.atTransactionCommit(thd_ndb->m_last_commit_epoch_session);
    }
    else
    {
      g_ndb_slave_state.atTransactionAbort();
    }
  }

  DBUG_PRINT("info", ("execute_commit rc is %d", rc));
  DBUG_RETURN(rc);
}

static inline
int execute_no_commit_ie(Thd_ndb *thd_ndb, NdbTransaction *trans)
{
  DBUG_ENTER("execute_no_commit_ie");
  ha_ndbcluster::release_completed_operations(trans);
  int res= trans->execute(NdbTransaction::NoCommit,
                          NdbOperation::AO_IgnoreError,
                          thd_ndb->m_force_send);
  thd_ndb->m_unsent_bytes= 0;
  thd_ndb->m_execute_count++;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  DBUG_RETURN(res);
}

/*
  Place holder for ha_ndbcluster thread specific data
*/
typedef struct st_thd_ndb_share {
  const void *key;
  struct Ndb_local_table_statistics stat;
} THD_NDB_SHARE;
static
uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length,
                            my_bool not_used MY_ATTRIBUTE((unused)))
{
  *length= sizeof(thd_ndb_share->key);
  return (uchar*) &thd_ndb_share->key;
}

Thd_ndb::Thd_ndb(THD* thd) :
  m_thd(thd),
  m_slave_thread(thd->slave_thread),
  m_skip_binlog_setup_in_find_files(false),
  schema_locks_count(0),
  m_last_commit_epoch_session(0)
{
  connection= ndb_get_cluster_connection();
  m_connect_count= connection->get_connect_count();
  ndb= new Ndb(connection, "");
  lock_count= 0;
  start_stmt_count= 0;
  save_point_count= 0;
  count= 0;
  trans= NULL;
  m_handler= NULL;
  m_error= FALSE;
  options= 0;
  (void) my_hash_init(&open_tables, table_alias_charset, 5, 0, 0,
                      (my_hash_get_key)thd_ndb_share_get_key, 0, 0,
                      PSI_INSTRUMENT_ME);
  m_unsent_bytes= 0;
  m_execute_count= 0;
  m_scan_count= 0;
  m_pruned_scan_count= 0;
  m_sorted_scan_count= 0;
  m_pushed_queries_defined= 0;
  m_pushed_queries_dropped= 0;
  m_pushed_queries_executed= 0;
  m_pushed_reads= 0;
  memset(m_transaction_no_hint_count, 0, sizeof(m_transaction_no_hint_count));
  memset(m_transaction_hint_count, 0, sizeof(m_transaction_hint_count));
  global_schema_lock_trans= NULL;
  global_schema_lock_count= 0;
  global_schema_lock_error= 0;
  init_alloc_root(PSI_INSTRUMENT_ME,
                  &m_batch_mem_root, BATCH_FLUSH_SIZE/4, 0);
}

Thd_ndb::~Thd_ndb()
{
  if (opt_ndb_extra_logging > 1)
  {
    /*
      print some stats about the connection at disconnect
    */
    for (int i= 0; i < MAX_NDB_NODES; i++)
    {
      if (m_transaction_hint_count[i] > 0 ||
          m_transaction_no_hint_count[i] > 0)
      {
        sql_print_information("tid %u: node[%u] "
                              "transaction_hint=%u, transaction_no_hint=%u",
                              m_thd->thread_id(), i,
                              m_transaction_hint_count[i],
                              m_transaction_no_hint_count[i]);
      }
    }
  }
  if (ndb)
  {
    delete ndb;
    ndb= NULL;
  }
  changed_tables.empty();
  my_hash_free(&open_tables);
  free_root(&m_batch_mem_root, MYF(0));
}


Ndb *ha_ndbcluster::get_ndb(THD *thd) const
{
  return thd_get_thd_ndb(thd)->ndb;
}

/*
 * manage uncommitted inserts/deletes during a transaction to keep the record count correct
 */

void ha_ndbcluster::set_rec_per_key()
{
  DBUG_ENTER("ha_ndbcluster::set_rec_per_key");
  /*
    Set up the 'rec_per_key[]' for keys which we have good knowledge
    about the distribution. 'rec_per_key[]' is init'ed to '0' by 
    open_binary_frm(), which is interpreted as 'unknown' by optimizer.
    -> Not setting 'rec_per_key[]' will force the optimizer to use
    its own heuristic to estimate 'records pr. key'.
  */
  for (uint i=0 ; i < table_share->keys ; i++)
  {
    bool is_unique_index= false;
    KEY* key_info= table->key_info + i;
    switch (get_index_type(i))
    {
    case UNIQUE_INDEX:
    case PRIMARY_KEY_INDEX:
    {
      // Index is unique when all 'key_parts' are specified,
      // else distribution is unknown and not specified here.
      is_unique_index= true;
      break;
    }
    case UNIQUE_ORDERED_INDEX:
    case PRIMARY_KEY_ORDERED_INDEX:
      is_unique_index= true;
      // intentional fall thru to logic for ordered index
    case ORDERED_INDEX:
      // 'Records pr. key' are unknown for non-unique indexes.
      // (May change when we get better index statistics.)
    {
      THD *thd= current_thd;
      const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
                                    THDVAR(thd, index_stat_enable);
      if (index_stat_enable)
      {
        int err= ndb_index_stat_set_rpk(i);
        if (err != 0 &&
            /* no stats is not unexpected error */
            err != NdbIndexStat::NoIndexStats &&
            /* warning was printed at first error */
            err != NdbIndexStat::MyHasError &&
            /* stats thread aborted request */
            err != NdbIndexStat::MyAbortReq)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_CANT_GET_STAT, /* pun? */
                              "index stats (RPK) for key %s:"
                              " unexpected error %d",
                              key_info->name, err);
        }
      }
      // no fallback method...
      break;
    }
    default:
      assert(false);
    }
    // set rows per key to 1 for complete key given for unique/primary index
    if (is_unique_index)
    {
      key_info->set_records_per_key(key_info->user_defined_key_parts-1, 1.0f);
    }
  }
  DBUG_VOID_RETURN;
}

int ha_ndbcluster::records(ha_rows* num_rows)
{
  DBUG_ENTER("ha_ndbcluster::records");
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
                      m_table->getTableId(),
                      m_table_info->no_uncommitted_rows_count));

  int error = update_stats(table->in_use, 1);
  if (error != 0)
  {
    *num_rows = HA_POS_ERROR;
    DBUG_RETURN(error);
  }

  *num_rows = stats.records;
  DBUG_RETURN(0);
}

void ha_ndbcluster::no_uncommitted_rows_execute_failure()
{
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
  get_thd_ndb(current_thd)->m_error= TRUE;
  DBUG_VOID_RETURN;
}

void ha_ndbcluster::no_uncommitted_rows_update(int c)
{
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
  struct Ndb_local_table_statistics *local_info= m_table_info;
  local_info->no_uncommitted_rows_count+= c;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
                      m_table->getTableId(),
                      local_info->no_uncommitted_rows_count));
  DBUG_VOID_RETURN;
}


int ha_ndbcluster::ndb_err(NdbTransaction *trans)
{
  THD *thd= current_thd;
  int res;
  NdbError err= trans->getNdbError();
  DBUG_ENTER("ndb_err");

  switch (err.classification) {
  case NdbError::SchemaError:
  {
    // TODO perhaps we need to do more here, invalidate also in the cache
    m_table->setStatusInvalid();
    /* Close other open handlers not used by any thread */
    ndb_tdc_close_cached_table(thd, m_dbname, m_tabname);
    break;
  }
  default:
    break;
  }
  res= ndb_to_mysql_error(&err);
  DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d", 
                      err.code, res));
  if (res == HA_ERR_FOUND_DUPP_KEY)
  {
    char *error_data= err.details;
    uint dupkey= MAX_KEY;

    for (uint i= 0; i < MAX_KEY; i++)
    {
      if (m_index[i].type == UNIQUE_INDEX || 
          m_index[i].type == UNIQUE_ORDERED_INDEX)
      {
        const NDBINDEX *unique_index=
          (const NDBINDEX *) m_index[i].unique_index;
        if (unique_index && UintPtr(unique_index->getObjectId()) == UintPtr(error_data))
        {
          dupkey= i;
          break;
        }
      }
    }
    if (m_rows_to_insert == 1)
    {
      /*
	We can only distinguish between primary and non-primary
	violations here, so we need to return MAX_KEY for non-primary
	to signal that key is unknown
      */
      m_dupkey= err.code == 630 ? table_share->primary_key : dupkey; 
    }
    else
    {
      /* We are batching inserts, offending key is not available */
      m_dupkey= (uint) -1;
    }
  }
  DBUG_RETURN(res);
}


/**
  Override the default get_error_message in order to add the 
  error message of NDB .
*/

bool ha_ndbcluster::get_error_message(int error, 
                                      String *buf)
{
  DBUG_ENTER("ha_ndbcluster::get_error_message");
  DBUG_PRINT("enter", ("error: %d", error));

  Ndb *ndb= check_ndb_in_thd(current_thd);
  if (!ndb)
    DBUG_RETURN(FALSE);

  const NdbError err= ndb->getNdbError(error);
  bool temporary= err.status==NdbError::TemporaryError;
  buf->set(err.message, (uint32)strlen(err.message), &my_charset_bin);
  DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
  DBUG_RETURN(temporary);
}


/*
  field_used_length() returns the number of bytes actually used to
  store the data of the field. So for a varstring it includes both
  length byte(s) and string data, and anything after data_length()
  bytes are unused.
*/
static
uint32 field_used_length(const Field* field)
{
 if (field->type() == MYSQL_TYPE_VARCHAR)
 {
   const Field_varstring* f = static_cast<const Field_varstring*>(field);
   return f->length_bytes + const_cast<Field_varstring*>(f)->data_length();
                            // ^ no 'data_length() const'
 }
 return field->pack_length();
}
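
/*
  Worked example (editor's sketch, values assumed): for a VARCHAR(255) column
  in a single-byte charset currently holding the 5-character string "hello",
  field_used_length() returns 1 length byte + 5 data bytes = 6, whereas
  field->pack_length() reports the full reserved 1 + 255 = 256 bytes.
*/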


/**
  Check if MySQL field type forces var part in ndb storage
*/
static bool field_type_forces_var_part(enum_field_types type)
{
  switch (type) {
  case MYSQL_TYPE_VAR_STRING:
  case MYSQL_TYPE_VARCHAR:
    return TRUE;
  case MYSQL_TYPE_TINY_BLOB:
  case MYSQL_TYPE_BLOB:
  case MYSQL_TYPE_MEDIUM_BLOB:
  case MYSQL_TYPE_LONG_BLOB:
  case MYSQL_TYPE_JSON:
  case MYSQL_TYPE_GEOMETRY:
    return FALSE;
  default:
    return FALSE;
  }
}

/*
  Return a generic buffer that will remain valid until after next execute.

  The memory is freed by the first call to add_row_check_if_batch_full_size()
  following any execute() call. The intention is that the memory is associated
  with one batch of operations during batched slave updates.

  Note in particular that using get_buffer() / copy_row_to_buffer() separately
  from add_row_check_if_batch_full_size() could make memory usage grow without
  limit, and that this sequence:

    execute()
    get_buffer() / copy_row_to_buffer()
    add_row_check_if_batch_full_size()
    ...
    execute()

  will free the memory already at add_row_check_if_batch_full_size() time, it
  will not remain valid until the second execute().
*/
uchar *
ha_ndbcluster::get_buffer(Thd_ndb *thd_ndb, uint size)
{
  return (uchar*)alloc_root(&(thd_ndb->m_batch_mem_root), size);
}

uchar *
ha_ndbcluster::copy_row_to_buffer(Thd_ndb *thd_ndb, const uchar *record)
{
  uchar *row= get_buffer(thd_ndb, table->s->reclength);
  if (unlikely(!row))
    return NULL;
  memcpy(row, record, table->s->reclength);
  return row;
}
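
/*
  Illustrative sketch (editor's example, assumed caller) of the batch-buffer
  lifecycle described above: a row copied into m_batch_mem_root stays valid
  only until the memory is reclaimed by the first
  add_row_check_if_batch_full_size() call that follows an execute():

    uchar *row_copy= copy_row_to_buffer(thd_ndb, record);
    // ... define NdbApi operations referencing row_copy ...
    execute_no_commit(thd_ndb, trans, ignore_no_key);
    // row_copy is released once the next batch starts being filled
*/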

/**
 * findBlobError
 * This method attempts to find an error in the hierarchy of runtime
 * NDBAPI objects from Blob up to transaction.
 * It will return -1 if no error is found, 0 if an error is found.
 */
int findBlobError(NdbError& error, NdbBlob* pBlob)
{
  error= pBlob->getNdbError();
  if (error.code != 0)
    return 0;
  
  const NdbOperation* pOp= pBlob->getNdbOperation();
  error= pOp->getNdbError();
  if (error.code != 0)
    return 0;
  
  NdbTransaction* pTrans= pOp->getNdbTransaction();
  error= pTrans->getNdbError();
  if (error.code != 0)
    return 0;
  
  /* No error on any of the objects */
  return -1;
}


/* 
 This routine calculates the length of the blob/text after applying mysql limits
 on blob/text sizes. If the blob contains multi-byte characters, the length is 
 reduced till the end of the last well-formed char, so that data is not truncated 
 in the middle of a multi-byte char.
 */
uint64 calc_ndb_blob_len(const CHARSET_INFO *cs, uchar *blob_ptr, uint64 maxlen)
{
  int errors = 0;
  
  const char *begin = (const char*) blob_ptr;
  const char *end = (const char*) (blob_ptr+maxlen);
  
  // avoid truncation in the middle of a multi-byte character by 
  // stopping at end of last well-formed character before max length
  uint32 numchars = cs->cset->numchars(cs, begin, end);
  uint64 len64 = cs->cset->well_formed_len(cs, begin, end, numchars, &errors);
  assert(len64 <= maxlen);

  return len64; 
}

int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
{
  ha_ndbcluster *ha= (ha_ndbcluster *)arg;
  DBUG_ENTER("g_get_ndb_blobs_value");
  DBUG_PRINT("info", ("destination row: %p", ha->m_blob_destination_record));

  if (ha->m_blob_counter == 0)   /* Reset total size at start of row */
    ha->m_blobs_row_total_size= 0;

  /* Count the total length needed for blob data. */
  int isNull;
  if (ndb_blob->getNull(isNull) != 0)
    ERR_RETURN(ndb_blob->getNdbError());
  if (isNull == 0) {
    Uint64 len64= 0;
    if (ndb_blob->getLength(len64) != 0)
      ERR_RETURN(ndb_blob->getNdbError());
    /* Align to Uint64. */
    ha->m_blobs_row_total_size+= (len64 + 7) & ~((Uint64)7);
    if (ha->m_blobs_row_total_size > 0xffffffff)
    {
      assert(FALSE);
      DBUG_RETURN(-1);
    }
    DBUG_PRINT("info", ("Blob number %d needs size %llu, total buffer reqt. now %llu",
                        ha->m_blob_counter,
                        len64,
                        ha->m_blobs_row_total_size));
  }
  ha->m_blob_counter++;

  /*
    Wait until all blobs in this row are active, so we can allocate
    and use a common buffer containing all.
  */
  if (ha->m_blob_counter < ha->m_blob_expected_count_per_row)
    DBUG_RETURN(0);

  /* Reset blob counter for next row (scan scenario) */
  ha->m_blob_counter= 0;

  /* Re-allocate bigger blob buffer for this row if necessary. */
  if (ha->m_blobs_row_total_size > ha->m_blobs_buffer_size)
  {
    my_free(ha->m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_PRINT("info", ("allocate blobs buffer size %u",
                        (uint32)(ha->m_blobs_row_total_size)));
    /* The Windows compiler complains about passing a non-size_t length to
     * my_malloc, so validate the mapping from Uint64 to size_t
     */
    if(((size_t)ha->m_blobs_row_total_size) != ha->m_blobs_row_total_size)
    {
      ha->m_blobs_buffer= NULL;
      ha->m_blobs_buffer_size= 0;
      DBUG_RETURN(-1);
    }

    ha->m_blobs_buffer=
      (uchar*) my_malloc(PSI_INSTRUMENT_ME,
                         (size_t) ha->m_blobs_row_total_size, MYF(MY_WME));
    if (ha->m_blobs_buffer == NULL)
    {
      ha->m_blobs_buffer_size= 0;
      DBUG_RETURN(-1);
    }
    ha->m_blobs_buffer_size= ha->m_blobs_row_total_size;
  }

  /*
    Now read all blob data.
    If we know the destination mysqld row, we also set the blob null bit and
    pointer/length (if not, it will be done instead in unpack_record()).
  */
  uint32 offset= 0;
  for (uint i= 0; i < ha->table->s->fields; i++)
  {
    Field *field= ha->table->field[i];
    if (! (field->flags & BLOB_FLAG))
      continue;
    NdbValue value= ha->m_value[i];
    if (value.blob == NULL)
    {
      DBUG_PRINT("info",("[%u] skipped", i));
      continue;
    }
    Field_blob *field_blob= (Field_blob *)field;
    NdbBlob *ndb_blob= value.blob;
    int isNull;
    if (ndb_blob->getNull(isNull) != 0)
      ERR_RETURN(ndb_blob->getNdbError());
    if (isNull == 0) {
      Uint64 len64= 0;
      if (ndb_blob->getLength(len64) != 0)
        ERR_RETURN(ndb_blob->getNdbError());
      assert(len64 < 0xffffffff);
      uchar *buf= ha->m_blobs_buffer + offset;
      uint32 len= (uint32)(ha->m_blobs_buffer_size - offset);
      if (ndb_blob->readData(buf, len) != 0)
      {
        NdbError err;
        if (findBlobError(err, ndb_blob) == 0)
        {
          ERR_RETURN(err);
        }
        else
        {
          /* Should always have some error code set */
          assert(err.code != 0);
          ERR_RETURN(err);
        }
      }   
      DBUG_PRINT("info", ("[%u] offset: %u  buf: 0x%lx  len=%u",
                          i, offset, (long) buf, len));
      assert(len == len64);
      if (ha->m_blob_destination_record)
      {
        my_ptrdiff_t ptrdiff=
          ha->m_blob_destination_record - ha->table->record[0];
        field_blob->move_field_offset(ptrdiff);

        if(len > field_blob->max_data_length())
        {
          len = calc_ndb_blob_len(field_blob->charset(),
                                  buf, field_blob->max_data_length());

          // push a warning
          push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                      WARN_DATA_TRUNCATED,
                      "Truncated value from TEXT field \'%s\'", field_blob->field_name);
        }

        field_blob->set_ptr(len, buf);
        field_blob->set_notnull();
        field_blob->move_field_offset(-ptrdiff);
      }
      offset+= Uint32((len64 + 7) & ~((Uint64)7));
    }
    else if (ha->m_blob_destination_record)
    {
      /* Have to set length even in this case. */
      my_ptrdiff_t ptrdiff=
        ha->m_blob_destination_record - ha->table->record[0];
      uchar *buf= ha->m_blobs_buffer + offset;
      field_blob->move_field_offset(ptrdiff);
      field_blob->set_ptr((uint32)0, buf);
      field_blob->set_null();
      field_blob->move_field_offset(-ptrdiff);
      DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
    }
  }

  if (!ha->m_active_cursor)
  {
    /* Non-scan access: Blob reads have been issued;
     * execute them and then close the Blob
     * handles.
     */
    for (uint i= 0; i < ha->table->s->fields; i++)
    {
      Field *field= ha->table->field[i];
      if (! (field->flags & BLOB_FLAG))
        continue;
      NdbValue value= ha->m_value[i];
      if (value.blob == NULL)
      {
        DBUG_PRINT("info",("[%u] skipped", i));
        continue;
      }
      NdbBlob *ndb_blob= value.blob;
    
      assert(ndb_blob->getState() == NdbBlob::Active);

      /* Call close() with execPendingBlobOps == true
       * For LM_CommittedRead access, this will enqueue
       * an unlock operation, which the Blob framework 
       * code invoking this callback will execute before
       * returning control to the caller of execute()
       */
      if (ndb_blob->close(true) != 0)
      {
        ERR_RETURN(ndb_blob->getNdbError());
      }
    }
  }

  DBUG_RETURN(0);
}

/*
  Request reading of blob values.

  If dst_record is specified, the blob null bit, pointer, and length will be
  set in that record. Otherwise they must be set later by calling
  unpack_record().
*/
int
ha_ndbcluster::get_blob_values(const NdbOperation *ndb_op, uchar *dst_record,
                               const MY_BITMAP *bitmap)
{
  uint i;
  DBUG_ENTER("ha_ndbcluster::get_blob_values");

  m_blob_counter= 0;
  m_blob_expected_count_per_row= 0;
  m_blob_destination_record= dst_record;
  m_blobs_row_total_size= 0;
  ndb_op->getNdbTransaction()->
    setMaxPendingBlobReadBytes(THDVAR(current_thd, blob_read_batch_bytes));

  for (i= 0; i < table_share->fields; i++) 
  {
    Field *field= table->field[i];
    if (!(field->flags & BLOB_FLAG))
      continue;

    DBUG_PRINT("info", ("fieldnr=%d", i));
    NdbBlob *ndb_blob;
    if (bitmap_is_set(bitmap, i))
    {
      if ((ndb_blob= ndb_op->getBlobHandle(i)) == NULL ||
          ndb_blob->setActiveHook(g_get_ndb_blobs_value, this) != 0)
        DBUG_RETURN(1);
      m_blob_expected_count_per_row++;
    }
    else
      ndb_blob= NULL;

    m_value[i].blob= ndb_blob;
  }

  DBUG_RETURN(0);
}

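/**
  Set blob values on an NDB operation for insert/update.

  For each blob column (all columns if 'bitmap' is NULL), either setNull()
  or setValue() is called on the corresponding blob handle. When batching,
  the blob data is copied to a buffer that remains valid until execute().
  The number of blobs set is returned in *set_count.
*/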
int
ha_ndbcluster::set_blob_values(const NdbOperation *ndb_op,
                               my_ptrdiff_t row_offset, const MY_BITMAP *bitmap,
                               uint *set_count, bool batch)
{
  uint field_no;
  uint *blob_index, *blob_index_end;
  int res= 0;
  DBUG_ENTER("ha_ndbcluster::set_blob_values");

  *set_count= 0;

  if (table_share->blob_fields == 0)
    DBUG_RETURN(0);

  ndb_op->getNdbTransaction()->
    setMaxPendingBlobWriteBytes(THDVAR(current_thd, blob_write_batch_bytes));
  blob_index= table_share->blob_field;
  blob_index_end= blob_index + table_share->blob_fields;
  do
  {
    field_no= *blob_index;
    /* A NULL bitmap sets all blobs. */
    if (bitmap && !bitmap_is_set(bitmap, field_no))
      continue;
    Field *field= table->field[field_no];

    NdbBlob *ndb_blob= ndb_op->getBlobHandle(field_no);
    if (ndb_blob == NULL)
      ERR_RETURN(ndb_op->getNdbError());
    if (field->is_real_null(row_offset))
    {
      DBUG_PRINT("info", ("Setting Blob %d to NULL", field_no));
      if (ndb_blob->setNull() != 0)
        ERR_RETURN(ndb_op->getNdbError());
    }
    else
    {
      Field_blob *field_blob= (Field_blob *)field;

      // Get length and pointer to data
      const uchar *field_ptr= field->ptr + row_offset;
      uint32 blob_len= field_blob->get_length(field_ptr);
      uchar* blob_ptr= NULL;
      field_blob->get_ptr(&blob_ptr);

      // Looks like NULL ptr signals length 0 blob
      if (blob_ptr == NULL) {
        assert(blob_len == 0);
        blob_ptr= (uchar*)"";
      }

      DBUG_PRINT("value", ("set blob ptr: 0x%lx  len: %u",
                           (long) blob_ptr, blob_len));
      DBUG_DUMP("value", blob_ptr, MIN(blob_len, 26));

      /*
        NdbBlob requires the data pointer to remain valid until execute() time.
        So when batching, we need to copy the value to a temporary buffer.
      */
      if (batch && blob_len > 0)
      {
        uchar *tmp_buf= get_buffer(m_thd_ndb, blob_len);
        if (!tmp_buf)
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        memcpy(tmp_buf, blob_ptr, blob_len);
        blob_ptr= tmp_buf;
      }
      res= ndb_blob->setValue((char*)blob_ptr, blob_len);
      if (res != 0)
        ERR_RETURN(ndb_op->getNdbError());
    }

    ++(*set_count);
  } while (++blob_index != blob_index_end);

  DBUG_RETURN(res);
}


/**
  Check whether any blob value is set or read in the current query.
*/

bool ha_ndbcluster::uses_blob_value(const MY_BITMAP *bitmap) const
{
  uint *blob_index, *blob_index_end;
  if (table_share->blob_fields == 0)
    return FALSE;

  blob_index=     table_share->blob_field;
  blob_index_end= blob_index + table_share->blob_fields;
  do
  {
    if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index))
      return TRUE;
  } while (++blob_index != blob_index_end);
  return FALSE;
}

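/**
  Free the row-wide blob read buffer allocated by g_get_ndb_blobs_value()
  and reset the associated size bookkeeping.
*/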
void ha_ndbcluster::release_blobs_buffer()
{
  DBUG_ENTER("releaseBlobsBuffer");
  if (m_blobs_buffer_size > 0)
  {
    DBUG_PRINT("info", ("Deleting blobs buffer, size %llu", m_blobs_buffer_size));
    my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
    m_blobs_buffer= 0;
    m_blobs_row_total_size= 0;
    m_blobs_buffer_size= 0;
  }
  DBUG_VOID_RETURN;
}


/*
  Does type support a default value?
*/
static bool
type_supports_default_value(enum_field_types mysql_type)
{
  bool ret = (mysql_type != MYSQL_TYPE_BLOB &&
              mysql_type != MYSQL_TYPE_TINY_BLOB &&
              mysql_type != MYSQL_TYPE_MEDIUM_BLOB &&
              mysql_type != MYSQL_TYPE_LONG_BLOB &&
              mysql_type != MYSQL_TYPE_JSON &&
              mysql_type != MYSQL_TYPE_GEOMETRY);

  return ret;
}

/**
   Check that Ndb data dictionary has the same default values
   as MySQLD for the current table.
   Called as a debug-only check during table open.
   
   Returns
     0  - Defaults are ok
     -1 - Some default(s) are bad
*/
int ha_ndbcluster::check_default_values(const NDBTAB* ndbtab)
{
  /* Debug only method for checking table defaults aligned
     between MySQLD and Ndb
  */
  bool defaults_aligned= true;

  if (ndbtab->hasDefaultValues())
  {
    /* Ndb supports native defaults for non-pk columns */
    my_bitmap_map *old_map= tmp_use_all_columns(table, table->read_set);

    for (uint f=0; f < table_share->fields; f++)
    {
      Field* field= table->field[f]; // Use Field struct from MySQLD table rep
      const NdbDictionary::Column* ndbCol= ndbtab->getColumn(field->field_index); 

      if ((! (field->flags & (PRI_KEY_FLAG |
                              NO_DEFAULT_VALUE_FLAG))) &&
          type_supports_default_value(field->real_type()))
      {
        /* We expect Ndb to have a native default for this
         * column
         */
        my_ptrdiff_t src_offset= table_share->default_values - 
          field->table->record[0];

        /* Move field by offset to refer to default value */
        field->move_field_offset(src_offset);
        
        const uchar* ndb_default= (const uchar*) ndbCol->getDefaultValue();

        if (ndb_default == NULL)
          /* MySQLD default must also be NULL */
          defaults_aligned= field->is_null();
        else
        {
          if (field->type() != MYSQL_TYPE_BIT)
          {
            defaults_aligned= (0 == field->cmp(ndb_default));
          }
          else
          {
            longlong value= (static_cast<Field_bit*>(field))->val_int();
            /* Map to NdbApi format - two Uint32s */
            Uint32 out[2];
            out[0] = 0;
            out[1] = 0;
            for (int b=0; b < 64; b++)
            {
              out[b >> 5] |= (value & 1) << (b & 31);
              
              value= value >> 1;
            }
            Uint32 defaultLen = field_used_length(field);
            defaultLen = ((defaultLen + 3) & ~(Uint32)0x7);
            defaults_aligned= (0 == memcmp(ndb_default, 
                                           out, 
                                           defaultLen));
          }
        }
        
        field->move_field_offset(-src_offset);

        if (unlikely(!defaults_aligned))
        {
          sql_print_error("NDB Internal error: Default values differ "
                          "for column %u, ndb_default: %d",
                          field->field_index, ndb_default != NULL);
        }
      }
      else
      {
        /* We don't expect Ndb to have a native default for this column */
        if (unlikely(ndbCol->getDefaultValue() != NULL))
        {
          /* Didn't expect that */
          sql_print_error("NDB Internal error: Column %u has native "
                          "default, but shouldn't. Flags=%u, type=%u",
                          field->field_index, field->flags,
                          field->real_type());
          defaults_aligned= false;
        }
      }
      if (unlikely(!defaults_aligned))
      {
        // Dump field
        sql_print_error("field[ name: '%s', type: %u, real_type: %u, "
                        "flags: 0x%x, is_null: %d]",
                        field->field_name, field->type(), field->real_type(),
                        field->flags, field->is_null());
        // Dump ndbCol
        sql_print_error("ndbCol[name: '%s', type: %u, column_no: %d, "
                        "nullable: %d]",
                        ndbCol->getName(), ndbCol->getType(),
                        ndbCol->getColumnNo(), ndbCol->getNullable());
        break;
      }
    } 
    tmp_restore_column_map(table->read_set, old_map);
  }

  return (defaults_aligned? 0: -1);
}

int ha_ndbcluster::get_metadata(THD *thd, const char *path)
{
  Ndb *ndb= get_thd_ndb(thd)->ndb;
  NDBDICT *dict= ndb->getDictionary();
  const NDBTAB *tab;
  int error;
  DBUG_ENTER("get_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));

  assert(m_table == NULL);
  assert(m_table_info == NULL);

  uchar *data= NULL, *pack_data= NULL;
  size_t length, pack_length;

  /*
    Compare FrmData in NDB with frm file from disk.
  */
  error= 0;
  if (readfrm(path, &data, &length) ||
      packfrm(data, length, &pack_data, &pack_length))
  {
    my_free(data, MYF(MY_ALLOW_ZERO_PTR));
    my_free(pack_data, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_RETURN(1);
  }

  ndb->setDatabaseName(m_dbname);
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  if (!(tab= ndbtab_g.get_table()))
    ERR_RETURN(dict->getNdbError());

  if (get_ndb_share_state(m_share) != NSS_ALTERED 
      && cmp_frm(tab, pack_data, pack_length))
  {
    DBUG_PRINT("error", 
               ("metadata, pack_length: %lu  getFrmLength: %d  memcmp: %d",
                (ulong) pack_length, tab->getFrmLength(),
                memcmp(pack_data, tab->getFrmData(), pack_length)));
    DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length);
    DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength());
    error= HA_ERR_TABLE_DEF_CHANGED;
  }
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));

  /* Now check that any Ndb native defaults are aligned 
     with MySQLD defaults
  */
  assert(check_default_values(tab) == 0);

  if (error)
    goto err;

  DBUG_PRINT("info", ("fetched table %s", tab->getName()));
  m_table= tab;

  if (bitmap_init(&m_bitmap, m_bitmap_buf, table_share->fields, 0))
  {
    error= HA_ERR_OUT_OF_MEM;
    goto err;
  }
  if (table_share->primary_key == MAX_KEY)
  {
    /* Hidden primary key. */
    if ((error= add_hidden_pk_ndb_record(dict)) != 0)
      goto err;
  }

  if ((error= add_table_ndb_record(dict)) != 0)
    goto err;

  /*
    Approx. write size in bytes over transporter
  */
  m_bytes_per_write= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();

  /* Open indexes */
  if ((error= open_indexes(thd, ndb, table, FALSE)) != 0)
    goto err;

  /* Read foreign keys where this table is child or parent */
  if ((error= get_fk_data(thd, ndb)) != 0)
    goto err;

  /*
    Backward compatibility for tables created without tablespace
    in .frm => read tablespace setting from engine
  */
  if (table_share->mysql_version < 50120 &&
      !table_share->tablespace /* safety */)
  {
    Uint32 id;
    if (tab->getTablespace(&id))
    {
      NdbDictionary::Tablespace ts= dict->getTablespace(id);
      NdbError ndberr= dict->getNdbError();
      if (ndberr.classification == NdbError::NoError)
      {
        const char *tablespace= ts.getName();
        const size_t tablespace_len= strlen(tablespace);
        if (tablespace_len != 0)
        {
          DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
          table_share->tablespace= strmake_root(&table_share->mem_root,
                                                tablespace,
                                                tablespace_len);
        }
      }
    }
  }

  ndbtab_g.release();

  DBUG_RETURN(0);

err:
  ndbtab_g.invalidate();
  m_table= NULL;
  DBUG_RETURN(error);
}

static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
                                       const NDBINDEX *index,
                                       KEY *key_info)
{
  DBUG_ENTER("fix_unique_index_attr_order");
  unsigned sz= index->getNoOfIndexColumns();

  if (data.unique_index_attrid_map)
    my_free((char*)data.unique_index_attrid_map, MYF(0));
  data.unique_index_attrid_map= (uchar*)my_malloc(PSI_INSTRUMENT_ME, sz,MYF(MY_WME));
  if (data.unique_index_attrid_map == 0)
  {
    sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure",
                    (unsigned int)sz);
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
  assert(key_info->user_defined_key_parts == sz);
  for (unsigned i= 0; key_part != end; key_part++, i++) 
  {
    const char *field_name= key_part->field->field_name;
#ifndef NDEBUG
    data.unique_index_attrid_map[i]= 255;
#endif
    for (unsigned j= 0; j < sz; j++)
    {
      const NDBCOL *c= index->getColumn(j);
      if (strcmp(field_name, c->getName()) == 0)
      {
        data.unique_index_attrid_map[i]= j;
        break;
      }
    }
    assert(data.unique_index_attrid_map[i] != 255);
  }
  DBUG_RETURN(0);
}

/*
  Create all the indexes for a table.
  If creation of any index fails,
  the error is returned immediately.
*/
int ha_ndbcluster::create_indexes(THD *thd, Ndb *ndb, TABLE *tab) const
{
  uint i;
  int error= 0;
  const char *index_name;
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::create_indexes");

  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    index_name= *key_name;
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    error= create_index(thd, index_name, key_info, idx_type, i);
    if (error)
    {
      DBUG_PRINT("error", ("Failed to create index %u", i));
      break;
    }
  }

  DBUG_RETURN(error);
}

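/* Reset an NDB_INDEX_DATA entry to its initial, undefined state. */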
static void ndb_init_index(NDB_INDEX_DATA &data)
{
  data.type= UNDEFINED_INDEX;
  data.status= UNDEFINED;
  data.unique_index= NULL;
  data.index= NULL;
  data.unique_index_attrid_map= NULL;
  data.ndb_record_key= NULL;
  data.ndb_unique_record_key= NULL;
  data.ndb_unique_record_row= NULL;
}

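/*
  Free the unique index attribute-id map and release any NdbRecords
  owned by the index entry, then reset it to the initial state.
*/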
static void ndb_clear_index(NDBDICT *dict, NDB_INDEX_DATA &data)
{
  if (data.unique_index_attrid_map)
  {
    my_free((char*)data.unique_index_attrid_map, MYF(0));
  }
  if (data.ndb_unique_record_key)
    dict->releaseRecord(data.ndb_unique_record_key);
  if (data.ndb_unique_record_row)
    dict->releaseRecord(data.ndb_unique_record_row);
  if (data.ndb_record_key)
    dict->releaseRecord(data.ndb_record_key);
  ndb_init_index(data);
}

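/*
  Copy 'from' into 'to' (at most to_length bytes including terminator),
  replacing each occurrence of 'protect' with an "@00<ascii code>" escape
  so that the character cannot appear in the resulting NDB object name.
*/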
static
void ndb_protect_char(const char* from, char* to, uint to_length, char protect)
{
  uint fpos= 0, tpos= 0;

  while(from[fpos] != '\0' && tpos < to_length - 1)
  {
    if (from[fpos] == protect)
    {
      int len= 0;
      to[tpos++]= '@';
      if(tpos < to_length - 5)
      {
        len= sprintf(to+tpos, "00%u", (uint) protect);
        tpos+= len;
      }
    }
    else
    {
      to[tpos++]= from[fpos];
    }
    fpos++;
  }
  to[tpos]= '\0';
}

/*
  Associate a direct reference to an index handle
  with an index (for faster access)
 */
int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info,
                                    const char *key_name, uint index_no)
{
  char index_name[FN_LEN + 1];
  int error= 0;

  NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no);
  m_index[index_no].type= idx_type;
  DBUG_ENTER("ha_ndbcluster::add_index_handle");
  DBUG_PRINT("enter", ("table %s", m_tabname));
  
  ndb_protect_char(key_name, index_name, sizeof(index_name) - 1, '/');
  if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
  {
    DBUG_PRINT("info", ("Get handle to index %s", index_name));
    const NDBINDEX *index;
    do
    {
      index= dict->getIndexGlobal(index_name, *m_table);
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
                          (long) index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
      assert(index->getObjectStatus() ==
             NdbDictionary::Object::Retrieved);
      break;
    } while (1);
    m_index[index_no].index= index;
  }
  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
  {
    char unique_index_name[FN_LEN + 1];
    static const char* unique_suffix= "$unique";
    m_has_unique_index= TRUE;
    strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
    DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
    const NDBINDEX *index;
    do
    {
      index= dict->getIndexGlobal(unique_index_name, *m_table);
      if (!index)
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("index: 0x%lx  id: %d  version: %d.%d  status: %d",
                          (long) index,
                          index->getObjectId(),
                          index->getObjectVersion() & 0xFFFFFF,
                          index->getObjectVersion() >> 24,
                          index->getObjectStatus()));
      assert(index->getObjectStatus() ==
             NdbDictionary::Object::Retrieved);
      break;
    } while (1);
    m_index[index_no].unique_index= index;
    error= fix_unique_index_attr_order(m_index[index_no], index, key_info);
  }

  if (!error)
    error= add_index_ndb_record(dict, key_info, index_no);

  if (!error)
    m_index[index_no].status= ACTIVE;
  
  DBUG_RETURN(error);
}

/*
  We use this function to convert null bit masks, as found in class Field,
  to bit numbers, as used in NdbRecord.
*/
static uint
null_bit_mask_to_bit_number(uchar bit_mask)
{
  switch (bit_mask)
  {
    case  0x1: return 0;
    case  0x2: return 1;
    case  0x4: return 2;
    case  0x8: return 3;
    case 0x10: return 4;
    case 0x20: return 5;
    case 0x40: return 6;
    case 0x80: return 7;
    default:
      assert(false);
      return 0;
  }
}

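/*
  Fill in one NdbDictionary::RecordSpecification entry for a column:
  the column handle, the field's byte offset within record[0], and the
  position of its null bit (for BIT columns, the overflow bits instead).
  CHAR(0) columns are flagged as BitColMapsNullBitOnly.
*/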
static void
ndb_set_record_specification(uint field_no,
                             NdbDictionary::RecordSpecification *spec,
                             const TABLE *table,
                             const NdbDictionary::Table *ndb_table)
{
  spec->column= ndb_table->getColumn(field_no);
  spec->offset= Uint32(table->field[field_no]->ptr - table->record[0]);
  if (table->field[field_no]->real_maybe_null())
  {
    spec->nullbit_byte_offset=
      Uint32(table->field[field_no]->null_offset());
    spec->nullbit_bit_in_byte=
      null_bit_mask_to_bit_number(table->field[field_no]->null_bit);
  }
  else if (table->field[field_no]->type() == MYSQL_TYPE_BIT)
  {
    /* We need to store the position of the overflow bits. */
    const Field_bit* field_bit= static_cast<Field_bit*>(table->field[field_no]);
    spec->nullbit_byte_offset=
      Uint32(field_bit->bit_ptr - table->record[0]);
    spec->nullbit_bit_in_byte= field_bit->bit_ofs;
  }
  else
  {
    spec->nullbit_byte_offset= 0;
    spec->nullbit_bit_in_byte= 0;
  }
  spec->column_flags= 0;
  if (table->field[field_no]->type() == MYSQL_TYPE_STRING &&
      table->field[field_no]->pack_length() == 0)
  {
    /*
      This is CHAR(0), which we represent as
      a nullable BIT(1) column where we ignore the data bit
    */
    spec->column_flags |=
        NdbDictionary::RecordSpecification::BitColMapsNullBitOnly;
  }
}

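/*
  Create the default NdbRecord covering all columns of the table, using
  offsets from the mysqld row format (record[0]). The record is stored in
  m_ndb_record and used for row-based NdbApi operations.
*/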
int
ha_ndbcluster::add_table_ndb_record(NDBDICT *dict)
{
  DBUG_ENTER("ha_ndbcluster::add_table_ndb_record()");
  NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
  NdbRecord *rec;
  uint i;

  for (i= 0; i < table_share->fields; i++)
  {
    ndb_set_record_specification(i, &spec[i], table, m_table);
  }

  rec= dict->createRecord(m_table, spec, i, sizeof(spec[0]),
                          NdbDictionary::RecMysqldBitfield |
                          NdbDictionary::RecPerColumnFlags);
  if (! rec)
    ERR_RETURN(dict->getNdbError());
  m_ndb_record= rec;

  DBUG_RETURN(0);
}

/* Create NdbRecord for setting hidden primary key from Uint64. */
int
ha_ndbcluster::add_hidden_pk_ndb_record(NDBDICT *dict)
{
  DBUG_ENTER("ha_ndbcluster::add_hidden_pk_ndb_record");
  NdbDictionary::RecordSpecification spec[1];
  NdbRecord *rec;

  spec[0].column= m_table->getColumn(table_share->fields);
  spec[0].offset= 0;
  spec[0].nullbit_byte_offset= 0;
  spec[0].nullbit_bit_in_byte= 0;

  rec= dict->createRecord(m_table, spec, 1, sizeof(spec[0]));
  if (! rec)
    ERR_RETURN(dict->getNdbError());
  m_ndb_hidden_key_record= rec;

  DBUG_RETURN(0);
}

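/*
  Create the NdbRecords needed for an index: first key-format records
  (contiguous key buffer, as described by the KEY_PART_INFO offsets) for
  key access, then row-format records (offsets within the mysqld row)
  for access via table records. For a primary key with no separate
  unique index object, the records are created on the base table.
*/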
int
ha_ndbcluster::add_index_ndb_record(NDBDICT *dict, KEY *key_info, uint index_no)
{
  DBUG_ENTER("ha_ndbcluster::add_index_ndb_record");
  NdbDictionary::RecordSpecification spec[NDB_MAX_ATTRIBUTES_IN_TABLE + 2];
  NdbRecord *rec;

  Uint32 offset= 0;
  for (uint i= 0; i < key_info->user_defined_key_parts; i++)
  {
    KEY_PART_INFO *kp= &key_info->key_part[i];

    spec[i].column= m_table->getColumn(kp->fieldnr - 1);
    if (! spec[i].column)
      ERR_RETURN(dict->getNdbError());
    if (kp->null_bit)
    {
      /* Nullable column. */
      spec[i].offset= offset + 1;           // First byte is NULL flag
      spec[i].nullbit_byte_offset= offset;
      spec[i].nullbit_bit_in_byte= 0;
    }
    else
    {
      /* Not nullable column. */
      spec[i].offset= offset;
      spec[i].nullbit_byte_offset= 0;
      spec[i].nullbit_bit_in_byte= 0;
    }
    offset+= kp->store_length;
  }

  if (m_index[index_no].index)
  {
    /*
      Enable MysqldShrinkVarchar flag so that the two-byte length used by
      mysqld for short varchar keys is correctly converted into a one-byte
      length used by Ndb kernel.
    */
    rec= dict->createRecord(m_index[index_no].index, m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            ( NdbDictionary::RecMysqldShrinkVarchar |
                              NdbDictionary::RecMysqldBitfield ));
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_record_key= rec;
  }
  else
    m_index[index_no].ndb_record_key= NULL;

  if (m_index[index_no].unique_index)
  {
    rec= dict->createRecord(m_index[index_no].unique_index, m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            ( NdbDictionary::RecMysqldShrinkVarchar |
                              NdbDictionary::RecMysqldBitfield ));
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_key= rec;
  }
  else if (index_no == table_share->primary_key)
  {
    /* The primary key is special, there is no explicit NDB index associated. */
    rec= dict->createRecord(m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            ( NdbDictionary::RecMysqldShrinkVarchar |
                              NdbDictionary::RecMysqldBitfield ));
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_key= rec;
  }
  else
    m_index[index_no].ndb_unique_record_key= NULL;

  /* Now do the same, but this time with offsets from Field, for row access. */
  for (uint i= 0; i < key_info->user_defined_key_parts; i++)
  {
    const KEY_PART_INFO *kp= &key_info->key_part[i];

    spec[i].offset= kp->offset;
    if (kp->null_bit)
    {
      /* Nullable column. */
      spec[i].nullbit_byte_offset= kp->null_offset;
      spec[i].nullbit_bit_in_byte= null_bit_mask_to_bit_number(kp->null_bit);
    }
    else
    {
      /* Not nullable column. */
      spec[i].nullbit_byte_offset= 0;
      spec[i].nullbit_bit_in_byte= 0;
    }
  }

  if (m_index[index_no].unique_index)
  {
    rec= dict->createRecord(m_index[index_no].unique_index, m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            NdbDictionary::RecMysqldBitfield);
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_row= rec;
  }
  else if (index_no == table_share->primary_key)
  {
    rec= dict->createRecord(m_table,
                            spec, key_info->user_defined_key_parts, sizeof(spec[0]),
                            NdbDictionary::RecMysqldBitfield);
    if (! rec)
      ERR_RETURN(dict->getNdbError());
    m_index[index_no].ndb_unique_record_row= rec;
  }
  else
    m_index[index_no].ndb_unique_record_row= NULL;

  DBUG_RETURN(0);
}

/*
  Associate an index handle with each index of the table
*/
int ha_ndbcluster::open_indexes(THD *thd, Ndb *ndb, TABLE *tab,
                                bool ignore_error)
{
  uint i;
  int error= 0;
  NDBDICT *dict= ndb->getDictionary();
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::open_indexes");
  m_has_unique_index= FALSE;
  btree_keys.clear_all();
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    if ((error= add_index_handle(thd, dict, key_info, *key_name, i)))
    {
      if (ignore_error)
        m_index[i].index= m_index[i].unique_index= NULL;
      else
        break;
    }
    m_index[i].null_in_unique_index= FALSE;
    if (check_index_fields_not_null(key_info))
      m_index[i].null_in_unique_index= TRUE;

    if (error == 0 && MY_TEST(index_flags(i, 0, 0) & HA_READ_RANGE))
      btree_keys.set_bit(i);
  }

  if (error && !ignore_error)
  {
    while (i > 0)
    {
      i--;
      if (m_index[i].index)
      {
         dict->removeIndexGlobal(*m_index[i].index, 1);
         m_index[i].index= NULL;
      }
      if (m_index[i].unique_index)
      {
         dict->removeIndexGlobal(*m_index[i].unique_index, 1);
         m_index[i].unique_index= NULL;
      }
    }
  }

  assert(error == 0 || error == 4243);

  DBUG_RETURN(error);
}

/*
  Renumber indexes in index list by shifting out
  indexes that are to be dropped
 */
void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab)
{
  uint i;
  const char *index_name;
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  DBUG_ENTER("ha_ndbcluster::renumber_indexes");
  
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    index_name= *key_name;
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (m_index[i].status == TO_BE_DROPPED) 
    {
      DBUG_PRINT("info", ("Shifting index %s(%i) out of the list", 
                          index_name, i));
      NDB_INDEX_DATA tmp;
      uint j= i + 1;
      // Shift index out of list
      while(j != MAX_KEY && m_index[j].status != UNDEFINED)
      {
        tmp=  m_index[j - 1];
        m_index[j - 1]= m_index[j];
        m_index[j]= tmp;
        j++;
      }
    }
  }

  DBUG_VOID_RETURN;
}

/*
  Drop all indexes that are marked for deletion
*/
int ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab)
{
  uint i;
  int error= 0;
  const char *index_name;
  KEY* key_info= tab->key_info;
  NDBDICT *dict= ndb->getDictionary();
  DBUG_ENTER("ha_ndbcluster::drop_indexes");
  
  for (i= 0; i < tab->s->keys; i++, key_info++)
  {
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (m_index[i].status == TO_BE_DROPPED)
    {
      const NdbDictionary::Index *index= m_index[i].index;
      const NdbDictionary::Index *unique_index= m_index[i].unique_index;
      
      if (index)
      {
        index_name= index->getName();
        DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name));  
        // Drop ordered index from ndb
        if (dict->dropIndexGlobal(*index) == 0)
        {
          dict->removeIndexGlobal(*index, 1);
          m_index[i].index= NULL;
        }
        else
        {
          error= ndb_to_mysql_error(&dict->getNdbError());
          m_dupkey= i; // for HA_ERR_DROP_INDEX_FK
        }
      }
      if (!error && unique_index)
      {
        index_name= unique_index->getName();
        DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name));
        // Drop unique index from ndb
        if (dict->dropIndexGlobal(*unique_index) == 0)
        {
          dict->removeIndexGlobal(*unique_index, 1);
          m_index[i].unique_index= NULL;
        }
        else
        {
          error=ndb_to_mysql_error(&dict->getNdbError());
          m_dupkey= i; // for HA_ERR_DROP_INDEX_FK
        }
      }
      if (error)
        DBUG_RETURN(error);
      ndb_clear_index(dict, m_index[i]);
      continue;
    }
  }
  
  DBUG_RETURN(error);
}

/**
  Decode the type of an index from information 
  provided in table object.
*/
NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
{
  return get_index_type_from_key(inx, table_share->key_info,
                                 inx == table_share->primary_key);
}

NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx,
                                                      KEY *key_info,
                                                      bool primary) const
{
  bool is_hash_index=  (key_info[inx].algorithm == 
                        HA_KEY_ALG_HASH);
  if (primary)
    return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
  
  return ((key_info[inx].flags & HA_NOSAME) ? 
          (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
          ORDERED_INDEX);
} 

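/*
  Return TRUE if any key part of the index may be NULL
  (used to flag null_in_unique_index when opening indexes).
*/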
bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info) const
{
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
  DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
  
  for (; key_part != end; key_part++)
  {
    Field* field= key_part->field;
    if (field->maybe_null())
      DBUG_RETURN(TRUE);
  }
  
  DBUG_RETURN(FALSE);
}

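/*
  Release all NDB dictionary resources held for this table: the table
  NdbRecords, the global table reference, all index handles and their
  records, and any foreign key data. Cached dictionary objects are
  invalidated on FLUSH or when the table object is marked invalid.
*/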
void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb)
{
  uint i;

  DBUG_ENTER("release_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));

  NDBDICT *dict= ndb->getDictionary();
  int invalidate_indexes= 0;
  if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH)
  {
    invalidate_indexes = 1;
  }
  if (m_table != NULL)
  {
    if (m_ndb_record != NULL)
    {
      dict->releaseRecord(m_ndb_record);
      m_ndb_record= NULL;
    }
    if (m_ndb_hidden_key_record != NULL)
    {
      dict->releaseRecord(m_ndb_hidden_key_record);
      m_ndb_hidden_key_record= NULL;
    }
    if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid)
      invalidate_indexes= 1;
    dict->removeTableGlobal(*m_table, invalidate_indexes);
  }
  // TODO investigate
  assert(m_table_info == NULL);
  m_table_info= NULL;

  // Release index list 
  for (i= 0; i < MAX_KEY; i++)
  {
    if (m_index[i].unique_index)
    {
      assert(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes);
    }
    if (m_index[i].index)
    {
      assert(m_table != NULL);
      dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes);
    }
    ndb_clear_index(dict, m_index[i]);
  }

  // Release FK data
  release_fk_data(thd);

  m_table= NULL;
  DBUG_VOID_RETURN;
}


/*
  Map from thr_lock_type to NdbOperation::LockMode
*/
static inline
NdbOperation::LockMode get_ndb_lock_mode(enum thr_lock_type type)
{
  if (type >= TL_WRITE_ALLOW_WRITE)
    return NdbOperation::LM_Exclusive;
  if (type ==  TL_READ_WITH_SHARED_LOCKS)
    return NdbOperation::LM_Read;
  return NdbOperation::LM_CommittedRead;
}


static const ulong index_type_flags[]=
{
  /* UNDEFINED_INDEX */
  0,                         

  /* PRIMARY_KEY_INDEX */
  HA_ONLY_WHOLE_INDEX, 

  /* PRIMARY_KEY_ORDERED_INDEX */
  /* 
     Enable HA_KEYREAD_ONLY when "sorted" indexes are supported, 
     thus ORDER BY clauses can be optimized by reading directly 
     through the index.
  */
  // HA_KEYREAD_ONLY | 
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* UNIQUE_INDEX */
  HA_ONLY_WHOLE_INDEX,

  /* UNIQUE_ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER
};

static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);

inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
{
  assert(idx_no < MAX_KEY);
  return m_index[idx_no].type;
}

inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const
{
  assert(idx_no < MAX_KEY);
  return m_index[idx_no].null_in_unique_index;
}


/**
  Get the flags for an index.

  @return
    flags depending on the type of the index.
*/

inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
                                        bool all_parts) const 
{ 
  DBUG_ENTER("ha_ndbcluster::index_flags");
  DBUG_PRINT("enter", ("idx_no: %u", idx_no));
  assert(get_index_type_from_table(idx_no) < index_flags_size);
  DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | 
              HA_KEY_SCAN_NOT_ROR);
}

bool
ha_ndbcluster::primary_key_is_clustered() const
{

  if (table->s->primary_key == MAX_KEY)
    return false;

  /*
    NOTE 1: our ordered indexes are not really clustered
    but since accessing data when scanning index is free
    it's a good approximation

    NOTE 2: We really should consider DD attributes here too
    (for which there is IO to read data when scanning index)
    but that will need to be handled later...
  */
  const ndb_index_type idx_type =
    get_index_type_from_table(table->s->primary_key);
  return (idx_type == PRIMARY_KEY_ORDERED_INDEX ||
          idx_type == UNIQUE_ORDERED_INDEX ||
          idx_type == ORDERED_INDEX);
}

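/**
  Return true if every field of the given key is present in the
  table's write_set.
*/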
bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno)
{
  KEY* key_info= table->key_info + keyno;
  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
  uint i;
  DBUG_ENTER("check_index_fields_in_write_set");

  for (i= 0; key_part != end; key_part++, i++)
  {
    Field* field= key_part->field;
    if (!bitmap_is_set(table->write_set, field->field_index))
    {
      DBUG_RETURN(false);
    }
  }

  DBUG_RETURN(true);
}


/**
  Read one record from NDB using primary key.
*/

int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf,
                           uint32 *part_id)
{
  NdbConnection *trans= m_thd_ndb->trans;
  int res;
  DBUG_ENTER("pk_read");
  DBUG_PRINT("enter", ("key_len: %u read_set=%x",
                       key_len, table->read_set->bitmap[0]));
  DBUG_DUMP("key", key, key_len);
  assert(trans);

  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);

  if (check_if_pushable(NdbQueryOperationDef::PrimaryKeyAccess,
                        table->s->primary_key))
  {
    // Is parent of pushed join
    assert(lm == NdbOperation::LM_CommittedRead);
    const int error= pk_unique_index_read_key_pushed(table->s->primary_key, key,
                                                     (m_user_defined_partitioning ?
                                                     part_id : NULL));
    if (unlikely(error))
      DBUG_RETURN(error);

    assert(m_active_query!=NULL);
    if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
        m_active_query->getNdbError().code) 
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }

    int result= fetch_next_pushed();
    if (result == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (result == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    const NdbOperation *op;
    if (!(op= pk_unique_index_read_key(table->s->primary_key, key, buf, lm,
                                       (m_user_defined_partitioning ?
                                        part_id :
                                        NULL))))
      ERR_RETURN(trans->getNdbError());

    if ((res = execute_no_commit_ie(m_thd_ndb, trans)) != 0 ||
        op->getNdbError().code) 
    {
      table->status= STATUS_NOT_FOUND;
      DBUG_RETURN(ndb_err(trans));
    }
    table->status= 0;     
    DBUG_RETURN(0);
  }
}

/**
  Update primary key or partition id by doing a delete followed by an insert.
*/

int ha_ndbcluster::ndb_pk_update_row(THD *thd,
                                     const uchar *old_data, uchar *new_data)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  int error;
  DBUG_ENTER("ndb_pk_update_row");
  assert(trans);

  DBUG_PRINT("info", ("primary key update or partition change, "
                      "doing delete+insert"));

#ifndef NDEBUG
  /*
   * 'old_data' contains columns as specified in 'read_set'.
   * All PK columns must be included for ::ndb_delete_row()
   */
  assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
  /*
   * As a complete 'new_data' row is reinserted after the delete,
   * all columns must be contained in the read+write union.
   */
  bitmap_copy(&m_bitmap, table->read_set);
  bitmap_union(&m_bitmap, table->write_set);
  assert(bitmap_is_set_all(&m_bitmap));
#endif

  // Delete old row
  error= ndb_delete_row(old_data, TRUE);
  if (error)
  {
    DBUG_PRINT("info", ("delete failed"));
    DBUG_RETURN(error);
  }

  // Insert new row
  DBUG_PRINT("info", ("delete succeeded"));
  bool batched_update= (m_active_cursor != 0);
  /*
    If we are updating a primary key with auto_increment
    then we need to update the auto_increment counter
  */
  if (table->found_next_number_field &&
      bitmap_is_set(table->write_set, 
                    table->found_next_number_field->field_index) &&
      (error= set_auto_inc(thd, table->found_next_number_field)))
  {
    DBUG_RETURN(error);
  }

  /*
    We are mapping a MySQLD PK changing update to an NdbApi delete 
    and insert.
    The original PK changing update may not have written new values
    to all columns, so the write set may be partial.
    We set the write set to be all columns so that all values are
    copied from the old row to the new row.
  */
  my_bitmap_map *old_map=
    tmp_use_all_columns(table, table->write_set);
  error= ndb_write_row(new_data, TRUE, batched_update);
  tmp_restore_column_map(table->write_set, old_map);

  if (error)
  {
    DBUG_PRINT("info", ("insert failed"));
    if (trans->commitStatus() == NdbConnection::Started)
    {
      if (thd->slave_thread)
        g_ndb_slave_state.atTransactionAbort();
      m_thd_ndb->m_unsent_bytes= 0;
      m_thd_ndb->m_execute_count++;
      DBUG_PRINT("info", ("execute_count: %u", m_thd_ndb->m_execute_count));
      trans->execute(NdbTransaction::Rollback);
#ifdef FIXED_OLD_DATA_TO_ACTUALLY_CONTAIN_GOOD_DATA
      int undo_res;
      // Undo delete_row(old_data)
      undo_res= ndb_write_row((uchar *)old_data, TRUE, batched_update);
      if (undo_res)
        push_warning(table->in_use,
                     Sql_condition::SL_WARNING,
                     undo_res,
                     "NDB failed undoing delete at primary key update");
#endif
    }
    DBUG_RETURN(error);
  }
  DBUG_PRINT("info", ("delete+insert succeeded"));

  DBUG_RETURN(0);
}

/**
  Check that all operations between first and last
  have returned the given errcode.
  If checking for HA_ERR_KEY_NOT_FOUND, then update m_dupkey
  for any operation that instead succeeded (i.e. found a conflicting row).
*/
bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
                                                   const NdbOperation *first,
                                                   const NdbOperation *last,
                                                   uint errcode)
{
  const NdbOperation *op= first;
  DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");

  while(op)
  {
    NdbError err= op->getNdbError();
    if (err.status != NdbError::Success)
    {
      if (ndb_to_mysql_error(&err) != (int) errcode)
        DBUG_RETURN(FALSE);
      if (op == last) break;
      op= trans->getNextCompletedOperation(op);
    }
    else
    {
      // We found a duplicate
      if (op->getType() == NdbOperation::UniqueIndexAccess)
      {
        if (errcode == HA_ERR_KEY_NOT_FOUND)
        {
          NdbIndexOperation *iop= (NdbIndexOperation *) op;
          const NDBINDEX *index= iop->getIndex();
          // Find the key_no of the index
          for(uint i= 0; i<table->s->keys; i++)
          {
            if (m_index[i].unique_index == index)
            {
              m_dupkey= i;
              break;
            }
          }
        }
      }
      else
      {
        // Must have been primary key access
        assert(op->getType() == NdbOperation::PrimaryKeyAccess);
        if (errcode == HA_ERR_KEY_NOT_FOUND)
          m_dupkey= table->s->primary_key;
      }
      DBUG_RETURN(FALSE);      
    }
  }
  DBUG_RETURN(TRUE);
}


/**
 * Check if record contains any null valued columns that are part of a key
 */
static
int
check_null_in_record(const KEY* key_info, const uchar *record)
{
  KEY_PART_INFO *curr_part, *end_part;
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->user_defined_key_parts;

  while (curr_part != end_part)
  {
    if (curr_part->null_bit &&
        (record[curr_part->null_offset] & curr_part->null_bit))
      return 1;
    curr_part++;
  }
  return 0;
  /*
    We could instead pre-compute a bitmask in table_share with one bit for
    every null-bit in the key, and so check this just by OR'ing the bitmask
    with the null bitmap in the record.
    But not sure it's worth it.
  */
}

/* Empty mask and dummy row, for reading no attributes using NdbRecord. */
/* Mask will be initialized to all zeros by linker. */
static unsigned char empty_mask[(NDB_MAX_ATTRIBUTES_IN_TABLE+7)/8];
static char dummy_row[1];

/**
  Peek to check if any rows already exist with conflicting
  primary key or unique index values
*/

int ha_ndbcluster::peek_indexed_rows(const uchar *record, 
                                     NDB_WRITE_OP write_op)
{
  NdbTransaction *trans;
  const NdbOperation *op;
  const NdbOperation *first, *last;
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions=NULL;
  options.optionsPresent = 0;
  uint i;
  int res, error;
  DBUG_ENTER("peek_indexed_rows");
  if (unlikely(!(trans= get_transaction(error))))
  {
    DBUG_RETURN(error);
  }
  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  first= NULL;
  if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY)
  {
    /*
     * Fetch any row with colliding primary key
     */
    const NdbRecord *key_rec=
      m_index[table->s->primary_key].ndb_unique_record_row;

    if (m_user_defined_partitioning)
    {
      uint32 part_id;
      int error;
      longlong func_value;
      my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
      error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
      dbug_tmp_restore_column_map(table->read_set, old_map);
      if (error)
      {
        m_part_info->err_value= func_value;
        DBUG_RETURN(error);
      }
      options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
      options.partitionId=part_id;
      poptions=&options;
    }    

    if (!(op= trans->readTuple(key_rec, (const char *)record,
                               m_ndb_record, dummy_row, lm, empty_mask,
                               poptions, 
                               sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());
    
    first= op;
  }
  /*
   * Fetch any rows with colliding unique indexes
   */
  KEY* key_info;
  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
  {
    if (i != table_share->primary_key &&
        key_info->flags & HA_NOSAME &&
        bitmap_is_overlapping(table->write_set, m_key_fields[i]))
    {
      /*
        A unique index is defined on the table and is being updated.
        We cannot look up a NULL field value in a unique index. But since
        keys with NULLs are not indexed, such rows cannot conflict anyway, so
        we just skip the index in this case.
      */
      if (check_null_in_record(key_info, record))
      {
        DBUG_PRINT("info", ("skipping check for key with NULL"));
        continue;
      }
      if (write_op != NDB_INSERT && !check_index_fields_in_write_set(i))
      {
        DBUG_PRINT("info", ("skipping check for key %u not in write_set", i));
        continue;
      }

      const NdbOperation *iop;
      const NdbRecord *key_rec= m_index[i].ndb_unique_record_row;
      if (!(iop= trans->readTuple(key_rec, (const char *)record,
                                  m_ndb_record, dummy_row,
                                  lm, empty_mask)))
        ERR_RETURN(trans->getNdbError());

      if (!first)
        first= iop;
    }
  }
  last= trans->getLastDefinedOperation();
  if (first)
    res= execute_no_commit_ie(m_thd_ndb, trans);
  else
  {
    // Table has no keys
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
  }
  const NdbError ndberr= trans->getNdbError();
  error= ndberr.mysql_code;
  if ((error != 0 && error != HA_ERR_KEY_NOT_FOUND) ||
      check_all_operations_for_error(trans, first, last, 
                                     HA_ERR_KEY_NOT_FOUND))
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  } 
  else
  {
    DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
  }
  DBUG_RETURN(0);
}


/**
  Read one record from NDB using unique secondary index.
*/

int ha_ndbcluster::unique_index_read(const uchar *key,
                                     uint key_len, uchar *buf)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::unique_index_read");
  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
  DBUG_DUMP("key", key, key_len);
  assert(trans);

  NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);

  if (check_if_pushable(NdbQueryOperationDef::UniqueIndexAccess,
                        active_index))
  {
    assert(lm == NdbOperation::LM_CommittedRead);
    const int error= pk_unique_index_read_key_pushed(active_index, key, NULL);
    if (unlikely(error))
      DBUG_RETURN(error);

    assert(m_active_query!=NULL);
    if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
        m_active_query->getNdbError().code) 
    {
      table->status= STATUS_GARBAGE;
      DBUG_RETURN(ndb_err(trans));
    }

    int result= fetch_next_pushed();
    if (result == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (result == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    const NdbOperation *op;

    if (!(op= pk_unique_index_read_key(active_index, key, buf, lm, NULL)))
      ERR_RETURN(trans->getNdbError());
  
    if (execute_no_commit_ie(m_thd_ndb, trans) != 0 ||
        op->getNdbError().code) 
    {
      int err= ndb_err(trans);
      if(err==HA_ERR_KEY_NOT_FOUND)
        table->status= STATUS_NOT_FOUND;
      else
        table->status= STATUS_GARBAGE;

      DBUG_RETURN(err);
    }

    table->status= 0;
    DBUG_RETURN(0);
  }
}

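/**
  If the previously scanned row should stay locked (m_lock_tuple set),
  take over the lock with lockCurrentTuple() so that it is kept past the
  next fetch, and, when exclusive reads are being logged, also issue an
  'empty update' so the read is marked in the binlog. The operations are
  defined here and executed together with the next batch.
*/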
int
ha_ndbcluster::scan_handle_lock_tuple(NdbScanOperation *scanOp,
                                      NdbTransaction *trans)
{
  DBUG_ENTER("ha_ndbcluster::scan_handle_lock_tuple");
  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type is either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      ... LOCK IN SHARE MODE) and the row was not explicitly unlocked
      with an unlock_row() call
    */
    DBUG_PRINT("info", ("Keeping lock on scanned row"));
      
    if (!(scanOp->lockCurrentTuple(trans, m_ndb_record,
                                   dummy_row, empty_mask)))
    {
      m_lock_tuple= false;
      ERR_RETURN(trans->getNdbError());
    }

    /* Perform 'empty update' to mark the read in the binlog, iff required */
    /*
     * Lock_mode = exclusive
     * Session_state = marking_exclusive_reads
     * THEN
     * issue updateCurrentTuple with AnyValue explicitly set
     */
    if ((m_lock.type >= TL_WRITE_ALLOW_WRITE) &&
        ndb_log_exclusive_reads(current_thd))
    {
      if (scan_log_exclusive_read(scanOp, trans))
      { 
        m_lock_tuple= false;
        ERR_RETURN(trans->getNdbError());
      }
    }

    m_thd_ndb->m_unsent_bytes+=12;
    m_lock_tuple= false;
  }
  DBUG_RETURN(0);
}

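/**
  Fetch the next row from an open scan cursor. Pending blob operations
  and completed operations are flushed/released first; nextResult() is
  then called, contacting NDB for more rows when the local cache is
  exhausted (after sending any pending update/delete operations).
  Returns 0 on a row, 1 at end of scan, or an error code.
*/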
inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
{
  DBUG_ENTER("fetch_next");
  int local_check;
  int error;
  NdbTransaction *trans= m_thd_ndb->trans;
  
  assert(trans);
  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
    DBUG_RETURN(error);
  
  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
                    m_lock.type != TL_READ_WITH_SHARED_LOCKS;
  do {
    DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
    /*
      We can only handle one tuple with blobs at a time.
    */
    if (m_thd_ndb->m_unsent_bytes && m_blobs_pending)
    {
      if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
        DBUG_RETURN(ndb_err(trans));
    }
    
    /* Should be no unexamined completed operations
       nextResult() on Blobs generates Blob part read ops,
       so we will free them here
    */
    release_completed_operations(trans);
    
    if ((local_check= cursor->nextResult(&_m_next_row,
                                         contact_ndb,
                                         m_thd_ndb->m_force_send)) == 0)
    {
      /*
        Explicitly lock tuple if "select for update" or
        "select lock in share mode"
      */
      m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE ||
                     m_lock.type == TL_READ_WITH_SHARED_LOCKS);
      DBUG_RETURN(0);
    } 
    else if (local_check == 1 || local_check == 2)
    {
      // 1: No more records
      // 2: No more cached records
      
      /*
        Before fetching more rows and releasing lock(s),
        all pending update or delete operations should 
        be sent to NDB
      */
      DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
                          (long) m_thd_ndb->m_unsent_bytes));
      if (m_thd_ndb->m_unsent_bytes)
      {
        if ((error = flush_bulk_insert()) != 0)
          DBUG_RETURN(error);
      }
      contact_ndb= (local_check == 2);
    }
    else
    {
      DBUG_RETURN(ndb_err(trans));
    }
  } while (local_check == 2);

  DBUG_RETURN(1);
}

int ha_ndbcluster::fetch_next_pushed()
{
  DBUG_ENTER("fetch_next_pushed (from pushed operation)");

  assert(m_pushed_operation);
  NdbQuery::NextResultOutcome result= m_pushed_operation->nextResult(true, m_thd_ndb->m_force_send);

  /**
   * Only prepare result & status from this operation in pushed join.
   * Consecutive rows are prepared through ::index_read_pushed() and
   * ::index_next_pushed() which unpack and set correct status for each row.
   */
  if (result == NdbQuery::NextResult_gotRow)
  {
    assert(m_next_row!=NULL);
    DBUG_PRINT("info", ("One more record found"));    
    table->status= 0;
    unpack_record(table->record[0], m_next_row);
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(0)
  }
  else if (result == NdbQuery::NextResult_scanComplete)
  {
    assert(m_next_row==NULL);
    DBUG_PRINT("info", ("No more records"));
    table->status= STATUS_NOT_FOUND;
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  else
  {
    DBUG_PRINT("info", ("Error from 'nextResult()'"));
    table->status= STATUS_GARBAGE;
//  assert(false);
    DBUG_RETURN(ndb_err(m_thd_ndb->trans));
  }
  DBUG_RETURN(result);
}

/**
  Get the first record from an indexed table access being a child 
  operation in a pushed join. Fetch will be from prefetched
  cached records which are materialized into the bound buffer
  areas as a result of this call.
*/

int 
ha_ndbcluster::index_read_pushed(uchar *buf, const uchar *key,
                                 key_part_map keypart_map)
{
  DBUG_ENTER("index_read_pushed");

  // Handler might have decided not to execute the pushed joins which have been prepared
  // In this case we do an unpushed index_read based on 'Plain old' NdbOperations
  if (unlikely(!check_is_pushed()))
  {
    int res= index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT);
    if (!res && table->vfield)
      res= update_generated_read_fields(buf, table);
    DBUG_RETURN(res);
  }

  // Might need to re-establish first result row (wrt. its parents which may have been navigated)
  NdbQuery::NextResultOutcome result= m_pushed_operation->firstResult();

  // Result from pushed operation will be referred by 'm_next_row' if non-NULL
  if (result == NdbQuery::NextResult_gotRow)
  {
    assert(m_next_row!=NULL);
    unpack_record(buf, m_next_row);
    table->status= 0;
    m_thd_ndb->m_pushed_reads++;
  }
  else
  {
    assert(result!=NdbQuery::NextResult_gotRow);
    table->status= STATUS_NOT_FOUND;
    DBUG_PRINT("info", ("No record found"));
//  m_thd_ndb->m_pushed_reads++;
//  DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  DBUG_RETURN(0);
}


/**
  Get the next record from an indexed table access that is a child
  operation in a pushed join. The fetch is from prefetched cached
  records, which are materialized into the bound buffer areas as a
  result of this call.
*/
int ha_ndbcluster::index_next_pushed(uchar *buf)
{
  DBUG_ENTER("index_next_pushed");

  // The handler might have decided not to execute the pushed joins that were prepared.
  // In that case we do an unpushed index read based on 'Plain old' NdbOperations.
  if (unlikely(!check_is_pushed()))
  {
    int res= index_next(buf);
    if (!res && table->vfield)
      res= update_generated_read_fields(buf, table);
    DBUG_RETURN(res);
  }

  assert(m_pushed_join_operation>PUSHED_ROOT);  // Child of a pushed join
  assert(m_active_query==NULL);

  int res = fetch_next_pushed();
  if (res == NdbQuery::NextResult_gotRow)
  {
    DBUG_RETURN(0);
  }
  else if (res == NdbQuery::NextResult_scanComplete)
  {
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  else
  {
    DBUG_RETURN(ndb_err(m_thd_ndb->trans));
  }
}


/**
  Get the next record of a started scan. Try to fetch
  it locally from NdbApi cached records if possible, 
  otherwise ask NDB for more.

  @note
    If this is an update/delete, make sure not to contact
    NDB before any pending ops have been sent to NDB.
*/

inline int ha_ndbcluster::next_result(uchar *buf)
{  
  int res;
  DBUG_ENTER("next_result");
    
  if (m_active_cursor)
  {
    if ((res= fetch_next(m_active_cursor)) == 0)
    {
      DBUG_PRINT("info", ("One more record found"));    

      unpack_record(buf, m_next_row);
      table->status= 0;
      DBUG_RETURN(0);
    }
    else if (res == 1)
    {
      // No more records
      table->status= STATUS_NOT_FOUND;
      
      DBUG_PRINT("info", ("No more records"));
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else
    {
      DBUG_RETURN(ndb_err(m_thd_ndb->trans));
    }
  }
  else if (m_active_query)
  {
    res= fetch_next_pushed();
    if (res == NdbQuery::NextResult_gotRow)
    {
      DBUG_RETURN(0);
    }
    else if (res == NdbQuery::NextResult_scanComplete)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    else
    {
      DBUG_RETURN(ndb_err(m_thd_ndb->trans));
    }
  }
  else
    DBUG_RETURN(HA_ERR_END_OF_FILE);
}

int
ha_ndbcluster::log_exclusive_read(const NdbRecord *key_rec,
                                  const uchar *key,
                                  uchar *buf,
                                  Uint32 *ppartition_id)
{
  DBUG_ENTER("log_exclusive_read");
  NdbOperation::OperationOptions opts;
  opts.optionsPresent=
    NdbOperation::OperationOptions::OO_ABORTOPTION |
    NdbOperation::OperationOptions::OO_ANYVALUE;
  
  /* If the key does not exist, that is ok */
  opts.abortOption= NdbOperation::AO_IgnoreError;
  
  /* 
     Mark the AnyValue as a read operation, so that the update
     is processed
  */
  opts.anyValue= 0;
  ndbcluster_anyvalue_set_read_op(opts.anyValue);

  if (ppartition_id != NULL)
  {
    assert(m_user_defined_partitioning);
    opts.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
    opts.partitionId= *ppartition_id;
  }
  
  const NdbOperation* markingOp=
    m_thd_ndb->trans->updateTuple(key_rec,
                                  (const char*) key,
                                  m_ndb_record,
                                  (char*)buf,
                                  empty_mask,
                                  &opts,
                                  opts.size());
  if (!markingOp)
  {
    char msg[FN_REFLEN];
    my_snprintf(msg, sizeof(msg), "Error logging exclusive reads, failed creating markingOp, %u, %s\n",
                m_thd_ndb->trans->getNdbError().code,
                m_thd_ndb->trans->getNdbError().message);
    push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                        ER_EXCEPTIONS_WRITE_ERROR,
                        ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
    /*
      By returning -1, the caller (pk_unique_index_read_key) will return
      NULL and the error on the transaction object will be reported.
    */
    DBUG_RETURN(-1);
  }

  DBUG_RETURN(0);
}

int
ha_ndbcluster::scan_log_exclusive_read(NdbScanOperation *cursor,
                                       NdbTransaction *trans)
{
  DBUG_ENTER("ha_ndbcluster::scan_log_exclusive_read");
  NdbOperation::OperationOptions opts;
  opts.optionsPresent= NdbOperation::OperationOptions::OO_ANYVALUE;

  /* 
     Mark the AnyValue as a read operation, so that the update
     is processed
  */
  opts.anyValue= 0;
  ndbcluster_anyvalue_set_read_op(opts.anyValue);
  
  const NdbOperation* markingOp=
    cursor->updateCurrentTuple(trans, m_ndb_record,
                               dummy_row, empty_mask,
                               &opts,
                               sizeof(NdbOperation::OperationOptions));
  if (markingOp == NULL)
  {
    char msg[FN_REFLEN];
    my_snprintf(msg, sizeof(msg), "Error logging exclusive reads during scan, failed creating markingOp, %u, %s\n",
                m_thd_ndb->trans->getNdbError().code,
                m_thd_ndb->trans->getNdbError().message);
    push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                        ER_EXCEPTIONS_WRITE_ERROR,
                        ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
    DBUG_RETURN(-1);
  }

  DBUG_RETURN(0);
}

/**
  Do a primary key or unique key index read operation.
  The key value is taken from a buffer in mysqld key format.
*/
const NdbOperation *
ha_ndbcluster::pk_unique_index_read_key(uint idx, const uchar *key, uchar *buf,
                                        NdbOperation::LockMode lm,
                                        Uint32 *ppartition_id)
{
  DBUG_ENTER("pk_unique_index_read_key");
  const NdbOperation *op;
  const NdbRecord *key_rec;
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions = NULL;
  options.optionsPresent= 0;
  NdbOperation::GetValueSpec gets[2];
  ndb_index_type idx_type=
    (idx != MAX_KEY)?
    get_index_type(idx)
    : UNDEFINED_INDEX;

  assert(m_thd_ndb->trans);

  DBUG_PRINT("info", ("pk_unique_index_read_key of table %s", table->s->table_name.str));

  if (idx != MAX_KEY)
    key_rec= m_index[idx].ndb_unique_record_key;
  else
    key_rec= m_ndb_hidden_key_record;

  /* Initialize the null bitmap, setting unused null bits to 1. */
  memset(buf, 0xff, table->s->null_bytes);

  if (table_share->primary_key == MAX_KEY)
  {
    get_hidden_fields_keyop(&options, gets);
    poptions= &options;
  }
  get_read_set(false, idx);

  if (ppartition_id != NULL)
  {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId= *ppartition_id;
    poptions= &options;
  }

  op= m_thd_ndb->trans->readTuple(key_rec, (const char *)key, m_ndb_record,
                                  (char *)buf, lm,
                                  (uchar *)(table->read_set->bitmap), poptions,
                                  sizeof(NdbOperation::OperationOptions));

  if (uses_blob_value(table->read_set) &&
      get_blob_values(op, buf, table->read_set) != 0)
    DBUG_RETURN(NULL);

  /* Perform 'empty update' to mark the read in the binlog, iff required */
  /*
   * Lock_mode = exclusive
   * Index = primary or unique
   * Session_state = marking_exclusive_reads
   * THEN
   * issue updateTuple with AnyValue explicitly set
   */
  if ((lm == NdbOperation::LM_Exclusive) &&
      /*
        We don't need to check index type
        (idx_type == PRIMARY_KEY_INDEX ||
        idx_type == PRIMARY_KEY_ORDERED_INDEX ||
        idx_type == UNIQUE_ORDERED_INDEX ||
        idx_type == UNIQUE_INDEX) 
        since this method is only invoked for
        primary or unique indexes, but we do need to check
        if it was a hidden primary key.
      */
      idx_type != UNDEFINED_INDEX &&
      ndb_log_exclusive_reads(current_thd))
  {
    if (log_exclusive_read(key_rec, key, buf, ppartition_id) != 0)
      DBUG_RETURN(NULL);
  }

  DBUG_RETURN(op);
}


static
bool
is_shrinked_varchar(const Field *field)
{
  if (field->real_type() ==  MYSQL_TYPE_VARCHAR)
  {
    if (((Field_varstring*)field)->length_bytes == 1)
      return true;
  }

  return false;
}

int
ha_ndbcluster::pk_unique_index_read_key_pushed(uint idx, 
                                               const uchar *key, 
                                               Uint32 *ppartition_id)
{
  DBUG_ENTER("pk_unique_index_read_key_pushed");
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions = NULL;
  options.optionsPresent= 0;
  NdbOperation::GetValueSpec gets[2];

  assert(m_thd_ndb->trans);
  assert(idx < MAX_KEY);

  if (m_active_query)
  {
    m_active_query->close(FALSE);
    m_active_query= NULL;
  }

  if (table_share->primary_key == MAX_KEY)
  {
    get_hidden_fields_keyop(&options, gets);
    poptions= &options;
  }
  get_read_set(false, idx);

  if (ppartition_id != NULL)
  {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId= *ppartition_id;
    poptions= &options;
  }

  KEY *key_def= &table->key_info[idx];
  KEY_PART_INFO *key_part;

  uint i;
  Uint32 offset= 0;
  NdbQueryParamValue paramValues[ndb_pushed_join::MAX_KEY_PART];
  assert(key_def->user_defined_key_parts <= ndb_pushed_join::MAX_KEY_PART);

  uint map[ndb_pushed_join::MAX_KEY_PART];
  ndbcluster_build_key_map(m_table, m_index[idx], &table->key_info[idx], map);

  // Bind key values defining root of pushed join
  for (i = 0, key_part= key_def->key_part; i < key_def->user_defined_key_parts; i++, key_part++)
  {
    bool shrinkVarChar= is_shrinked_varchar(key_part->field);

    if (key_part->null_bit)                         // Column is nullable
    {
      assert(idx != table_share->primary_key); // PK can't be nullable
      assert(*(key+offset)==0);                // Null values not allowed in key
                                                    // Value is imm. after NULL indicator
      paramValues[map[i]]= NdbQueryParamValue(key+offset+1,shrinkVarChar);
    }
    else                                            // Non-nullable column
    {
      paramValues[map[i]]= NdbQueryParamValue(key+offset,shrinkVarChar);
    }
    offset+= key_part->store_length;
  }

  const int ret= create_pushed_join(paramValues, key_def->user_defined_key_parts);
  DBUG_RETURN(ret);
}


/** Count number of columns in key part. */
static uint
count_key_columns(const KEY *key_info, const key_range *key)
{
  KEY_PART_INFO *first_key_part= key_info->key_part;
  KEY_PART_INFO *key_part_end= first_key_part + key_info->user_defined_key_parts;
  KEY_PART_INFO *key_part;
  uint length= 0;
  for(key_part= first_key_part; key_part < key_part_end; key_part++)
  {
    if (length >= key->length)
      break;
    length+= key_part->store_length;
  }
  return (uint)(key_part - first_key_part);
}
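
/*
  Illustrative example (not from the original code): for an index on
  (a INT, b INT), a key_range whose length covers only column 'a'
  (key->length == a's store_length) makes the loop above stop after the
  first key part, so 1 is returned and 'b' is left unbounded by
  compute_index_bounds() below.
*/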

/* Helper method to compute NDB index bounds. Note: does not set range_no. */
/* Stats queries may differ, so "from" indicates the caller: 0=normal, 1=RIR, 2=RPK. */
void
compute_index_bounds(NdbIndexScanOperation::IndexBound & bound,
                     const KEY *key_info,
                     const key_range *start_key, const key_range *end_key,
                     int from)
{
  DBUG_ENTER("ha_ndbcluster::compute_index_bounds");
  DBUG_PRINT("info", ("from: %d", from));

#ifndef NDEBUG
  DBUG_PRINT("info", ("key parts: %u length: %u",
                      key_info->user_defined_key_parts, key_info->key_length));
  {
    for (uint j= 0; j <= 1; j++)
    {
      const key_range* kr= (j == 0 ? start_key : end_key);
      if (kr)
      {
        DBUG_PRINT("info", ("key range %u: length: %u map: %lx flag: %d",
                          j, kr->length, kr->keypart_map, kr->flag));
        DBUG_DUMP("key", kr->key, kr->length);
      }
      else
      {
        DBUG_PRINT("info", ("key range %u: none", j));
      }
    }
  }
#endif

  if (start_key)
  {
    bound.low_key= (const char*)start_key->key;
    bound.low_key_count= count_key_columns(key_info, start_key);
    bound.low_inclusive=
      start_key->flag != HA_READ_AFTER_KEY &&
      start_key->flag != HA_READ_BEFORE_KEY;
  }
  else
  {
    bound.low_key= NULL;
    bound.low_key_count= 0;
  }

  /* RIR query for x >= 1 inexplicably passes HA_READ_KEY_EXACT. */
  if (start_key &&
      (start_key->flag == HA_READ_KEY_EXACT ||
       start_key->flag == HA_READ_PREFIX_LAST) &&
      from != 1)
  {
    bound.high_key= bound.low_key;
    bound.high_key_count= bound.low_key_count;
    bound.high_inclusive= TRUE;
  }
  else if (end_key)
  {
    bound.high_key= (const char*)end_key->key;
    bound.high_key_count= count_key_columns(key_info, end_key);
    /*
      For some reason, 'where b >= 1 and b <= 3' uses HA_READ_AFTER_KEY for
      the end_key.
      So HA_READ_AFTER_KEY in end_key sets high_inclusive, even though in
      start_key it does not set low_inclusive.
    */
    bound.high_inclusive= end_key->flag != HA_READ_BEFORE_KEY;
    if (end_key->flag == HA_READ_KEY_EXACT ||
        end_key->flag == HA_READ_PREFIX_LAST)
    {
      bound.low_key= bound.high_key;
      bound.low_key_count= bound.high_key_count;
      bound.low_inclusive= TRUE;
    }
  }
  else
  {
    bound.high_key= NULL;
    bound.high_key_count= 0;
  }
  DBUG_PRINT("info", ("start_flag=%d end_flag=%d"
                      " lo_keys=%d lo_incl=%d hi_keys=%d hi_incl=%d",
                      start_key?start_key->flag:0, end_key?end_key->flag:0,
                      bound.low_key_count,
                      bound.low_key_count?bound.low_inclusive:0,
                      bound.high_key_count,
                      bound.high_key_count?bound.high_inclusive:0));
  DBUG_VOID_RETURN;
}
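
/*
  Worked example (illustrative, assuming the server passes
  HA_READ_KEY_OR_NEXT for the lower bound, as it typically does for range
  reads): for 'WHERE b >= 1 AND b <= 3' the start_key yields
  low_inclusive= TRUE (the flag is neither HA_READ_AFTER_KEY nor
  HA_READ_BEFORE_KEY), and the end_key, which arrives as HA_READ_AFTER_KEY
  (see the comment above), yields high_inclusive= TRUE. The resulting scan
  range is therefore [1, 3], inclusive at both ends.
*/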

/**
  Start ordered index scan in NDB
*/

int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
                                      const key_range *end_key,
                                      bool sorted, bool descending,
                                      uchar* buf, part_id_range *part_spec)
{  
  NdbTransaction *trans;
  NdbIndexScanOperation *op;
  int error;

  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d read_set=0x%x",
             active_index, sorted, descending, table->read_set->bitmap[0]));
  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));

  // Check that sorted seems to be initialised
  assert(sorted == 0 || sorted == 1);

  if (unlikely(!(trans= get_transaction(error))))
  {
    DBUG_RETURN(error);
  }

  if ((error= close_scan()))
    DBUG_RETURN(error);

  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);

  const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
  const NdbRecord *row_rec= m_ndb_record;

  NdbIndexScanOperation::IndexBound bound;
  NdbIndexScanOperation::IndexBound *pbound = NULL;
  if (start_key != NULL || end_key != NULL)
  {
    /* 
       Compute bounds info, reversing range boundaries
       if descending
     */
    compute_index_bounds(bound, 
                         table->key_info + active_index,
                         (descending?
                          end_key : start_key),
                         (descending?
                          start_key : end_key),
                         0);
    bound.range_no = 0;
    pbound = &bound;
  }

  if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index))
  {
    const int error= create_pushed_join();
    if (unlikely(error))
      DBUG_RETURN(error);

    NdbQuery* const query= m_active_query;
    if (sorted && query->getQueryOperation((uint)PUSHED_ROOT)
                       ->setOrdering(descending ? NdbQueryOptions::ScanOrdering_descending
                                                : NdbQueryOptions::ScanOrdering_ascending))
    {
      ERR_RETURN(query->getNdbError());
    }

    if (pbound  && query->setBound(key_rec, pbound)!=0)
      ERR_RETURN(query->getNdbError());

    m_thd_ndb->m_scan_count++;

    bool prunable = false;
    if (unlikely(query->isPrunable(prunable) != 0))
      ERR_RETURN(query->getNdbError());
    if (prunable)
      m_thd_ndb->m_pruned_scan_count++;

    // Can't have BLOB in pushed joins (yet)
    assert(!uses_blob_value(table->read_set));
  }
  else
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    NdbScanOperation::ScanOptions options;
    options.optionsPresent=NdbScanOperation::ScanOptions::SO_SCANFLAGS;
    options.scan_flags=0;

    NdbOperation::GetValueSpec gets[2];
    if (table_share->primary_key == MAX_KEY)
      get_hidden_fields_scan(&options, gets);

    get_read_set(true, active_index);

    if (lm == NdbOperation::LM_Read)
      options.scan_flags|= NdbScanOperation::SF_KeyInfo;
    if (sorted)
      options.scan_flags|= NdbScanOperation::SF_OrderByFull;
    if (descending)
      options.scan_flags|= NdbScanOperation::SF_Descending;

    /* Partition pruning */
    if (m_use_partition_pruning && 
        m_user_defined_partitioning && part_spec != NULL &&
        part_spec->start_part == part_spec->end_part)
    {
      /* Explicitly set partition id when pruning User-defined partitioned scan */
      options.partitionId = part_spec->start_part;
      options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
    }

    NdbInterpretedCode code(m_table);
    if (m_cond && m_cond->generate_scan_filter(&code, &options))
      ERR_RETURN(code.getNdbError());

    if (!(op= trans->scanIndex(key_rec, row_rec, lm,
                               (uchar *)(table->read_set->bitmap),
                               pbound,
                               &options,
                               sizeof(NdbScanOperation::ScanOptions))))
      ERR_RETURN(trans->getNdbError());

    DBUG_PRINT("info", ("Is scan pruned to 1 partition? : %u", op->getPruned()));
    m_thd_ndb->m_scan_count++;
    m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);

    if (uses_blob_value(table->read_set) &&
        get_blob_values(op, NULL, table->read_set) != 0)
      ERR_RETURN(op->getNdbError());

    m_active_cursor= op;
  }

  if (sorted)
  {        
    m_thd_ndb->m_sorted_scan_count++;
  }

  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    DBUG_RETURN(ndb_err(trans));
  
  DBUG_RETURN(next_result(buf));
}

static
int
guess_scan_flags(NdbOperation::LockMode lm,
		 const NDBTAB* tab, const MY_BITMAP* readset)
{
  int flags= 0;
  flags|= (lm == NdbOperation::LM_Read) ? NdbScanOperation::SF_KeyInfo : 0;
  if (tab->checkColumns(0, 0) & 2)
  {
    int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset));
    
    if (ret & 2)
    { // If disk columns...use disk scan
      flags |= NdbScanOperation::SF_DiskScan;
    }
    else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive))
    {
      // If no mem column is set and exclusive...guess disk scan
      flags |= NdbScanOperation::SF_DiskScan;
    }
  }
  return flags;
}

/*
  Start a full table scan in NDB, or a unique index scan
  (implemented as a full table scan with a scan filter).
 */

int ha_ndbcluster::full_table_scan(const KEY* key_info, 
                                   const key_range *start_key,
                                   const key_range *end_key,
                                   uchar *buf)
{
  int error;
  NdbTransaction *trans= m_thd_ndb->trans;
  part_id_range part_spec;
  bool use_set_part_id= FALSE;
  NdbOperation::GetValueSpec gets[2];

  DBUG_ENTER("full_table_scan");  
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));

  if (m_use_partition_pruning && m_user_defined_partitioning)
  {
    assert(m_pushed_join_operation != PUSHED_ROOT);
    part_spec.start_part= 0;
    part_spec.end_part= m_part_info->get_tot_partitions() - 1;
    prune_partition_set(table, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in set
      we can return HA_ERR_END_OF_FILE
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }

    if (part_spec.start_part == part_spec.end_part)
    {
      /*
       * Only one partition needs to be scanned. If sorted output was
       * requested it is no longer needed, since the output from a single
       * ordered partition index is always sorted.
       *
       * Note : This table scan pruning currently only occurs for 
       * UserDefined partitioned tables.
       * It could be extended to occur for natively partitioned tables if
       * the Partitioning layer can make a key (e.g. start or end key)
       * available so that we can determine the correct pruning in the 
       * NDBAPI layer.
       */
      use_set_part_id= TRUE;
      if (!trans)
        if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
                                                      error))))
          DBUG_RETURN(error);
    }
  }
  if (!trans)
    if (unlikely(!(trans= start_transaction(error))))
      DBUG_RETURN(error);

  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  NdbScanOperation::ScanOptions options;
  options.optionsPresent = (NdbScanOperation::ScanOptions::SO_SCANFLAGS |
                            NdbScanOperation::ScanOptions::SO_PARALLEL);
  options.scan_flags = guess_scan_flags(lm, m_table, table->read_set);
  options.parallel= DEFAULT_PARALLELISM;

  if (use_set_part_id) {
    assert(m_user_defined_partitioning);
    options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
    options.partitionId = part_spec.start_part;
  };

  if (table_share->primary_key == MAX_KEY)
    get_hidden_fields_scan(&options, gets);

  get_read_set(true, MAX_KEY);

  if (check_if_pushable(NdbQueryOperationDef::TableScan))
  {
    const int error= create_pushed_join();
    if (unlikely(error))
      DBUG_RETURN(error);

    m_thd_ndb->m_scan_count++;
    // Can't have BLOB in pushed joins (yet)
    assert(!uses_blob_value(table->read_set));
  }
  else
  {
    if (m_pushed_join_operation == PUSHED_ROOT)
    {
      m_thd_ndb->m_pushed_queries_dropped++;
    }

    NdbScanOperation *op;
    NdbInterpretedCode code(m_table);

    if (!key_info)
    {
      if (m_cond && m_cond->generate_scan_filter(&code, &options))
        ERR_RETURN(code.getNdbError());
    }
    else
    {
      /* Unique index scan in NDB (full table scan with scan filter) */
      DBUG_PRINT("info", ("Starting unique index scan"));
      if (!m_cond)
        m_cond= new ha_ndbcluster_cond;

      if (!m_cond)
      {
        set_my_errno(HA_ERR_OUT_OF_MEM);
        DBUG_RETURN(my_errno());
      }       
      if (m_cond->generate_scan_filter_from_key(&code, &options, key_info,
                                                start_key, end_key))
        ERR_RETURN(code.getNdbError());
    }

    if (!(op= trans->scanTable(m_ndb_record, lm,
                               (uchar *)(table->read_set->bitmap),
                               &options, sizeof(NdbScanOperation::ScanOptions))))
      ERR_RETURN(trans->getNdbError());

    m_thd_ndb->m_scan_count++;
    m_thd_ndb->m_pruned_scan_count += (op->getPruned()? 1 : 0);

    assert(m_active_cursor==NULL);
    m_active_cursor= op;

    if (uses_blob_value(table->read_set) &&
        get_blob_values(op, NULL, table->read_set) != 0)
      ERR_RETURN(op->getNdbError());
  } // if (check_if_pushable(NdbQueryOperationDef::TableScan))
  
  if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    DBUG_RETURN(ndb_err(trans));
  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
} // ha_ndbcluster::full_table_scan()

int
ha_ndbcluster::set_auto_inc(THD *thd, Field *field)
{
  DBUG_ENTER("ha_ndbcluster::set_auto_inc");
  bool read_bit= bitmap_is_set(table->read_set, field->field_index);
  bitmap_set_bit(table->read_set, field->field_index);
  Uint64 next_val= (Uint64) field->val_int() + 1;
  if (!read_bit)
    bitmap_clear_bit(table->read_set, field->field_index);
  DBUG_RETURN(set_auto_inc_val(thd, next_val));
}


class Ndb_tuple_id_range_guard {
  NDB_SHARE* m_share;
public:
  Ndb_tuple_id_range_guard(NDB_SHARE* share) :
    m_share(share),
    range(share->tuple_id_range)
  {
    native_mutex_lock(&m_share->mutex);
  }
  ~Ndb_tuple_id_range_guard()
  {
    native_mutex_unlock(&m_share->mutex);
  }
  Ndb::TupleIdRange& range;
};
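
/*
  Usage sketch (illustrative; this is how set_auto_inc_val() and
  ndb_write_row() below use the guard): the constructor locks the share
  mutex for the lifetime of the scope and exposes the shared tuple id
  range for the Ndb auto-increment calls.

    Ndb_tuple_id_range_guard g(m_share);
    ndb->setAutoIncrementValue(m_table, g.range, value, TRUE);
*/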


inline
int
ha_ndbcluster::set_auto_inc_val(THD *thd, Uint64 value)
{
  Ndb *ndb= get_ndb(thd);
  DBUG_ENTER("ha_ndbcluster::set_auto_inc_val");
  DBUG_PRINT("enter", ("value: %llu", value));
  if (ndb->checkUpdateAutoIncrementValue(m_share->tuple_id_range, value))
  {
    Ndb_tuple_id_range_guard g(m_share);
    if (ndb->setAutoIncrementValue(m_table, g.range, value, TRUE)
        == -1)
      ERR_RETURN(ndb->getNdbError());
  }
  DBUG_RETURN(0);
}


void
ha_ndbcluster::get_read_set(bool use_cursor, uint idx)
{
  const bool is_delete=
    table->in_use->lex->sql_command == SQLCOM_DELETE ||
    table->in_use->lex->sql_command == SQLCOM_DELETE_MULTI;

  const bool is_update=
    table->in_use->lex->sql_command == SQLCOM_UPDATE ||
    table->in_use->lex->sql_command == SQLCOM_UPDATE_MULTI;

  assert(use_cursor ||
         idx == table_share->primary_key ||
         table->key_info[idx].flags & HA_NOSAME);

  if (!is_delete && !is_update)
  {
    return;
  }

  /**
   * It is questionable that we in some cases seem to
   * do a read even if 'm_read_before_write_removal_used' is set.
   * The usage pattern for this seems to be update/delete
   * cursors which establish a 'current of' position before
   * a delete- / updateCurrentTuple().
   * Anyway, when 'm_read_before_write_removal_used' is set we don't
   * have to add more columns to 'read_set'.
   *
   * FUTURE: Investigate if we could have completely
   * cleared the 'read_set'.
   */
  if (m_read_before_write_removal_used)
  {
    return;
  }

  /**
   * If (part of) a primary key is updated, it is executed
   * as a delete+reinsert. In order to avoid extra read round-trips
   * to fetch the missing columns required by the reinsert:
   * ensure all columns not being modified (not in write_set)
   * are read prior to ::ndb_pk_update_row().
   * All PK columns are also required by ::ndb_delete_row().
   */
  if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
  {
    assert(table_share->primary_key != MAX_KEY);
    bitmap_set_all(&m_bitmap);
    bitmap_subtract(&m_bitmap, table->write_set);
    bitmap_union(table->read_set, &m_bitmap);
    bitmap_union(table->read_set, m_pk_bitmap_p);
  }

  /**
   * Determine whether we have to read PK columns in
   * addition to those columns already present in read_set.
   * NOTE: As checked above, it is a precondition that
   *       a read is required as part of delete/update
   *       (!m_read_before_write_removal_used)
   *
   * PK columns are required when:
   *  1) This is a primary/unique keyop.
   *     (i.e. not a positioned update/delete which
   *      maintains a 'current of' position.)
   *
   * In addition, when a 'current of' position is available:
   *  2) When deleting a row containing BLOBs, the PK is required
   *     to delete the BLOBs stored in separate fragments.
   *  3) When updating BLOB columns, the PK is required to delete
   *     the old BLOB + insert the new BLOB contents.
   */
  else
  if (!use_cursor ||                             // 1)
      (is_delete && table_share->blob_fields) || // 2)
      uses_blob_value(table->write_set))         // 3)
  {
    bitmap_union(table->read_set, m_pk_bitmap_p);
  }

  /**
   * If the update/delete uses partition pruning, we need
   * to read the column values which are part of the
   * partition spec, as they are used by
   * ::get_parts_for_update() / ::get_parts_for_delete().
   * Partition columns are always part of the PK, so we only
   * have to do this if the pk_bitmap wasn't added yet.
   */
  else if (m_use_partition_pruning)  // && m_user_defined_partitioning)
  {
    assert(bitmap_is_subset(&m_part_info->full_part_field_set,
                            m_pk_bitmap_p));
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }
      

  /**
   * An update might cause a PK or unique key violation.
   * Error reporting needs the values from the offending
   * unique columns to have been read:
   *
   * NOTE: This is NOT required for the correctness
   *       of the update operation itself. Maybe we
   *       should consider other strategies, like
   *       deferring reading of the column values
   *       until formatting the error message.
   */
  if (is_update && m_has_unique_index)
  {
    for (uint i= 0; i < table_share->keys; i++)
    {
      if ((table->key_info[i].flags & HA_NOSAME) &&
          bitmap_is_overlapping(table->write_set, m_key_fields[i]))
      {
        bitmap_union(table->read_set, m_key_fields[i]);
      }
    }
  }
}
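
/*
  Illustrative example of the logic above: for an UPDATE that modifies a
  primary key column, write_set overlaps m_pk_bitmap_p, so every column
  not being modified plus the full PK is added to read_set. This gives
  the delete+reinsert performed by ::ndb_pk_update_row() all the column
  values it needs without an extra read round-trip.
*/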


Uint32
ha_ndbcluster::setup_get_hidden_fields(NdbOperation::GetValueSpec gets[2])
{
  Uint32 num_gets= 0;
  /*
    We need to read the hidden primary key, and possibly the FRAGMENT
    pseudo-column.
  */
  gets[num_gets].column= get_hidden_key_column();
  gets[num_gets].appStorage= &m_ref;
  num_gets++;
  if (m_user_defined_partitioning)
  {
    /* Need to read partition id to support ORDER BY columns. */
    gets[num_gets].column= NdbDictionary::Column::FRAGMENT;
    gets[num_gets].appStorage= &m_part_id;
    num_gets++;
  }
  return num_gets;
}

void
ha_ndbcluster::get_hidden_fields_keyop(NdbOperation::OperationOptions *options,
                                       NdbOperation::GetValueSpec gets[2])
{
  Uint32 num_gets= setup_get_hidden_fields(gets);
  options->optionsPresent|= NdbOperation::OperationOptions::OO_GETVALUE;
  options->extraGetValues= gets;
  options->numExtraGetValues= num_gets;
}

void
ha_ndbcluster::get_hidden_fields_scan(NdbScanOperation::ScanOptions *options,
                                      NdbOperation::GetValueSpec gets[2])
{
  Uint32 num_gets= setup_get_hidden_fields(gets);
  options->optionsPresent|= NdbScanOperation::ScanOptions::SO_GETVALUE;
  options->extraGetValues= gets;
  options->numExtraGetValues= num_gets;
}

inline void
ha_ndbcluster::eventSetAnyValue(THD *thd, 
                                NdbOperation::OperationOptions *options) const
{
  options->anyValue= 0;
  if (unlikely(m_slow_path))
  {
    /*
      Ignore TNTO_NO_LOGGING for slave thd.  It is used to indicate
      log-slave-updates option.  This is instead handled in the
      injector thread, by looking explicitly at the
      opt_log_slave_updates flag.
    */
    Thd_ndb *thd_ndb= get_thd_ndb(thd);
    if (thd->slave_thread)
    {
      /*
        Slave-thread, we are applying a replicated event.
        We set the server_id to the value received from the log which
        may be a composite of server_id and other data according
        to the server_id_bits option.
        In future it may be useful to support *not* mapping composite
        AnyValues to/from Binlogged server-ids
      */
      options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
      options->anyValue = thd_unmasked_server_id(thd);
    }
    else if (thd_ndb->trans_options & TNTO_NO_LOGGING)
    {
      options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
      ndbcluster_anyvalue_set_nologging(options->anyValue);
    }
  }
#ifndef NDEBUG
  DBUG_EXECUTE_IF("ndb_set_reflect_anyvalue",
                  {
                    fprintf(stderr, "Ndb forcing reflect AnyValue\n");
                    options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
                    ndbcluster_anyvalue_set_reflect_op(options->anyValue);
                  });
  DBUG_EXECUTE_IF("ndb_set_refresh_anyvalue",
                  {
                    fprintf(stderr, "Ndb forcing refresh AnyValue\n");
                    options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
                    ndbcluster_anyvalue_set_refresh_op(options->anyValue);
                  });
  
  /*
    MySQLD will set the user-portion of AnyValue (if any) to all 1s.
    This tests code that filters ServerIds based on the value of server-id-bits.
  */
  const char* p = getenv("NDB_TEST_ANYVALUE_USERDATA");
  if (p != 0  && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
  {
    options->optionsPresent |= NdbOperation::OperationOptions::OO_ANYVALUE;
    dbug_ndbcluster_anyvalue_set_userbits(options->anyValue);
  }
#endif
}

#ifdef HAVE_NDB_BINLOG

/**
   prepare_conflict_detection

   This method is called during operation definition by the slave,
   when writing to a table with conflict detection defined.

   It is responsible for defining and adding any operation filtering
   required, and for saving any operation definition state required
   for post-execute analysis.

   For transactional detection, this method may determine that the
   operation being defined should not be executed, and conflict
   handling should occur immediately.  In this case, conflict_handled
   is set to true.
*/
int
ha_ndbcluster::prepare_conflict_detection(enum_conflicting_op_type op_type,
                                          const NdbRecord* key_rec,
                                          const NdbRecord* data_rec,
                                          const uchar* old_data,
                                          const uchar* new_data,
                                          const MY_BITMAP *write_set,
                                          NdbTransaction* trans,
                                          NdbInterpretedCode* code,
                                          NdbOperation::OperationOptions* options,
                                          bool& conflict_handled,
                                          bool& avoid_ndbapi_write)
{
  DBUG_ENTER("prepare_conflict_detection");
  THD* thd = table->in_use;
  int res = 0;
  assert(thd->slave_thread);

  conflict_handled = false;

  /*
    Special check for the apply_status table: we don't want
    to do any conflict handling for it.
  */
  if (unlikely(m_share == ndb_apply_status_share))
  {
    DBUG_RETURN(0);
  }

  /*
     Check the transaction id first, as in transactional conflict detection
     the transaction id is what eventually dictates whether an operation
     is applied or not.

     Note that this applies even if the current operation's table does not
     have a conflict function defined - if a transaction spans a 'transactional
     conflict detection' table and a non-transactional table, the non-transactional
     table's data will also be reverted.
  */
  Uint64 transaction_id = Ndb_binlog_extra_row_info::InvalidTransactionId;
  Uint16 conflict_flags = Ndb_binlog_extra_row_info::UnsetConflictFlags;
  bool op_is_marked_as_read= false;
  bool op_is_marked_as_reflected= false;
  bool op_is_marked_as_refresh= false;

  if (thd->binlog_row_event_extra_data)
  {
    Ndb_binlog_extra_row_info extra_row_info;
    if (extra_row_info.loadFromBuffer(thd->binlog_row_event_extra_data) != 0)
    {
      sql_print_warning("NDB Slave : Malformed event received on table %s "
                        "cannot parse.  Stopping Slave.",
                        m_share->key_string());
      DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT );
    }
    
    if (extra_row_info.getFlags() &
        Ndb_binlog_extra_row_info::NDB_ERIF_TRANSID)
      transaction_id = extra_row_info.getTransactionId();

    if (extra_row_info.getFlags() &
        Ndb_binlog_extra_row_info::NDB_ERIF_CFT_FLAGS)
    {
      DBUG_PRINT("info", 
                 ("Slave : have conflict flags : %x\n",
                  extra_row_info.getConflictFlags()));
      conflict_flags = extra_row_info.getConflictFlags();

      if (conflict_flags & NDB_ERIF_CFT_REFLECT_OP)
      {
        op_is_marked_as_reflected= true;
        g_ndb_slave_state.current_reflect_op_prepare_count++;
      }
      
      if (conflict_flags & NDB_ERIF_CFT_REFRESH_OP)
      {
        op_is_marked_as_refresh= true;
        g_ndb_slave_state.current_refresh_op_count++;
      }

      if (conflict_flags & NDB_ERIF_CFT_READ_OP)
        op_is_marked_as_read= true;

      /* Sanity - 1 flag at a time at most */
      assert(! (op_is_marked_as_reflected &&
                op_is_marked_as_refresh));
      assert(! (op_is_marked_as_read &&
                (op_is_marked_as_reflected ||
                 op_is_marked_as_refresh)));
    }
  }

  const st_conflict_fn_def* conflict_fn = (m_share->m_cfn_share?
                                           m_share->m_cfn_share->m_conflict_fn:
                                           NULL);

  bool pass_mode = false;
  if (conflict_fn)
  {
    /* Check Slave Conflict Role Variable setting */
    if (conflict_fn->flags & CF_USE_ROLE_VAR)
    {
      switch (opt_ndb_slave_conflict_role)
      {
      case SCR_NONE:
      {
        sql_print_warning("NDB Slave : Conflict function %s defined on "
                          "table %s requires ndb_slave_conflict_role variable "
                          "to be set.  Stopping slave.",
                          conflict_fn->name,
                          m_share->key_string());
        DBUG_RETURN(ER_SLAVE_CONFIGURATION);
      }
      case SCR_PASS:
      {
        pass_mode = true;
      }
      /* Fall through */
      default:
        /* PRIMARY, SECONDARY */
        break;
      }
    }
  }

  {
    bool handle_conflict_now = false;
    const uchar* row_data = (op_type == WRITE_ROW? new_data : old_data);
    int res = g_ndb_slave_state.atPrepareConflictDetection(m_table,
                                                           key_rec,
                                                           row_data,
                                                           transaction_id,
                                                           handle_conflict_now);
    if (res)
      DBUG_RETURN(res);

    if (handle_conflict_now)
    {
      DBUG_PRINT("info", ("Conflict handling for row occurring now"));
      NdbError noRealConflictError;
      /*
       * If the user operation was a read and we receive an update
       * log event due to an AnyValue update, then the conflicting operation
       * should be reported as a read. 
       */
      enum_conflicting_op_type conflicting_op=
        (op_type == UPDATE_ROW && op_is_marked_as_read)?
        READ_ROW
        : op_type;
      /*
         Directly handle the conflict here - e.g refresh/ write to
         exceptions table etc.
      */
      res = handle_row_conflict(m_share->m_cfn_share,
                                m_share->table_name,
                                m_share->flags & NSF_BLOB_FLAG,
                                "Transaction",
                                key_rec,
                                data_rec,
                                old_data,
                                new_data,
                                conflicting_op,
                                TRANS_IN_CONFLICT,
                                noRealConflictError,
                                trans,
                                write_set,
                                transaction_id);
      if (unlikely(res))
        DBUG_RETURN(res);

      g_ndb_slave_state.conflict_flags |= SCS_OPS_DEFINED;

      /*
        Indicate that there (may be) some more operations to
        execute before committing
      */
      m_thd_ndb->m_unsent_bytes+= 12;
      conflict_handled = true;
      DBUG_RETURN(0);
    }
  }

  if (conflict_fn == NULL ||
      pass_mode)
  {
    /* No conflict function definition required */
    DBUG_RETURN(0);
  }

  /**
   * By default conflict algorithms use the 'natural' NdbApi ops 
   * (insert/update/delete) which can detect presence anomalies, 
   * as opposed to NdbApi write which ignores them.
   * However in some cases, we want to use NdbApi write to apply 
   * events received on tables with conflict detection defined 
   * (e.g. when we want to forcibly align a row with a refresh op).
   */
  avoid_ndbapi_write = true;

  if (unlikely((conflict_fn->flags & CF_TRANSACTIONAL) &&
               (transaction_id == Ndb_binlog_extra_row_info::InvalidTransactionId)))
  {
    sql_print_warning("NDB Slave : Transactional conflict detection defined on table %s, but "
                      "events received without transaction ids.  Check --ndb-log-transaction-id setting "
                      "on upstream Cluster.",
                      m_share->key_string());
    /* This is a user error, but we want them to notice, so treat seriously */
    DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT );
  }

  /**
   * Normally, update and delete have an attached program executed against
   * the existing row content.  Insert (and NdbApi write) do not.  
   * Insert cannot as there is no pre-existing row to examine (and therefore
   * no non prepare-time deterministic decisions to make).
   * NdbApi Write technically could if the row already existed, but this is 
   * not currently supported by NdbApi.
   */
  bool prepare_interpreted_program = (op_type != WRITE_ROW);

  if (conflict_fn->flags & CF_REFLECT_SEC_OPS)
  {
    /* This conflict function reflects secondary ops at the Primary */
    
    if (opt_ndb_slave_conflict_role == SCR_PRIMARY)
    {
      /**
       * Here we mark the applied operations to indicate that they
       * should be reflected back to the SECONDARY cluster.
       * This is required so that :
       *   1.  They are given local Binlog Event source serverids
       *       and so will pass through to the storage engine layer
       *       on the SECONDARY.
       *       (Normally they would be filtered in the Slave IO thread
       *        as having returned-to-source)
       *
       *   2.  They can be tagged as reflected so that the SECONDARY
       *       can handle them differently
       *       (They are force-applied)
       */
      DBUG_PRINT("info", ("Setting AnyValue to reflect secondary op"));

      options->optionsPresent |=
        NdbOperation::OperationOptions::OO_ANYVALUE;
      ndbcluster_anyvalue_set_reflect_op(options->anyValue);
    }
    else if (opt_ndb_slave_conflict_role == SCR_SECONDARY)
    {
      /**
       * On the Secondary, we receive reflected operations which
       * we want to attempt to apply under certain conditions.
       * This is done to recover from situations where
       * both PRIMARY and SECONDARY have performed concurrent
       * DELETEs.
       * 
       * For non reflected operations we want to apply Inserts and
       * Updates using write_tuple() to get an idempotent effect
       */
      if (op_is_marked_as_reflected)
      {
        /**
         * Apply operations using their 'natural' operation types
         * with interpreted programs attached where appropriate.
         * Natural operation types used so that we become aware
         * of any 'presence' issues (row does/not exist).
         */
        DBUG_PRINT("info", ("Reflected operation"));
      }
      else
      {
        /**
         * Either a normal primary sourced change, or a refresh
         * operation.
         * In both cases we want to apply the operation idempotently,
         * and there's no need for an interpreted program.
         * e.g.
         *   WRITE_ROW  -> NdbApi write_row
         *   UPDATE_ROW -> NdbApi write_row
         *   DELETE_ROW -> NdbApi delete_row
         *
         * NdbApi write_row does not fail.
         * NdbApi delete_row will complain if the row does not exist
         * but this will be ignored
         */
        DBUG_PRINT("info", ("Allowing use of NdbApi write_row "
                            "for non reflected op (%u)",
                            op_is_marked_as_refresh));
        prepare_interpreted_program = false;
        avoid_ndbapi_write = false;
      }
    }
  }

  /*
     Prepare interpreted code for operation (update + delete only) according
     to algorithm used
  */
  if (prepare_interpreted_program)
  {
    res = conflict_fn->prep_func(m_share->m_cfn_share,
                                 op_type,
                                 m_ndb_record,
                                 old_data,
                                 new_data,
                                 table->read_set,  // Before image
                                 table->write_set, // After image
                                 code);

    if (res == 0)
    {
      if (code->getWordsUsed() > 0)
      {
        /* Attach conflict detecting filter program to operation */
        options->optionsPresent|=
          NdbOperation::OperationOptions::OO_INTERPRETED;
        options->interpretedCode= code;
      }
    }
    else
    {
      sql_print_warning("NDB Slave : Binlog event on table %s missing "
                        "info necessary for conflict detection.  "
                        "Check binlog format options on upstream cluster.",
                        m_share->key_string());
      DBUG_RETURN( ER_SLAVE_CORRUPT_EVENT);
    }
  } // if (op_type != WRITE_ROW)

  g_ndb_slave_state.conflict_flags |= SCS_OPS_DEFINED;

  /* Now save data for potential insert to exceptions table... */
  Ndb_exceptions_data ex_data;
  ex_data.share= m_share;
  ex_data.key_rec= key_rec;
  ex_data.data_rec= data_rec;
  ex_data.op_type= op_type;
  ex_data.reflected_operation = op_is_marked_as_reflected;
  ex_data.trans_id= transaction_id;
  /*
    We need to save the row data for possible conflict resolution after
    execute().
  */
  if (old_data)
    ex_data.old_row= copy_row_to_buffer(m_thd_ndb, old_data);
  if (old_data != NULL && ex_data.old_row == NULL)
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
  if (new_data)
    ex_data.new_row= copy_row_to_buffer(m_thd_ndb, new_data);
  if (new_data !=  NULL && ex_data.new_row == NULL)
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

  ex_data.bitmap_buf= NULL;
  ex_data.write_set= NULL;
  if (table->write_set)
  {
    /* Copy table write set */
    ex_data.bitmap_buf=
      (my_bitmap_map *) get_buffer(m_thd_ndb, table->s->column_bitmap_size);
    if (ex_data.bitmap_buf == NULL)
    {
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    ex_data.write_set= (MY_BITMAP*) get_buffer(m_thd_ndb, sizeof(MY_BITMAP));
    if (ex_data.write_set == NULL)
    {
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    bitmap_init(ex_data.write_set, ex_data.bitmap_buf,
                table->write_set->n_bits, false);
    bitmap_copy(ex_data.write_set, table->write_set);
  }

  uchar* ex_data_buffer= get_buffer(m_thd_ndb, sizeof(ex_data));
  if (ex_data_buffer == NULL)
  {
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }
  memcpy(ex_data_buffer, &ex_data, sizeof(ex_data));

  /* Store ptr to exceptions data in operation 'customdata' ptr */
  options->optionsPresent|= NdbOperation::OperationOptions::OO_CUSTOMDATA;
  options->customData= (void*)ex_data_buffer;

  DBUG_RETURN(0);
}

/**
   handle_conflict_op_error

   This method is called when an error is detected after executing an
   operation with conflict detection active.

   If the operation error is related to conflict detection, handling
   starts.

   Handling involves incrementing the relevant counter, and optionally
   refreshing the row and inserting an entry into the exceptions table
*/

static int
handle_conflict_op_error(NdbTransaction* trans,
                         const NdbError& err,
                         const NdbOperation* op)
{
  DBUG_ENTER("handle_conflict_op_error");
  DBUG_PRINT("info", ("ndb error: %d", err.code));

  if ((err.code == (int) error_conflict_fn_violation) ||
      (err.code == (int) error_op_after_refresh_op) ||
      (err.classification == NdbError::ConstraintViolation) ||
      (err.classification == NdbError::NoDataFound))
  {
    DBUG_PRINT("info",
               ("err.code = %s, err.classification = %s",
               ((err.code == (int) error_conflict_fn_violation)?
                "error_conflict_fn_violation":
                ((err.code == (int) error_op_after_refresh_op)?
                 "error_op_after_refresh_op" : "?")),
               ((err.classification == NdbError::ConstraintViolation)?
                "ConstraintViolation":
                ((err.classification == NdbError::NoDataFound)?
                 "NoDataFound" : "?"))));

    enum_conflict_cause conflict_cause;

    /* Map cause onto our conflict description type */
    if ((err.code == (int) error_conflict_fn_violation) ||
        (err.code == (int) error_op_after_refresh_op))
    {
      DBUG_PRINT("info", ("ROW_IN_CONFLICT"));
      conflict_cause= ROW_IN_CONFLICT;
    }
    else if (err.classification == NdbError::ConstraintViolation)
    {
      DBUG_PRINT("info", ("ROW_ALREADY_EXISTS"));
      conflict_cause= ROW_ALREADY_EXISTS;
    }
    else
    {
      assert(err.classification == NdbError::NoDataFound);
      DBUG_PRINT("info", ("ROW_DOES_NOT_EXIST"));
      conflict_cause= ROW_DOES_NOT_EXIST;
    }

    /* Get exceptions data from operation */
    const void* buffer=op->getCustomData();
    assert(buffer);
    Ndb_exceptions_data ex_data;
    memcpy(&ex_data, buffer, sizeof(ex_data));
    NDB_SHARE *share= ex_data.share;
    NDB_CONFLICT_FN_SHARE* cfn_share= share ? share->m_cfn_share : NULL;

    const NdbRecord* key_rec= ex_data.key_rec;
    const NdbRecord* data_rec= ex_data.data_rec;
    const uchar* old_row= ex_data.old_row;
    const uchar* new_row= ex_data.new_row;
#ifndef NDEBUG
    const uchar* row=
      (ex_data.op_type == DELETE_ROW)?
      ex_data.old_row : ex_data.new_row;
#endif
    enum_conflicting_op_type causing_op_type= ex_data.op_type;
    const MY_BITMAP *write_set= ex_data.write_set;

    DBUG_PRINT("info", ("Conflict causing op type : %u",
                        causing_op_type));

    if (causing_op_type == REFRESH_ROW)
    {
      /*
         The failing op was a refresh row, we require that it
         failed due to being a duplicate (e.g. a refresh
         occurring on a refreshed row)
       */
      if (err.code == (int) error_op_after_refresh_op)
      {
        DBUG_PRINT("info", ("Operation after refresh - ignoring"));
        DBUG_RETURN(0);
      }
      else
      {
        DBUG_PRINT("info", ("Refresh op hit real error %u", err.code));
        /* Unexpected error, normal handling*/
        DBUG_RETURN(err.code);
      }
    }

    if (ex_data.reflected_operation)
    {
      DBUG_PRINT("info", ("Reflected operation error : %u.",
                          err.code));
      
      /**
       * Expected cases are :
       *   Insert : Row already exists :      Don't care - discard
       *              Secondary has this row, or a future version
       *  
       *   Update : Row does not exist :      Don't care - discard
       *              Secondary has deleted this row later.
       *
       *            Conflict
       *            (Row written here last) : Don't care - discard
       *              Secondary has this row, or a future version
       * 
       *   Delete : Row does not exist :      Don't care - discard
       *              Secondary has deleted this row later.
       * 
       *            Conflict
       *            (Row written here last) : Don't care - discard
       *              Secondary has a future version of this row
       *
       *   Presence and authorship conflicts are used to determine
       *   whether to apply a reflected operation.
       *   The presence checks avoid divergence, and the authorship
       *   checks avoid all actions being applied as delayed
       *   duplicates.
       */
      assert((err.code == (int) error_conflict_fn_violation) ||
             (err.classification == NdbError::ConstraintViolation) ||
             (err.classification == NdbError::NoDataFound));
      
      g_ndb_slave_state.current_reflect_op_discard_count++;

      DBUG_RETURN(0);
    }

    {
      /**
       * For asymmetric algorithms that use the ROLE variable to
       * determine their role, we check whether we are on the
       * SECONDARY cluster.
       * This is as far as we want to process conflicts on the
       * SECONDARY.
       */
      bool secondary = cfn_share &&
        cfn_share->m_conflict_fn &&
        (cfn_share->m_conflict_fn->flags & CF_USE_ROLE_VAR) &&
        (opt_ndb_slave_conflict_role == SCR_SECONDARY);
      
      if (secondary)
      {
        DBUG_PRINT("info", ("Conflict detected, on secondary - ignore"));
        DBUG_RETURN(0);
      }
    }

    assert(share != NULL && row != NULL);
    bool table_has_trans_conflict_detection =
      cfn_share &&
      cfn_share->m_conflict_fn &&
      (cfn_share->m_conflict_fn->flags & CF_TRANSACTIONAL);

    if (table_has_trans_conflict_detection)
    {
      /* Mark this transaction as in-conflict, unless this is a 
       * Delete-Delete conflict, which we can't currently handle
       * in the normal way
       */
      if (! ((causing_op_type == DELETE_ROW) &&
             (conflict_cause == ROW_DOES_NOT_EXIST)))
      {
        /* Perform special transactional conflict-detected handling */
        int res = g_ndb_slave_state.atTransConflictDetected(ex_data.trans_id);
        if (res)
          DBUG_RETURN(res);
      }
    }

    if (cfn_share)
    {
      /* Now handle the conflict on this row */
      enum_conflict_fn_type cft = cfn_share->m_conflict_fn->type;

      g_ndb_slave_state.current_violation_count[cft]++;

      int res = handle_row_conflict(cfn_share,
                                    share->table_name,
                                    false, /* table_has_blobs */
                                    "Row",
                                    key_rec,
                                    data_rec,
                                    old_row,
                                    new_row,
                                    causing_op_type,
                                    conflict_cause,
                                    err,
                                    trans,
                                    write_set,
                                    /*
                                      ORIG_TRANSID not available for
                                      non-transactional conflict detection.
                                    */
                                    Ndb_binlog_extra_row_info::InvalidTransactionId);

      DBUG_RETURN(res);
    }
    else
    {
      DBUG_PRINT("info", ("missing cfn_share"));
      DBUG_RETURN(0); // TODO : Correct?
    }
  }
  else
  {
    /* Non conflict related error */
    DBUG_PRINT("info", ("err.code == %u", err.code));
    DBUG_RETURN(err.code);
  }

  DBUG_RETURN(0); // Reachable?
}

/*
  is_serverid_local
*/
static bool is_serverid_local(Uint32 serverid)
{
  /*
     If it's not our serverid, check the
     IGNORE_SERVER_IDS setting to see whether
     it should still be treated as local.
  */
  return ((serverid == ::server_id) ||
          ndb_mi_get_ignore_server_id(serverid));
}
#endif

int ha_ndbcluster::write_row(uchar *record)
{
  DBUG_ENTER("ha_ndbcluster::write_row");
#ifdef HAVE_NDB_BINLOG
  if (m_share == ndb_apply_status_share && table->in_use->slave_thread)
  {
    uint32 row_server_id, master_server_id= ndb_mi_get_master_server_id();
    uint64 row_epoch;
    memcpy(&row_server_id, table->field[0]->ptr + (record - table->record[0]),
           sizeof(row_server_id));
    memcpy(&row_epoch, table->field[1]->ptr + (record - table->record[0]),
           sizeof(row_epoch));
    int rc = g_ndb_slave_state.atApplyStatusWrite(master_server_id,
                                                  row_server_id,
                                                  row_epoch,
                                                  is_serverid_local(row_server_id));
    if (rc != 0)
    {
      /* Stop Slave */
      DBUG_RETURN(rc);
    }
  }
#endif /* HAVE_NDB_BINLOG */
  DBUG_RETURN(ndb_write_row(record, FALSE, FALSE));
}

/**
  Insert one record into NDB
*/
int ha_ndbcluster::ndb_write_row(uchar *record,
                                 bool primary_key_update,
                                 bool batched_update)
{
  bool has_auto_increment;
  const NdbOperation *op;
  THD *thd= table->in_use;
  Thd_ndb *thd_ndb= m_thd_ndb;
  NdbTransaction *trans;
  uint32 part_id;
  int error= 0;
  NdbOperation::SetValueSpec sets[3];
  Uint32 num_sets= 0;
  DBUG_ENTER("ha_ndbcluster::ndb_write_row");

  error = check_slave_state(thd);
  if (unlikely(error))
    DBUG_RETURN(error);

  has_auto_increment= (table->next_number_field && record == table->record[0]);

  if (has_auto_increment && table_share->primary_key != MAX_KEY) 
  {
    /*
     * Generate the next value for an auto_incremented primary key
     */
    m_skip_auto_increment= FALSE;
    if ((error= update_auto_increment()))
      DBUG_RETURN(error);
    m_skip_auto_increment= (insert_id_for_cur_row == 0 ||
                            thd->auto_inc_intervals_forced.nb_elements());
  }

  /*
   * If IGNORE is specified, ignore constraint violations on primary and
   * unique keys
   */
  if (!m_use_write && m_ignore_dup_key)
  {
    /*
      Compare this if-expression with the one in start_bulk_insert();
      start_bulk_insert() sets parameters to ensure that each
      write_row is committed individually
    */
    int peek_res= peek_indexed_rows(record, NDB_INSERT);
    
    if (!peek_res) 
    {
      error= HA_ERR_FOUND_DUPP_KEY;
    }
    else if (peek_res != HA_ERR_KEY_NOT_FOUND)
    {
      error= peek_res;
    }
    if (error)
    {
      if ((has_auto_increment) && (m_skip_auto_increment))
      {
        int ret_val;
        if ((ret_val= set_auto_inc(thd, table->next_number_field)))
        {
          DBUG_RETURN(ret_val);
        }
      }
      m_skip_auto_increment= TRUE;
      DBUG_RETURN(error);
    }
  }

  bool uses_blobs= uses_blob_value(table->write_set);

  Uint64 auto_value;
  const NdbRecord *key_rec;
  const uchar *key_row;
  if (table_share->primary_key == MAX_KEY)
  {
    /* Table has hidden primary key. */
    Ndb *ndb= get_ndb(thd);
    uint retries= NDB_AUTO_INCREMENT_RETRIES;
    int retry_sleep= 30; /* 30 milliseconds, transaction */
    for (;;)
    {
      Ndb_tuple_id_range_guard g(m_share);
      if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1000) == -1)
      {
	if (--retries && !thd->killed &&
	    ndb->getNdbError().status == NdbError::TemporaryError)
	{
	  do_retry_sleep(retry_sleep);
	  continue;
	}
	ERR_RETURN(ndb->getNdbError());
      }
      break;
    }
    sets[num_sets].column= get_hidden_key_column();
    sets[num_sets].value= &auto_value;
    num_sets++;
    key_rec= m_ndb_hidden_key_record;
    key_row= (const uchar *)&auto_value;
  }
  else
  {
    key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
    key_row= record;
  }

  trans= thd_ndb->trans;
  if (m_user_defined_partitioning)
  {
    assert(m_use_partition_pruning);
    longlong func_value= 0;
    my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
    error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
    dbug_tmp_restore_column_map(table->read_set, old_map);
    if (unlikely(error))
    {
      m_part_info->err_value= func_value;
      DBUG_RETURN(error);
    }
    {
      /*
        We need to set the value of the partition function in NDB, since
        the NDB kernel doesn't have easy access to the function needed to
        calculate the value.
      */
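      /*
        Clamp to the 32-bit range used for the stored partition function
        value (compare the uint32 used for the same purpose in
        ndb_update_row()).
      */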
      if (func_value >= INT_MAX32)
        func_value= INT_MAX32;
      sets[num_sets].column= get_partition_id_column();
      sets[num_sets].value= &func_value;
      num_sets++;
    }
    if (!trans)
      if (unlikely(!(trans= start_transaction_part_id(part_id, error))))
        DBUG_RETURN(error);
  }
  else if (!trans)
  {
    if (unlikely(!(trans= start_transaction_row(key_rec, key_row, error))))
      DBUG_RETURN(error);
  }
  assert(trans);

  ha_statistic_increment(&SSV::ha_write_count);

  /*
     Setup OperationOptions
   */
  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions = NULL;
  options.optionsPresent=0;
  
  eventSetAnyValue(thd, &options); 
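  /*
    need_flush is set when adding this row's bytes would fill the current
    batch, in which case the pending operations are executed below rather
    than deferred for later batched execution.
  */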
  const bool need_flush=
      thd_ndb->add_row_check_if_batch_full(m_bytes_per_write);

  const Uint32 authorValue = 1;
  if ((thd->slave_thread) &&
      (m_table->getExtraRowAuthorBits()))
  {
    /* Set author to indicate slave updated last */
    sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
    sets[num_sets].value= &authorValue;
    num_sets++;
  }

  if (m_user_defined_partitioning)
  {
    options.optionsPresent |= NdbOperation::OperationOptions::OO_PARTITION_ID;
    options.partitionId= part_id;
  }
  if (num_sets)
  {
    options.optionsPresent |= NdbOperation::OperationOptions::OO_SETVALUE;
    options.extraSetValues= sets;
    options.numExtraSetValues= num_sets;
  }
  if (thd->slave_thread || THDVAR(thd, deferred_constraints))
  {
    options.optionsPresent |=
      NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
  }

  if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
  {
    DBUG_PRINT("info", ("Disabling foreign keys"));
    options.optionsPresent |=
      NdbOperation::OperationOptions::OO_DISABLE_FK;
  }

  if (options.optionsPresent != 0)
    poptions=&options;

  const Uint32 bitmapSz= (NDB_MAX_ATTRIBUTES_IN_TABLE + 31)/32;
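  /*
    bitmapSz rounds NDB_MAX_ATTRIBUTES_IN_TABLE up to whole 32-bit words,
    e.g. 512 attributes -> (512 + 31) / 32 = 16 words, enough for one bit
    per possible column.
  */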
  uint32 tmpBitmapSpace[bitmapSz];
  MY_BITMAP tmpBitmap;
  MY_BITMAP *user_cols_written_bitmap;
  bool avoidNdbApiWriteOp = false; /* ndb_write_row defaults to write */
#ifdef HAVE_NDB_BINLOG
  /* Conflict resolution in slave thread */
  if (thd->slave_thread)
  {
    bool conflict_handled = false;

    if (unlikely((error = prepare_conflict_detection(WRITE_ROW,
                                                     key_rec,
                                                     m_ndb_record,
                                                     NULL,    /* old_data */
                                                     record,  /* new_data */
                                                     table->write_set,
                                                     trans,
                                                     NULL,    /* code */
                                                     &options,
                                                     conflict_handled,
                                                     avoidNdbApiWriteOp))))
      DBUG_RETURN(error);

    if (unlikely(conflict_handled))
    {
      /* No need to continue with operation definition */
      /* TODO : Ensure batch execution */
      DBUG_RETURN(0);
    }
  };
#endif

  if (m_use_write &&
      !avoidNdbApiWriteOp)
  {
    uchar* mask;

    if (applying_binlog(thd))
    {
      /*
        Use write_set when applying binlog to avoid trampling
        unchanged columns
      */
      user_cols_written_bitmap= table->write_set;
      mask= (uchar *)(user_cols_written_bitmap->bitmap);
    }
    else
    {
      /* Ignore write_set for REPLACE command */
      user_cols_written_bitmap= NULL;
      mask= NULL;
    }
    /* TODO : Add conflict detection etc when interpreted write supported */
    op= trans->writeTuple(key_rec, (const char *)key_row, m_ndb_record,
                          (char *)record, mask,
                          poptions, sizeof(NdbOperation::OperationOptions));
  }
  else
  {
    uchar *mask;

    /* Check whether Ndb table definition includes any default values. */
    if (m_table->hasDefaultValues())
    {
      DBUG_PRINT("info", ("Not sending values for native defaulted columns"));

      /*
        If Ndb is unaware of the table's defaults, we must provide all
        column values to the insert.  This is done using a NULL column mask.
        If Ndb is aware of the table's defaults, we only need to provide
        the columns explicitly mentioned in the write set,
        plus any extra columns required due to bug#41616,
        plus the primary key columns required due to bug#42238.
      */
      /*
        The following code for setting user_cols_written_bitmap
        should be removed after BUG#41616 and Bug#42238 are fixed
      */
      /* Copy table write set so that we can add to it */
      user_cols_written_bitmap= &tmpBitmap;
      bitmap_init(user_cols_written_bitmap, tmpBitmapSpace,
                  table->write_set->n_bits, false);
      bitmap_copy(user_cols_written_bitmap, table->write_set);

      for (uint i= 0; i < table->s->fields; i++)
      {
        Field *field= table->field[i];
        DBUG_PRINT("info", ("Field#%u, (%u), Type : %u "
                            "NO_DEFAULT_VALUE_FLAG : %u PRI_KEY_FLAG : %u",
                            i, 
                            field->field_index,
                            field->real_type(),
                            field->flags & NO_DEFAULT_VALUE_FLAG,
                            field->flags & PRI_KEY_FLAG));
        if ((field->flags & (NO_DEFAULT_VALUE_FLAG | // bug 41616
                             PRI_KEY_FLAG)) ||       // bug 42238
            ! type_supports_default_value(field->real_type()))
        {
          bitmap_set_bit(user_cols_written_bitmap, field->field_index);
        }
      }

      mask= (uchar *)(user_cols_written_bitmap->bitmap);
    }
    else
    {
      /* No defaults in kernel, provide all columns ourselves */
      DBUG_PRINT("info", ("No native defaults, sending all values"));
      user_cols_written_bitmap= NULL;
      mask = NULL;
    }
      
    /* Using insert, we write all non default columns */
    op= trans->insertTuple(key_rec, (const char *)key_row, m_ndb_record,
                           (char *)record, mask, // Default value should be masked
                           poptions, sizeof(NdbOperation::OperationOptions));
  }
  if (!(op))
    ERR_RETURN(trans->getNdbError());

  bool do_batch= !need_flush &&
    (batched_update || thd_allow_batch(thd));
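  /*
    do_batch: defer execution when the caller requested batched updates or
    the THD allows batching, unless the batch is already full (need_flush).
  */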
  uint blob_count= 0;
  if (table_share->blob_fields > 0)
  {
    my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set);
    /* Set Blob values for all columns updated by the operation */
    int res= set_blob_values(op, record - table->record[0],
                             user_cols_written_bitmap, &blob_count, do_batch);
    dbug_tmp_restore_column_map(table->read_set, old_map);
    if (res != 0)
      DBUG_RETURN(res);
  }

  m_rows_changed++;

  /*
    Execute write operation
    NOTE When doing inserts with many values in 
    each INSERT statement it should not be necessary
    to NoCommit the transaction between each row.
    Find out how this is detected!
  */
  m_rows_inserted++;
  no_uncommitted_rows_update(1);
  if (( (m_rows_to_insert == 1 || uses_blobs) && !do_batch ) ||
      primary_key_update ||
      need_flush)
  {
    int res= flush_bulk_insert();
    if (res != 0)
    {
      m_skip_auto_increment= TRUE;
      DBUG_RETURN(res);
    }
  }
  if ((has_auto_increment) && (m_skip_auto_increment))
  {
    int ret_val;
    if ((ret_val= set_auto_inc(thd, table->next_number_field)))
    {
      DBUG_RETURN(ret_val);
    }
  }
  m_skip_auto_increment= TRUE;

  DBUG_PRINT("exit",("ok"));
  DBUG_RETURN(0);
}


/* Compare if an update changes the primary key in a row. */
int ha_ndbcluster::primary_key_cmp(const uchar * old_row, const uchar * new_row)
{
  uint keynr= table_share->primary_key;
  KEY_PART_INFO *key_part=table->key_info[keynr].key_part;
  KEY_PART_INFO *end=key_part+table->key_info[keynr].user_defined_key_parts;

  for (; key_part != end ; key_part++)
  {
    if (!bitmap_is_set(table->write_set, key_part->fieldnr - 1))
      continue;

    /* The primary key does not allow NULLs. */
    assert(!key_part->null_bit);

    if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
    {

      if (key_part->field->cmp_binary((old_row + key_part->offset),
                                      (new_row + key_part->offset),
                                      (ulong) key_part->length))
        return 1;
    }
    else
    {
      if (memcmp(old_row+key_part->offset, new_row+key_part->offset,
                 key_part->length))
        return 1;
    }
  }
  return 0;
}

#ifdef HAVE_NDB_BINLOG

static Ndb_exceptions_data StaticRefreshExceptionsData=
  { NULL, NULL, NULL, NULL, NULL, NULL, NULL, REFRESH_ROW, false, 0 };

static int
handle_row_conflict(NDB_CONFLICT_FN_SHARE* cfn_share,
                    const char* table_name,
                    bool table_has_blobs,
                    const char* handling_type,
                    const NdbRecord* key_rec,
                    const NdbRecord* data_rec,
                    const uchar* old_row,
                    const uchar* new_row,
                    enum_conflicting_op_type op_type,
                    enum_conflict_cause conflict_cause,
                    const NdbError& conflict_error,
                    NdbTransaction* conflict_trans,
                    const MY_BITMAP *write_set,
                    Uint64 transaction_id)
{
  DBUG_ENTER("handle_row_conflict");

  const uchar* row = (op_type == DELETE_ROW)? old_row : new_row;
  /*
     We will refresh the row if the conflict function requires
     it, or if we are handling a transactional conflict.
  */
  bool refresh_row =
    (conflict_cause == TRANS_IN_CONFLICT) ||
    (cfn_share &&
     (cfn_share->m_flags & CFF_REFRESH_ROWS));

  if (refresh_row)
  {
    /* A conflict has been detected between an applied replicated operation
     * and the data in the DB.
     * The attempt to change the local DB will have been rejected.
     * We now take steps to generate a refresh Binlog event so that
     * other clusters will be re-aligned.
     */
    DBUG_PRINT("info", ("Conflict on table %s.  Operation type : %s, "
                        "conflict cause :%s, conflict error : %u : %s",
                        table_name,
                        ((op_type == WRITE_ROW)? "WRITE_ROW":
                         (op_type == UPDATE_ROW)? "UPDATE_ROW":
                         "DELETE_ROW"),
                        ((conflict_cause == ROW_ALREADY_EXISTS)?"ROW_ALREADY_EXISTS":
                         (conflict_cause == ROW_DOES_NOT_EXIST)?"ROW_DOES_NOT_EXIST":
                         "ROW_IN_CONFLICT"),
                        conflict_error.code,
                        conflict_error.message));

    assert(key_rec != NULL);
    assert(row != NULL);

    do
    {
      /* We cannot refresh a row which has Blobs, as we do not support
       * Blob refresh yet.
       * Rows implicated by a transactional conflict function may have
       * Blobs.
       * We will generate an error in this case
       */
      if (table_has_blobs)
      {
        char msg[FN_REFLEN];
        my_snprintf(msg, sizeof(msg), "%s conflict handling "
                    "on table %s failed as table has Blobs which cannot be refreshed.",
                    handling_type,
                    table_name);

        push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                            ER_EXCEPTIONS_WRITE_ERROR,
                            ER(ER_EXCEPTIONS_WRITE_ERROR), msg);

        DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
      }

      /* When the slave splits an epoch into batches, a conflict row detected
       * and refreshed in an early batch can be written to by operations in
       * a later batch.  As the operations will not have applied, and the
       * row has already been refreshed, we need not attempt to refresh
       * it again
       */
      if ((conflict_cause == ROW_IN_CONFLICT) &&
          (conflict_error.code == (int) error_op_after_refresh_op))
      {
        /* Attempt to apply an operation after the row was refreshed
         * Ignore the error
         */
        DBUG_PRINT("info", ("Operation after refresh error - ignoring"));
        break;
      }

      /* When a delete operation finds that the row does not exist, it indicates
       * a DELETE vs DELETE conflict.  If we refresh the row then we can get
       * non deterministic behaviour depending on slave batching as follows :
       *   Row is deleted
       *
       *     Case 1
       *       Slave applied DELETE, INSERT in 1 batch
       *
       *         After first batch, the row is present (due to INSERT), it is
       *         refreshed.
       *
       *     Case 2
       *       Slave applied DELETE in 1 batch, INSERT in 2nd batch
       *
       *         After first batch, the row is not present, it is refreshed
       *         INSERT is then rejected.
       *
       * The problem of not being able to 'record' a DELETE vs DELETE conflict
       * is known.  We attempt at least to give consistent behaviour for
       * DELETE vs DELETE conflicts by :
       *   NOT refreshing a row when a DELETE vs DELETE conflict is detected
       * This should map all batching scenarios onto Case1.
       */
      if ((op_type == DELETE_ROW) &&
          (conflict_cause == ROW_DOES_NOT_EXIST))
      {
        g_ndb_slave_state.current_delete_delete_count++;
        DBUG_PRINT("info", ("Delete vs Delete detected, NOT refreshing"));
        break;
      }

      /*
        We give the refresh operation some 'exceptions data', so that
        it can be identified as part of conflict resolution when
        handling operation errors.
        Specifically we need to be able to handle duplicate row
        refreshes.
        As there is no unique exceptions data, we use a singleton.

        We also need to 'force' the ANYVALUE of the row to 0 to
        indicate that the refresh is locally-sourced.
        Otherwise we can 'pickup' the ANYVALUE of a previous
        update to the row.
        If some previous update in this transaction came from a
        Slave, then using its ANYVALUE can result in that Slave
        ignoring this correction.
      */
      NdbOperation::OperationOptions options;
      options.optionsPresent =
        NdbOperation::OperationOptions::OO_CUSTOMDATA |
        NdbOperation::OperationOptions::OO_ANYVALUE;
      options.customData = &StaticRefreshExceptionsData;
      options.anyValue = 0;

      /* Use AnyValue to indicate that this is a refreshTuple op */
      ndbcluster_anyvalue_set_refresh_op(options.anyValue);

      /* Create a refresh operation to realign other clusters */
      // TODO Do we ever get non-PK key?
      //      Keyless table?
      //      Unique index
      const NdbOperation* refresh_op= conflict_trans->refreshTuple(key_rec,
                                                                   (const char*) row,
                                                                   &options,
                                                                   sizeof(options));
      if (!refresh_op)
      {
        NdbError err = conflict_trans->getNdbError();

        if (err.status == NdbError::TemporaryError)
        {
          /* Slave will roll back and retry entire transaction. */
          ERR_RETURN(err);
        }
        else
        {
          char msg[FN_REFLEN];
          my_snprintf(msg, sizeof(msg), "Row conflict handling "
                      "on table %s hit Ndb error %d '%s'",
                      table_name,
                      err.code,
                      err.message);
          push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                              ER_EXCEPTIONS_WRITE_ERROR,
                              ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
          /* Slave will stop replication. */
          DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
        }
      }
    } while(0); // End of 'refresh' block
  }

  DBUG_PRINT("info", ("Table %s does%s have an exceptions table",
                      table_name,
                      (cfn_share && cfn_share->m_ex_tab_writer.hasTable())
                      ? "" : " not"));
  if (cfn_share &&
      cfn_share->m_ex_tab_writer.hasTable())
  {
    NdbError err;
    if (cfn_share->m_ex_tab_writer.writeRow(conflict_trans,
                                            key_rec,
                                            data_rec,
                                            ::server_id,
                                            ndb_mi_get_master_server_id(),
                                            g_ndb_slave_state.current_master_server_epoch,
                                            old_row,
                                            new_row,
                                            op_type,
                                            conflict_cause,
                                            transaction_id,
                                            write_set,
                                            err) != 0)
    {
      if (err.code != 0)
      {
        if (err.status == NdbError::TemporaryError)
        {
          /* Slave will roll back and retry entire transaction. */
          ERR_RETURN(err);
        }
        else
        {
          char msg[FN_REFLEN];
          my_snprintf(msg, sizeof(msg), "%s conflict handling "
                      "on table %s hit Ndb error %d '%s'",
                      handling_type,
                      table_name,
                      err.code,
                      err.message);
          push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                              ER_EXCEPTIONS_WRITE_ERROR,
                              ER(ER_EXCEPTIONS_WRITE_ERROR), msg);
          /* Slave will stop replication. */
          DBUG_RETURN(ER_EXCEPTIONS_WRITE_ERROR);
        }
      }
    }
  } /* if (cfn_share->m_ex_tab != NULL) */

  DBUG_RETURN(0);
}
#endif /* HAVE_NDB_BINLOG */

/**
  Start a bulk update. Returns TRUE when batched updates cannot be used
  and the handler must fall back to row-by-row updates.
*/

bool ha_ndbcluster::start_bulk_update()
{
  DBUG_ENTER("ha_ndbcluster::start_bulk_update");
  if (!m_use_write && m_ignore_dup_key)
  {
    DBUG_PRINT("info", ("Batching turned off as duplicate key is "
                        "ignored by using peek_row"));
    DBUG_RETURN(TRUE);
  }
  DBUG_RETURN(FALSE);
}

int ha_ndbcluster::bulk_update_row(const uchar *old_data, uchar *new_data,
                                   uint *dup_key_found)
{
  DBUG_ENTER("ha_ndbcluster::bulk_update_row");
  *dup_key_found= 0;
  DBUG_RETURN(ndb_update_row(old_data, new_data, 1));
}

int ha_ndbcluster::exec_bulk_update(uint *dup_key_found)
{
  NdbTransaction* trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::exec_bulk_update");
  *dup_key_found= 0;

  // m_handler must be NULL or point to _this_ handler instance
  assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);

  if (m_thd_ndb->m_handler &&
      m_read_before_write_removal_possible)
  {
    /*
      This is an autocommit involving only one table and rbwr is on

      Commit the autocommit transaction early(before the usual place
      in ndbcluster_commit) in order to:
      1) save one round trip, "no-commit+commit" converted to "commit"
      2) return the correct number of updated and affected rows
         to the update loop(which will ask handler in rbwr mode)
    */
    DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
    uint ignore_count= 0;
    const int ignore_error= 1;
    if (execute_commit(m_thd_ndb, trans,
                       m_thd_ndb->m_force_send, ignore_error,
                       &ignore_count) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
    THD *thd= table->in_use;
    if (!applying_binlog(thd))
    {
      DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
      assert(m_rows_changed >= ignore_count);
      assert(m_rows_updated >= ignore_count);
      m_rows_changed-= ignore_count;
      m_rows_updated-= ignore_count;
    }
    DBUG_RETURN(0);
  }

  if (m_thd_ndb->m_unsent_bytes == 0)
  {
    DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
    DBUG_RETURN(0);
  }

  if (thd_allow_batch(table->in_use))
  {
    /*
      Turned on by @@transaction_allow_batching=ON
      or implicitly by slave exec thread
    */
    DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
    DBUG_RETURN(0);
  }

  if (m_thd_ndb->m_handler &&
      !m_blobs_pending)
  {
    // Execute at commit time (in 'ndbcluster_commit') to save a round trip
    DBUG_PRINT("exit", ("skip execute - simple autocommit"));
    DBUG_RETURN(0);
  }

  uint ignore_count= 0;
  if (execute_no_commit(m_thd_ndb, trans,
                        m_ignore_no_key || m_read_before_write_removal_used,
                        &ignore_count) != 0)
  {
    no_uncommitted_rows_execute_failure();
    DBUG_RETURN(ndb_err(trans));
  }
  THD *thd= table->in_use;
  if (!applying_binlog(thd))
  {
    assert(m_rows_changed >= ignore_count);
    assert(m_rows_updated >= ignore_count);
    m_rows_changed-= ignore_count;
    m_rows_updated-= ignore_count;
  }
  DBUG_RETURN(0);
}

void ha_ndbcluster::end_bulk_update()
{
  DBUG_ENTER("ha_ndbcluster::end_bulk_update");
  DBUG_VOID_RETURN;
}

int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data)
{
  return ndb_update_row(old_data, new_data, 0);
}

void
ha_ndbcluster::setup_key_ref_for_ndb_record(const NdbRecord **key_rec,
                                            const uchar **key_row,
                                            const uchar *record,
                                            bool use_active_index)
{
  DBUG_ENTER("setup_key_ref_for_ndb_record");
  if (use_active_index)
  {
    /* Use unique key to access table */
    DBUG_PRINT("info", ("Using unique index (%u)", active_index));
    assert((table->key_info[active_index].flags & HA_NOSAME));
    /* Can't use key if we didn't read it first */
    assert(bitmap_is_subset(m_key_fields[active_index], table->read_set));
    *key_rec= m_index[active_index].ndb_unique_record_row;
    *key_row= record;
  }
  else if (table_share->primary_key != MAX_KEY)
  {
    /* Use primary key to access table */
    DBUG_PRINT("info", ("Using primary key"));
    /* Can't use pk if we didn't read it first */
    assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
    *key_rec= m_index[table_share->primary_key].ndb_unique_record_row;
    *key_row= record;
  }
  else
  {
    /* Use hidden primary key previously read into m_ref. */
    DBUG_PRINT("info", ("Using hidden primary key (%llu)", m_ref));
    /* Can't use hidden pk if we didn't read it first */
    assert(bitmap_is_subset(m_pk_bitmap_p, table->read_set));
    assert(m_read_before_write_removal_used == false);
    *key_rec= m_ndb_hidden_key_record;
    *key_row= (const uchar *)(&m_ref);
  }
  DBUG_VOID_RETURN;
}


/*
  Update one record in NDB using primary key
*/

int ha_ndbcluster::ndb_update_row(const uchar *old_data, uchar *new_data,
                                  int is_bulk_update)
{
  THD *thd= table->in_use;
  Thd_ndb *thd_ndb= m_thd_ndb;
  NdbScanOperation* cursor= m_active_cursor;
  const NdbOperation *op;
  uint32 old_part_id= ~uint32(0), new_part_id= ~uint32(0);
  int error;
  longlong func_value;
  Uint32 func_value_uint32;
  bool have_pk= (table_share->primary_key != MAX_KEY);
  bool pk_update= (!m_read_before_write_removal_possible &&
                   have_pk &&
                   bitmap_is_overlapping(table->write_set, m_pk_bitmap_p) &&
                   primary_key_cmp(old_data, new_data));
  bool batch_allowed= !m_update_cannot_batch && 
    (is_bulk_update || thd_allow_batch(thd));
  NdbOperation::SetValueSpec sets[2];
  Uint32 num_sets= 0;

  DBUG_ENTER("ndb_update_row");

  /* Start a transaction now if none available
   * (Manual Binlog application...)
   */
  /* TODO : Consider hinting */
  if (unlikely((!m_thd_ndb->trans) && 
               !get_transaction(error)))
  {
    DBUG_RETURN(error);
  }

  NdbTransaction *trans= m_thd_ndb->trans;
  assert(trans);

  error = check_slave_state(thd);
  if (unlikely(error))
    DBUG_RETURN(error);

  /*
   * If IGNORE is specified, ignore constraint violations on primary and
   * unique keys, but only if this is not part of
   * INSERT ... ON DUPLICATE KEY UPDATE
   */
  if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE ||
                           thd->lex->sql_command == SQLCOM_UPDATE_MULTI))
  {
    NDB_WRITE_OP write_op= (pk_update) ? NDB_PK_UPDATE : NDB_UPDATE;
    int peek_res= peek_indexed_rows(new_data, write_op);
    
    if (!peek_res) 
    {
      DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
    }
    if (peek_res != HA_ERR_KEY_NOT_FOUND)
      DBUG_RETURN(peek_res);
  }

  ha_statistic_increment(&SSV::ha_update_count);

  bool skip_partition_for_unique_index= FALSE;
  if (m_use_partition_pruning)
  {
    if (!cursor && m_read_before_write_removal_used)
    {
      ndb_index_type type= get_index_type(active_index);
      /*
        Ndb unique indexes are global so when
        m_read_before_write_removal_used is active
        the unique index can be used directly for update
        without finding the partitions
      */
      if (type == UNIQUE_INDEX ||
          type == UNIQUE_ORDERED_INDEX)
      {
        skip_partition_for_unique_index= TRUE;
        goto skip_partition_pruning;
      }
    }
    if ((error= get_parts_for_update(old_data, new_data, table->record[0],
                                     m_part_info, &old_part_id, &new_part_id,
                                     &func_value)))
    {
      m_part_info->err_value= func_value;
      DBUG_RETURN(error);
    }
    DBUG_PRINT("info", ("old_part_id: %u  new_part_id: %u", old_part_id, new_part_id));
  skip_partition_pruning:
    (void)0;
  }

  /*
   * Check for update of primary key or partition change
   * for special handling
   */  
  if (pk_update || old_part_id != new_part_id)
  {
    DBUG_RETURN(ndb_pk_update_row(thd, old_data, new_data));
  }
  /*
    If we are updating a unique key with auto_increment
    then we need to update the auto_increment counter
   */
  if (table->found_next_number_field &&
      bitmap_is_set(table->write_set, 
		    table->found_next_number_field->field_index) &&
      (error= set_auto_inc(thd, table->found_next_number_field)))
  {
    DBUG_RETURN(error);
  }
  /*
    Set only non-primary-key attributes.
    We already checked that any primary key attribute in write_set has no
    real changes.
  */
  bitmap_copy(&m_bitmap, table->write_set);
  bitmap_subtract(&m_bitmap, m_pk_bitmap_p);
  uchar *mask= (uchar *)(m_bitmap.bitmap);
  assert(!pk_update);

  NdbOperation::OperationOptions *poptions = NULL;
  NdbOperation::OperationOptions options;
  options.optionsPresent=0;

  /* Need to set the value of any user-defined partitioning function
     (except when using a unique index)
  */
  if (m_user_defined_partitioning && !skip_partition_for_unique_index)
  {
    if (func_value >= INT_MAX32)
      func_value_uint32= INT_MAX32;
    else
      func_value_uint32= (uint32)func_value;
    sets[num_sets].column= get_partition_id_column();
    sets[num_sets].value= &func_value_uint32;
    num_sets++;

    if (!cursor)
    {
      options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
      options.partitionId= new_part_id;
    }
  }
  
  eventSetAnyValue(thd, &options);
  
  const bool need_flush=
      thd_ndb->add_row_check_if_batch_full(m_bytes_per_write);

  const Uint32 authorValue = 1;
  if ((thd->slave_thread) &&
      (m_table->getExtraRowAuthorBits()))
  {
    /* Set author to indicate slave updated last */
    sets[num_sets].column= NdbDictionary::Column::ROW_AUTHOR;
    sets[num_sets].value= &authorValue;
    num_sets++;
  }

  if (num_sets)
  {
    options.optionsPresent|= NdbOperation::OperationOptions::OO_SETVALUE;
    options.extraSetValues= sets;
    options.numExtraSetValues= num_sets;
  }

  if (thd->slave_thread || THDVAR(thd, deferred_constraints))
  {
    options.optionsPresent |=
      NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
  }

  if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
  {
    DBUG_PRINT("info", ("Disabling foreign keys"));
    options.optionsPresent |=
      NdbOperation::OperationOptions::OO_DISABLE_FK;
  }

  if (cursor)
  {
    /*
      We are scanning records and want to update the record that was just
      found: call updateCurrentTuple on the cursor to take over the lock
      into a new update operation, which also sets the primary key of the
      operation from the active record of the cursor
    */
    DBUG_PRINT("info", ("Calling updateCurrentTuple on cursor, write_set=0x%x",
                        table->write_set->bitmap[0]));

    if (options.optionsPresent != 0)
      poptions = &options;

    if (!(op= cursor->updateCurrentTuple(trans, m_ndb_record,
                                         (const char*)new_data, mask,
                                         poptions,
                                         sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());

    m_lock_tuple= FALSE;
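    /*
      Account a nominal 12 bytes for the takeover operation in the
      unsent-bytes bookkeeping used for batching decisions.
    */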
    thd_ndb->m_unsent_bytes+= 12;
  }
  else
  {  
    const NdbRecord *key_rec;
    const uchar *key_row;
    setup_key_ref_for_ndb_record(&key_rec, &key_row, new_data,
				 m_read_before_write_removal_used);

    bool avoidNdbApiWriteOp = true; /* Default update op for ndb_update_row */
#ifdef HAVE_NDB_BINLOG
    Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
    NdbInterpretedCode code(m_table, buffer,
                            sizeof(buffer)/sizeof(buffer[0]));

    if (thd->slave_thread)
    {
      bool conflict_handled = false;
      /* Conflict resolution in slave thread. */
      DBUG_PRINT("info", ("Slave thread, preparing conflict resolution for update with mask : %x", *((Uint32*)mask)));

      if (unlikely((error = prepare_conflict_detection(UPDATE_ROW,
                                                       key_rec,
                                                       m_ndb_record,
                                                       old_data,
                                                       new_data,
                                                       table->write_set,
                                                       trans,
                                                       &code,
                                                       &options,
                                                       conflict_handled,
                                                       avoidNdbApiWriteOp))))
        DBUG_RETURN(error);

      if (unlikely(conflict_handled))
      {
        /* No need to continue with operation definition */
        /* TODO : Ensure batch execution */
        DBUG_RETURN(0);
      }
    }
#endif /* HAVE_NDB_BINLOG */
    if (options.optionsPresent !=0)
      poptions= &options;

    if (likely(avoidNdbApiWriteOp))
    {
      if (!(op= trans->updateTuple(key_rec, (const char *)key_row,
                                   m_ndb_record, (const char*)new_data, mask,
                                   poptions,
                                   sizeof(NdbOperation::OperationOptions))))
        ERR_RETURN(trans->getNdbError());
    }
    else
    {
      DBUG_PRINT("info", ("Update op using writeTuple"));
      if (!(op= trans->writeTuple(key_rec, (const char *)key_row,
                                  m_ndb_record, (const char*)new_data, mask,
                                  poptions,
                                  sizeof(NdbOperation::OperationOptions))))
        ERR_RETURN(trans->getNdbError());
    }
  }

  uint blob_count= 0;
  if (uses_blob_value(table->write_set))
  {
    int row_offset= (int)(new_data - table->record[0]);
    int res= set_blob_values(op, row_offset, table->write_set, &blob_count,
                             (batch_allowed && !need_flush));
    if (res != 0)
      DBUG_RETURN(res);
  }
  uint ignore_count= 0;
  /*
    Batch update operation if we are doing a scan for update, unless
    there exist UPDATE AFTER triggers
  */
  if (m_update_cannot_batch ||
      !(cursor || (batch_allowed && have_pk)) ||
      need_flush)
  {
    if (execute_no_commit(m_thd_ndb, trans,
                          m_ignore_no_key || m_read_before_write_removal_used,
                          &ignore_count) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else if (blob_count > 0)
    m_blobs_pending= TRUE;

  m_rows_changed++;
  m_rows_updated++;

  if (!applying_binlog(thd))
  {
    assert(m_rows_changed >= ignore_count);
    assert(m_rows_updated >= ignore_count);
    m_rows_changed-= ignore_count;
    m_rows_updated-= ignore_count;
  }

  DBUG_RETURN(0);
}


/*
  handler delete interface
*/

int ha_ndbcluster::delete_row(const uchar *record)
{
  return ndb_delete_row(record, FALSE);
}

bool ha_ndbcluster::start_bulk_delete()
{
  DBUG_ENTER("start_bulk_delete");
  m_is_bulk_delete = true;
  DBUG_RETURN(0); // Bulk delete used by handler
}

int ha_ndbcluster::end_bulk_delete()
{
  NdbTransaction* trans= m_thd_ndb->trans;
  DBUG_ENTER("end_bulk_delete");
  assert(m_is_bulk_delete); // Don't allow end() without start()
  m_is_bulk_delete = false;

  // m_handler must be NULL or point to _this_ handler instance
  assert(m_thd_ndb->m_handler == NULL || m_thd_ndb->m_handler == this);

  if (m_thd_ndb->m_handler &&
      m_read_before_write_removal_possible)
  {
    /*
      This is an autocommit involving only one table and rbwr is on

      Commit the autocommit transaction early(before the usual place
      in ndbcluster_commit) in order to:
      1) save one round trip, "no-commit+commit" converted to "commit"
      2) return the correct number of updated and affected rows
         to the delete loop(which will ask handler in rbwr mode)
    */
    DBUG_PRINT("info", ("committing auto-commit+rbwr early"));
    uint ignore_count= 0;
    const int ignore_error= 1;
    if (execute_commit(m_thd_ndb, trans,
                       m_thd_ndb->m_force_send, ignore_error,
                       &ignore_count) != 0)
    {
      no_uncommitted_rows_execute_failure();
      m_rows_deleted = 0;
      DBUG_RETURN(ndb_err(trans));
    }
    THD *thd= table->in_use;
    if (!applying_binlog(thd))
    {
      DBUG_PRINT("info", ("ignore_count: %u", ignore_count));
      assert(m_rows_deleted >= ignore_count);
      m_rows_deleted-= ignore_count;
    }
    DBUG_RETURN(0);
  }

  if (m_thd_ndb->m_unsent_bytes == 0)
  {
    DBUG_PRINT("exit", ("skip execute - no unsent bytes"));
    DBUG_RETURN(0);
  }

  if (thd_allow_batch(table->in_use))
  {
    /*
      Turned on by @@transaction_allow_batching=ON
      or implicitly by slave exec thread
    */
    DBUG_PRINT("exit", ("skip execute - transaction_allow_batching is ON"));
    DBUG_RETURN(0);
  }

  if (m_thd_ndb->m_handler)
  {
    // Execute at commit time (in 'ndbcluster_commit') to save a round trip
    DBUG_PRINT("exit", ("skip execute - simple autocommit"));
    DBUG_RETURN(0);
  }

  uint ignore_count= 0;
  if (execute_no_commit(m_thd_ndb, trans,
                        m_ignore_no_key || m_read_before_write_removal_used,
                        &ignore_count) != 0)
  {
    no_uncommitted_rows_execute_failure();
    DBUG_RETURN(ndb_err(trans));
  }

  THD *thd= table->in_use;
  if (!applying_binlog(thd))
  {
    assert(m_rows_deleted >= ignore_count);
    m_rows_deleted-= ignore_count;
    no_uncommitted_rows_update(ignore_count);
  }
  DBUG_RETURN(0);
}


/**
  Delete one record from NDB, using primary key.
*/

int ha_ndbcluster::ndb_delete_row(const uchar *record,
                                  bool primary_key_update)
{
  THD *thd= table->in_use;
  Thd_ndb *thd_ndb= m_thd_ndb;
  NdbScanOperation* cursor= m_active_cursor;
  const NdbOperation *op;
  uint32 part_id= ~uint32(0);
  int error;
  bool allow_batch= !m_delete_cannot_batch &&
    (m_is_bulk_delete || thd_allow_batch(thd));

  DBUG_ENTER("ndb_delete_row");

  /* Start a transaction now if none available
   * (Manual Binlog application...)
   */
  /* TODO : Consider hinting */
  if (unlikely((!m_thd_ndb->trans) && 
               !get_transaction(error)))
  {
    DBUG_RETURN(error);
  }
    
  NdbTransaction *trans= m_thd_ndb->trans;
  assert(trans);

  error = check_slave_state(thd);
  if (unlikely(error))
    DBUG_RETURN(error);

  ha_statistic_increment(&SSV::ha_delete_count);
  m_rows_changed++;

  bool skip_partition_for_unique_index= FALSE;
  if (m_use_partition_pruning)
  {
    if (!cursor && m_read_before_write_removal_used)
    {
      ndb_index_type type= get_index_type(active_index);
      /*
        Ndb unique indexes are global so when
        m_read_before_write_removal_used is active
        the unique index can be used directly for deleting
        without finding the partitions
      */
      if (type == UNIQUE_INDEX ||
          type == UNIQUE_ORDERED_INDEX)
      {
        skip_partition_for_unique_index= TRUE;
        goto skip_partition_pruning;
      }
    }
    if ((error= get_part_for_delete(record, table->record[0], m_part_info,
                                    &part_id)))
    {
      DBUG_RETURN(error);
    }
  skip_partition_pruning:
    (void)0;
  }

  NdbOperation::OperationOptions options;
  NdbOperation::OperationOptions *poptions = NULL;
  options.optionsPresent=0;

  eventSetAnyValue(thd, &options);

  /*
    Rough approximation: count a delete as 12 bytes plus about a quarter
    of a full row write
  */
  uint delete_size= 12 + (m_bytes_per_write >> 2);
  const bool need_flush =
      thd_ndb->add_row_check_if_batch_full(delete_size);

  if (thd->slave_thread || THDVAR(thd, deferred_constraints))
  {
    options.optionsPresent |=
      NdbOperation::OperationOptions::OO_DEFERRED_CONSTAINTS;
  }

  if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
  {
    DBUG_PRINT("info", ("Disabling foreign keys"));
    options.optionsPresent |=
      NdbOperation::OperationOptions::OO_DISABLE_FK;
  }

  if (cursor)
  {
    if (options.optionsPresent != 0)
      poptions = &options;

    /*
      We are scanning records and want to delete the record that was just
      found: call deleteCurrentTuple on the cursor to take over the lock
      into a new delete operation, which also sets the primary key of the
      operation from the active record of the cursor
    */
    DBUG_PRINT("info", ("Calling deleteCurrentTuple on cursor"));
    if ((op = cursor->deleteCurrentTuple(trans, m_ndb_record,
                                         NULL, // result_row
                                         NULL, // result_mask
                                         poptions, 
                                         sizeof(NdbOperation::OperationOptions))) == 0)
      ERR_RETURN(trans->getNdbError());     
    m_lock_tuple= FALSE;
    thd_ndb->m_unsent_bytes+= 12;

    no_uncommitted_rows_update(-1);
    m_rows_deleted++;

    if (!(primary_key_update || m_delete_cannot_batch))
    {
      // If deleting from cursor, NoCommit will be handled in next_result
      DBUG_RETURN(0);
    }
  }
  else
  {
    const NdbRecord *key_rec;
    const uchar *key_row;

    if (m_user_defined_partitioning && !skip_partition_for_unique_index)
    {
      options.optionsPresent|= NdbOperation::OperationOptions::OO_PARTITION_ID;
      options.partitionId= part_id;
    }

    setup_key_ref_for_ndb_record(&key_rec, &key_row, record,
				 m_read_before_write_removal_used);

#ifdef HAVE_NDB_BINLOG
    Uint32 buffer[ MAX_CONFLICT_INTERPRETED_PROG_SIZE ];
    NdbInterpretedCode code(m_table, buffer,
                            sizeof(buffer)/sizeof(buffer[0]));
    if (thd->slave_thread)
    {
       bool conflict_handled = false;
       bool dummy_delete_does_not_care = false;

      /* Conflict resolution in slave thread. */
      if (unlikely((error = prepare_conflict_detection(DELETE_ROW,
                                                       key_rec,
                                                       m_ndb_record,
                                                       key_row, /* old_data */
                                                       NULL,    /* new_data */
                                                       table->write_set,
                                                       trans,
                                                       &code,
                                                       &options,
                                                       conflict_handled,
                                                       dummy_delete_does_not_care))))
        DBUG_RETURN(error);

      if (unlikely(conflict_handled))
      {
        /* No need to continue with operation definition */
        /* TODO : Ensure batch execution */
        DBUG_RETURN(0);
      }
    }
#endif /* HAVE_NDB_BINLOG */
    if (options.optionsPresent != 0)
      poptions= &options;

    if (!(op=trans->deleteTuple(key_rec, (const char *)key_row,
                                m_ndb_record,
                                NULL, // row
                                NULL, // mask
                                poptions,
                                sizeof(NdbOperation::OperationOptions))))
      ERR_RETURN(trans->getNdbError());

    no_uncommitted_rows_update(-1);
    m_rows_deleted++;

    /*
      Check if we can batch the delete.

      We don't batch deletes as part of primary key updates.
      We do not batch deletes on tables with no primary key. For such tables,
      replication uses full table scan to locate the row to delete. The
      problem is the following scenario when deleting 2 (or more) rows:

       1. Table scan to locate the first row.
       2. Delete the row, batched so no execute.
       3. Table scan to locate the second row is executed, along with the
          batched delete operation from step 2.
       4. The first row is returned from nextResult() (not deleted yet).
       5. The kernel deletes the row (operation from step 2).
       6. lockCurrentTuple() is called on the row returned in step 4. However,
          as that row is now deleted, the operation fails and the transaction
          is aborted.
       7. The delete of the second tuple now fails, as the transaction has
          been aborted.
    */

    if ( allow_batch &&
	 table_share->primary_key != MAX_KEY &&
	 !primary_key_update &&
	 !need_flush)
    {
      DBUG_RETURN(0);
    }
  }

  // Execute delete operation
  uint ignore_count= 0;
  if (execute_no_commit(m_thd_ndb, trans,
                        m_ignore_no_key || m_read_before_write_removal_used,
                        &ignore_count) != 0)
  {
    no_uncommitted_rows_execute_failure();
    DBUG_RETURN(ndb_err(trans));
  }
  if (!primary_key_update)
  {
    if (!applying_binlog(thd))
    {
      assert(m_rows_deleted >= ignore_count);
      m_rows_deleted-= ignore_count;
      no_uncommitted_rows_update(ignore_count);
    }
  }
  DBUG_RETURN(0);
}
  
/**
  Unpack a record returned from a scan.
  We copy field-for-field to
   1. Avoid unnecessary copying for sparse rows.
   2. Properly initialize unused null bits.
  Note that we do not unpack all returned rows; some primary/unique key
  operations can read directly into the destination row.
*/
void ha_ndbcluster::unpack_record(uchar *dst_row, const uchar *src_row)
{
  int res;
  assert(src_row != NULL);

  my_ptrdiff_t dst_offset= dst_row - table->record[0];
  my_ptrdiff_t src_offset= src_row - table->record[0];

  /* Initialize the NULL bitmap. */
  memset(dst_row, 0xff, table->s->null_bytes);

  uchar *blob_ptr= m_blobs_buffer;

  for (uint i= 0; i < table_share->fields; i++) 
  {
    Field *field= table->field[i];
    if (bitmap_is_set(table->read_set, i))
    {
      if (field->type() == MYSQL_TYPE_BIT)
      {
        Field_bit *field_bit= static_cast<Field_bit*>(field);
        if (!field->is_real_null(src_offset))
        {
          field->move_field_offset(src_offset);
          longlong value= field_bit->val_int();
          field->move_field_offset(dst_offset-src_offset);
          field_bit->set_notnull();
          /* Field_bit in DBUG requires the bit set in write_set for store(). */
          my_bitmap_map *old_map=
            dbug_tmp_use_all_columns(table, table->write_set);
          int res = field_bit->store(value, true);
          assert(res == 0); NDB_IGNORE_VALUE(res);
          dbug_tmp_restore_column_map(table->write_set, old_map);
          field->move_field_offset(-dst_offset);
        }
      }
      else if (field->flags & BLOB_FLAG)
      {
        Field_blob *field_blob= (Field_blob *)field;
        NdbBlob *ndb_blob= m_value[i].blob;
        /* unpack_record *only* called for scan result processing
         * *while* the scan is open and the Blob is active.
         * Verify Blob state to be certain.
         * Accessing PK/UK op Blobs after execute() is unsafe
         */
        assert(ndb_blob != 0);
        assert(ndb_blob->getState() == NdbBlob::Active);
        int isNull;
        res= ndb_blob->getNull(isNull);
        assert(res == 0);                  // Already succeeded once
        Uint64 len64= 0;
        field_blob->move_field_offset(dst_offset);
        if (!isNull)
        {
          res= ndb_blob->getLength(len64);
          assert(res == 0 && len64 <= (Uint64)0xffffffff);

          if(len64 > field_blob->max_data_length())
          {
            len64 = calc_ndb_blob_len(ndb_blob->getColumn()->getCharset(), 
                                    blob_ptr, field_blob->max_data_length());

            // push a warning
            push_warning_printf(table->in_use, Sql_condition::SL_WARNING,
                        WARN_DATA_TRUNCATED,
                        "Truncated value from TEXT field \'%s\'", field_blob->field_name);

          }
          field->set_notnull();
        }
        /* Need not set_null(), as we initialized null bits to 1 above. */
        field_blob->set_ptr((uint32)len64, blob_ptr);
        field_blob->move_field_offset(-dst_offset);
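        /*
          Advance past this blob's data, rounding the length up to the next
          8-byte boundary so the next blob's data stays aligned in the
          buffer.
        */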
        blob_ptr+= (len64 + 7) & ~((Uint64)7);
      }
      else
      {
        field->move_field_offset(src_offset);
        /* Normal field (not blob or bit type). */
        if (!field->is_null())
        {
          /* Only copy actually used bytes of varstrings. */
          uint32 actual_length= field_used_length(field);
          uchar *src_ptr= field->ptr;
          field->move_field_offset(dst_offset - src_offset);
          field->set_notnull();
          memcpy(field->ptr, src_ptr, actual_length);
          field->move_field_offset(-dst_offset);
        }
        else
          field->move_field_offset(-src_offset);
        /* No action needed for a NULL field. */
      }
    }
  }
}


/**
  Get the default value of the field from default_values of the table.
*/
static void get_default_value(void *def_val, Field *field)
{
  assert(field != NULL);

  my_ptrdiff_t src_offset= field->table->default_values_offset();

  {
    if (bitmap_is_set(field->table->read_set, field->field_index))
    {
      if (field->type() == MYSQL_TYPE_BIT)
      {
        Field_bit *field_bit= static_cast<Field_bit*>(field);
        if (!field->is_real_null(src_offset))
        {
          field->move_field_offset(src_offset);
          longlong value= field_bit->val_int();
          /* Map to NdbApi format - two Uint32s */
          Uint32 out[2];
          out[0] = 0;
          out[1] = 0;
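          /*
            Bit b of the 64-bit value goes into word b >> 5 (b / 32),
            at bit position b & 31 (b % 32).
          */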
          for (int b=0; b < 64; b++)
          {
            out[b >> 5] |= (value & 1) << (b & 31);
            
            value= value >> 1;
          }
          memcpy(def_val, out, sizeof(longlong));
          field->move_field_offset(-src_offset);
        }
      }
      else if (field->flags & BLOB_FLAG)
      {
        assert(false);
      }
      else
      {
        field->move_field_offset(src_offset);
        /* Normal field (not blob or bit type). */
        if (!field->is_null())
        {
          /* Only copy actually used bytes of varstrings. */
          uint32 actual_length= field_used_length(field);
          uchar *src_ptr= field->ptr;
          field->set_notnull();
          memcpy(def_val, src_ptr, actual_length);
        }
        field->move_field_offset(-src_offset);
        /* No action needed for a NULL field. */
      }
    }
  }
}

/*
    DBUG_EXECUTE("value", print_results(););
*/

void ha_ndbcluster::print_results()
{
  DBUG_ENTER("print_results");

#ifndef NDEBUG

  char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
  String type(buf_type, sizeof(buf_type), &my_charset_bin);
  String val(buf_val, sizeof(buf_val), &my_charset_bin);
  for (uint f= 0; f < table_share->fields; f++)
  {
    /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
    char buf[2000];
    Field *field;
    void* ptr;
    NdbValue value;

    buf[0]= 0;
    field= table->field[f];
    if (!(value= m_value[f]).ptr)
    {
      my_stpcpy(buf, "not read");
      goto print_value;
    }

    ptr= field->ptr;

    if (! (field->flags & BLOB_FLAG))
    {
      if (value.rec->isNULL())
      {
        my_stpcpy(buf, "NULL");
        goto print_value;
      }
      type.length(0);
      val.length(0);
      field->sql_type(type);
      field->val_str(&val);
      my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
    }
    else
    {
      NdbBlob *ndb_blob= value.blob;
      bool isNull= TRUE;
      assert(ndb_blob->getState() == NdbBlob::Active);
      ndb_blob->getNull(isNull);
      if (isNull)
        my_stpcpy(buf, "NULL");
    }

print_value:
    DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
  }
#endif
  DBUG_VOID_RETURN;
}


int ha_ndbcluster::index_init(uint index, bool sorted)
{
  DBUG_ENTER("ha_ndbcluster::index_init");
  DBUG_PRINT("enter", ("index: %u  sorted: %d", index, sorted));
  active_index= index;
  m_sorted= sorted;
  /*
    Locks are explicitly released in scan
    unless m_lock.type == TL_READ_HIGH_PRIORITY
    and there is no subsequent call to unlock_row()
  */
  m_lock_tuple= FALSE;

  if (table_share->primary_key == MAX_KEY &&
      m_use_partition_pruning)
  {
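    /*
      Ensure all fields used by the partition function are read, so the
      partition id can be computed where needed; the hidden primary key
      itself carries no partitioning information.
    */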
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }

  DBUG_RETURN(0);
}


int ha_ndbcluster::index_end()
{
  DBUG_ENTER("ha_ndbcluster::index_end");
  DBUG_RETURN(close_scan());
}

/**
  Check if key contains null.
*/
static
int
check_null_in_key(const KEY* key_info, const uchar *key, uint key_len)
{
  KEY_PART_INFO *curr_part, *end_part;
  const uchar* end_ptr= key + key_len;
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->user_defined_key_parts;

  for (; curr_part != end_part && key < end_ptr; curr_part++)
  {
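    /*
      For a nullable key part the key buffer starts with a one-byte null
      indicator (non-zero means NULL); store_length includes this byte.
    */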
    if (curr_part->null_bit && *key)
      return 1;

    key += curr_part->store_length;
  }
  return 0;
}

int ha_ndbcluster::index_read(uchar *buf,
                              const uchar *key, uint key_len, 
                              enum ha_rkey_function find_flag)
{
  key_range start_key, end_key, *end_key_p=NULL;
  bool descending= FALSE;
  DBUG_ENTER("ha_ndbcluster::index_read");
  DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", 
                       active_index, key_len, find_flag));

  start_key.key= key;
  start_key.length= key_len;
  start_key.flag= find_flag;
  switch (find_flag) {
  case HA_READ_KEY_EXACT:
    /**
     * Specify as a closed EQ_RANGE.
     * Setting HA_READ_AFTER_KEY seems odd, but this is according
     * to MySQL convention, see opt_range.cc.
     */
    end_key.key= key;
    end_key.length= key_len;
    end_key.flag= HA_READ_AFTER_KEY;
    end_key_p= &end_key;
    break;
  case HA_READ_KEY_OR_PREV:
  case HA_READ_BEFORE_KEY:
  case HA_READ_PREFIX_LAST:
  case HA_READ_PREFIX_LAST_OR_PREV:
    descending= TRUE;
    break;
  default:
    break;
  }
  const int error= read_range_first_to_buf(&start_key, end_key_p,
                                           descending,
                                           m_sorted, buf);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_next(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_next");
  ha_statistic_increment(&SSV::ha_read_next_count);
  const int error= next_result(buf);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_prev(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_prev");
  ha_statistic_increment(&SSV::ha_read_prev_count);
  const int error= next_result(buf);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_first(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_first");
  ha_statistic_increment(&SSV::ha_read_first_count);
  // Start the ordered index scan and fetch the first row

  // Only HA_READ_ORDER indexes get called by index_first
  const int error= ordered_index_scan(0, 0, m_sorted, FALSE, buf, NULL);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


int ha_ndbcluster::index_last(uchar *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_last");
  ha_statistic_increment(&SSV::ha_read_last_count);
  const int error= ordered_index_scan(0, 0, m_sorted, TRUE, buf, NULL);
  table->status=error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}

int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len)
{
  DBUG_ENTER("ha_ndbcluster::index_read_last");
  DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
}


int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
                                           const key_range *end_key,
                                           bool desc, bool sorted,
                                           uchar* buf)
{
  part_id_range part_spec;
  ndb_index_type type= get_index_type(active_index);
  const KEY* key_info= table->key_info+active_index;
  int error; 
  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
  DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted));

  if (unlikely((error= close_scan())))
    DBUG_RETURN(error);

  if (m_use_partition_pruning)
  {
    assert(m_pushed_join_operation != PUSHED_ROOT);
    get_partition_set(table, buf, active_index, start_key, &part_spec);
    DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
                        part_spec.start_part, part_spec.end_part));
    /*
      If partition pruning has found no partition in the set,
      we can return HA_ERR_END_OF_FILE.
      If partition pruning has found exactly one partition in the set,
      we can optimize the scan to run against that partition only.
    */
    if (part_spec.start_part > part_spec.end_part)
    {
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }

    if (part_spec.start_part == part_spec.end_part)
    {
      /*
        Only one partition needs to be scanned. If sorted output was
        requested it is no longer needed, since the output from a single
        ordered partition index is always sorted.
      */
      sorted= FALSE;
      if (unlikely(!get_transaction_part_id(part_spec.start_part, error)))
      {
        DBUG_RETURN(error);
      }
    }
  }

  switch (type){
  case PRIMARY_KEY_ORDERED_INDEX:
  case PRIMARY_KEY_INDEX:
    if (start_key && 
        start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT)
    {
      if (!m_thd_ndb->trans)
        if (unlikely(!start_transaction_key(active_index,
                                            start_key->key, error)))
          DBUG_RETURN(error);
      error= pk_read(start_key->key, start_key->length, buf,
		  (m_use_partition_pruning)? &(part_spec.start_part) : NULL);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
    break;
  case UNIQUE_ORDERED_INDEX:
  case UNIQUE_INDEX:
    if (start_key && start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT && 
        !check_null_in_key(key_info, start_key->key, start_key->length))
    {
      if (!m_thd_ndb->trans)
        if (unlikely(!start_transaction_key(active_index,
                                            start_key->key, error)))
          DBUG_RETURN(error);
      error= unique_index_read(start_key->key, start_key->length, buf);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
    else if (type == UNIQUE_INDEX)
      DBUG_RETURN(full_table_scan(key_info, 
                                  start_key,
                                  end_key,
                                  buf));
    break;
  default:
    break;
  }
  if (!m_use_partition_pruning && !m_thd_ndb->trans)
  {
    get_partition_set(table, buf, active_index, start_key, &part_spec);
    if (part_spec.start_part == part_spec.end_part)
      if (unlikely(!start_transaction_part_id(part_spec.start_part, error)))
        DBUG_RETURN(error);
  }
  // Start the ordered index scan and fetch the first row
  DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf,
	  (m_use_partition_pruning)? &part_spec : NULL));
}

int ha_ndbcluster::read_range_first(const key_range *start_key,
                                    const key_range *end_key,
                                    bool eq_r, bool sorted)
{
  uchar* buf= table->record[0];
  DBUG_ENTER("ha_ndbcluster::read_range_first");
  DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
                                      sorted, buf));
}

int ha_ndbcluster::read_range_next()
{
  DBUG_ENTER("ha_ndbcluster::read_range_next");
  DBUG_RETURN(next_result(table->record[0]));
}


int ha_ndbcluster::rnd_init(bool scan)
{
  int error;
  DBUG_ENTER("rnd_init");
  DBUG_PRINT("enter", ("scan: %d", scan));

  if ((error= close_scan()))
    DBUG_RETURN(error);
  index_init(table_share->primary_key, 0);
  DBUG_RETURN(0);
}

int ha_ndbcluster::close_scan()
{
  DBUG_ENTER("close_scan");

  if (m_active_query)
  {
    m_active_query->close(m_thd_ndb->m_force_send);
    m_active_query= NULL;
  }

  NdbScanOperation *cursor= m_active_cursor;
  if (!cursor)
  {
    cursor = m_multi_cursor;
    if (!cursor)
      DBUG_RETURN(0);
  }

  int error;
  NdbTransaction *trans= m_thd_ndb->trans;
  if ((error= scan_handle_lock_tuple(cursor, trans)) != 0)
    DBUG_RETURN(error);

  if (m_thd_ndb->m_unsent_bytes)
  {
    /*
      Take over any pending operations to the
      deleting/updating transaction before closing the scan
    */
    DBUG_PRINT("info", ("thd_ndb->m_unsent_bytes: %ld",
                        (long) m_thd_ndb->m_unsent_bytes));    
    if (execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
  }
  
  cursor->close(m_thd_ndb->m_force_send, TRUE);
  m_active_cursor= NULL;
  m_multi_cursor= NULL;
  DBUG_RETURN(0);
}

int ha_ndbcluster::rnd_end()
{
  DBUG_ENTER("rnd_end");
  DBUG_RETURN(close_scan());
}


int ha_ndbcluster::rnd_next(uchar *buf)
{
  DBUG_ENTER("rnd_next");
  ha_statistic_increment(&SSV::ha_read_rnd_next_count);

  int error;
  if (m_active_cursor || m_active_query)
    error= next_result(buf);
  else
    error= full_table_scan(NULL, NULL, NULL, buf);
  
  table->status= error ? STATUS_NOT_FOUND: 0;
  DBUG_RETURN(error);
}


/**
  An "interesting" record has been found and its pk
  retrieved by calling position. Now it's time to read
  the record from db once again.
*/

int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos)
{
  DBUG_ENTER("rnd_pos");
  ha_statistic_increment(&SSV::ha_read_rnd_count);
  // The primary key for the record is stored in pos
  // Perform a pk_read using primary key "index"
  {
    part_id_range part_spec;
    uint key_length= ref_length;
    if (m_user_defined_partitioning)
    {
      if (table_share->primary_key == MAX_KEY)
      {
        /*
          The partition id has been fetched from ndb
          and has been stored directly after the hidden key
        */
        DBUG_DUMP("key+part", pos, key_length);
        key_length= ref_length - sizeof(m_part_id);
        part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length);
      }
      else
      {
        key_range key_spec;
        KEY *key_info= table->key_info + table_share->primary_key;
        key_spec.key= pos;
        key_spec.length= key_length;
        key_spec.flag= HA_READ_KEY_EXACT;
        get_full_part_id_from_key(table, buf, key_info, 
                                  &key_spec, &part_spec);
        assert(part_spec.start_part == part_spec.end_part);
      }
      DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
    }
    DBUG_DUMP("key", pos, key_length);
    int res= pk_read(pos, key_length, buf, 
                     (m_user_defined_partitioning) ? 
                     &(part_spec.start_part) 
                     : NULL);
    if (res == HA_ERR_KEY_NOT_FOUND)
    {
      /**
       * When using rnd_pos
       *   the server first retrieves a set of records (typically by scanning)
       *   and stores a unique identifier (for ndb this is the primary key),
       *   and later retrieves the record again using rnd_pos and the
       *   saved primary key. For ndb, since we only support committed read,
       *   the record could have been deleted in between the "save" and
       *   the rnd_pos.
       *   Therefore we return HA_ERR_RECORD_DELETED in this case rather than
       *   HA_ERR_KEY_NOT_FOUND (which would cause the statement to be aborted)
       */
      res= HA_ERR_RECORD_DELETED;
    }
    table->status= res ? STATUS_NOT_FOUND: 0;
    DBUG_RETURN(res);
  }
}


/**
  Store the primary key of this record in ref 
  variable, so that the row can be retrieved again later
  using "reference" in rnd_pos.
*/
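/*
  Layout sketch of the 'ref' image built below (one entry per key part):
  an optional null-indicator byte (for nullable parts), followed by the
  key value; VARCHAR parts always get a 2-byte length header and are
  zero-padded up to the full part length. Tables without a primary key
  store the hidden key instead, followed by the partition id when
  user-defined partitioning is used.
*/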

void ha_ndbcluster::position(const uchar *record)
{
  KEY *key_info;
  KEY_PART_INFO *key_part;
  KEY_PART_INFO *end;
  uchar *buff;
  uint key_length;

  DBUG_ENTER("position");

  if (table_share->primary_key != MAX_KEY) 
  {
    key_length= ref_length;
    key_info= table->key_info + table_share->primary_key;
    key_part= key_info->key_part;
    end= key_part + key_info->user_defined_key_parts;
    buff= ref;
    
    for (; key_part != end; key_part++) 
    {
      if (key_part->null_bit) {
        /* Store 0 if the key part is a NULL part */      
        if (record[key_part->null_offset]
            & key_part->null_bit) {
          *buff++= 1;
          continue;
        }      
        *buff++= 0;
      }

      size_t len = key_part->length;
      const uchar * ptr = record + key_part->offset;
      Field *field = key_part->field;
      if (field->type() ==  MYSQL_TYPE_VARCHAR)
      {
        size_t var_length;
        if (((Field_varstring*)field)->length_bytes == 1)
        {
          /**
           * Keys always use a 2-byte length
           */
          buff[0] = ptr[0];
          buff[1] = 0;
          var_length = ptr[0];
          assert(var_length <= len);
          memcpy(buff+2, ptr + 1, var_length);
        }
        else
        {
          var_length = ptr[0] + (ptr[1]*256);
          assert(var_length <= len);
          memcpy(buff, ptr, var_length + 2);
        }
        /**
          We have to zero-pad any unused VARCHAR buffer so that MySQL is 
          able to use simple memcmp to compare two instances of the same
          unique key value to determine if they are equal. 
          MySQL does this to compare contents of two 'ref' values.
          (Duplicate weedout algorithm is one such case.)
        */
        memset(buff+2+var_length, 0, len - var_length);
        len += 2;
      }
      else
      {
        memcpy(buff, ptr, len);
      }
      buff += len;
    }
  } 
  else 
  {
    // No primary key, get hidden key
    DBUG_PRINT("info", ("Getting hidden key"));
    // If table has user defined partition save the partition id as well
    if (m_user_defined_partitioning)
    {
      DBUG_PRINT("info", ("Saving partition id %u", m_part_id));
      key_length= ref_length - sizeof(m_part_id);
      memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id));
    }
    else
      key_length= ref_length;
#ifndef NDEBUG
    int hidden_no= table->s->fields;
    const NDBTAB *tab= m_table;  
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    assert(hidden_col->getPrimaryKey() && 
           hidden_col->getAutoIncrement() &&
           key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
#endif
    memcpy(ref, &m_ref, key_length);
  }
#ifndef NDEBUG
  if (table_share->primary_key == MAX_KEY && m_user_defined_partitioning) 
    DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id));
#endif
  DBUG_DUMP("ref", ref, key_length);
  DBUG_VOID_RETURN;
}

int
ha_ndbcluster::cmp_ref(const uchar * ref1, const uchar * ref2)
{
  DBUG_ENTER("cmp_ref");

  if (table_share->primary_key != MAX_KEY) 
  {
    KEY *key_info= table->key_info + table_share->primary_key;
    KEY_PART_INFO *key_part= key_info->key_part;
    KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
    
    for (; key_part != end; key_part++) 
    {
      // NOTE: No need to check for null since PK is not-null

      Field *field= key_part->field;
      int result= field->key_cmp(ref1, ref2);
      if (result)
      {
        DBUG_RETURN(result);
      }
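      /*
        A VARCHAR key part is stored in 'ref' with a 2-byte length header
        (see position() above), so skip those two bytes in addition to the
        fixed part length.
      */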

      if (field->type() ==  MYSQL_TYPE_VARCHAR)
      {
        ref1+= 2;
        ref2+= 2;
      }
      
      ref1+= key_part->length;
      ref2+= key_part->length;
    }
    DBUG_RETURN(0);
  } 
  else
  {
    DBUG_RETURN(memcmp(ref1, ref2, ref_length));
  }
}

int ha_ndbcluster::info(uint flag)
{
  THD *thd= table->in_use;
  int result= 0;
  DBUG_ENTER("info");
  DBUG_PRINT("enter", ("flag: %d", flag));
  
  if (flag & HA_STATUS_POS)
    DBUG_PRINT("info", ("HA_STATUS_POS"));
  if (flag & HA_STATUS_TIME)
    DBUG_PRINT("info", ("HA_STATUS_TIME"));
  if (flag & HA_STATUS_CONST)
  {
    /*
      Set size required by a single record in the MRR 'HANDLER_BUFFER'.
      MRR buffer has both a fixed and a variable sized part.
      Size is calculated assuming max size of the variable part.

      See comments for multi_range_fixed_size() and
      multi_range_max_entry() regarding how the MRR buffer is organized.
    */
    stats.mrr_length_per_rec= multi_range_fixed_size(1) +
      multi_range_max_entry(PRIMARY_KEY_INDEX, table_share->reclength);
  }
  while (flag & HA_STATUS_VARIABLE)
  {
    if (!thd)
      thd= current_thd;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));

    if (!m_table_info)
    {
      set_my_errno(check_ndb_connection(thd));
      if (my_errno())
        DBUG_RETURN(my_errno());
    }

    /*
      May need to update local copy of statistics in
      'm_table_info', either directly from datanodes,
      or from shared (mutex protected) cached copy, if:
       1) 'use_exact_count' has been set (by config or user).
       2) HA_STATUS_NO_LOCK -> read from shared cached copy.
       3) Local copy is invalid.
    */
    bool exact_count= THDVAR(thd, use_exact_count);
    if (exact_count                 ||         // 1)
        !(flag & HA_STATUS_NO_LOCK) ||         // 2)
        m_table_info == NULL        ||         // 3)
        m_table_info->records == ~(ha_rows)0)  // 3)
    {
      result= update_stats(thd, (exact_count || !(flag & HA_STATUS_NO_LOCK)));
      if (result)
        DBUG_RETURN(result);
    }
    /* Read from local statistics, fast and fuzzy, w/o locks */
    else
    {
      assert(m_table_info->records != ~(ha_rows)0);
      stats.records= m_table_info->records +
                     m_table_info->no_uncommitted_rows_count;
    }

    if (thd->lex->sql_command != SQLCOM_SHOW_TABLE_STATUS &&
        thd->lex->sql_command != SQLCOM_SHOW_KEYS)
    {
      /*
        just use whatever stats we have. However, the optimizer
        interprets the values 0 and 1 as EXACT, so values < 2
        should not be returned.
      */
      if (stats.records < 2)
        stats.records= 2;
    }
    break;
  }
  /* RPK moved to variable part */
  if (flag & HA_STATUS_VARIABLE)
  {
    /* No meaningful way to return error */
    DBUG_PRINT("info", ("rec_per_key"));
    set_rec_per_key();
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    DBUG_PRINT("info", ("HA_STATUS_ERRKEY dupkey=%u", m_dupkey));
    errkey= m_dupkey;
  }
  if (flag & HA_STATUS_AUTO)
  {
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (m_table && table->found_next_number_field)
    {
      if (!thd)
        thd= current_thd;
      set_my_errno(check_ndb_connection(thd));
      if (my_errno())
        DBUG_RETURN(my_errno());
      Ndb *ndb= get_ndb(thd);
      Ndb_tuple_id_range_guard g(m_share);
      
      Uint64 auto_increment_value64;
      if (ndb->readAutoIncrementValue(m_table, g.range,
                                      auto_increment_value64) == -1)
      {
        const NdbError err= ndb->getNdbError();
        sql_print_error("Error %lu in readAutoIncrementValue(): %s",
                        (ulong) err.code, err.message);
        stats.auto_increment_value= ~(ulonglong)0;
      }
      else
        stats.auto_increment_value= (ulonglong)auto_increment_value64;
    }
  }

  if(result == -1)
    result= HA_ERR_NO_CONNECTION;

  DBUG_RETURN(result);
}


void ha_ndbcluster::get_dynamic_partition_info(ha_statistics *stat_info,
                                               ha_checksum *check_sum,
                                               uint part_id)
{
  DBUG_PRINT("info", ("ha_ndbcluster::get_dynamic_partition_info"));

  int error = 0;
  THD *thd = table->in_use;

  if (!thd)
    thd = current_thd;
  if (!m_table_info)
  {
    if ((error = check_ndb_connection(thd)))
      goto err;
  }
  error = update_stats(thd, 1, part_id);

  if (error == 0)
  {
    stat_info->records = stats.records;
    stat_info->mean_rec_length = stats.mean_rec_length;
    stat_info->data_file_length = stats.data_file_length;
    stat_info->delete_length = stats.delete_length;
    stat_info->max_data_file_length = stats.max_data_file_length;
    return;
  }

err: 

  DBUG_PRINT("warning", 
    ("ha_ndbcluster::get_dynamic_partition_info failed with error code %u", 
     error));
}


int ha_ndbcluster::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("extra");
  switch (operation) {
  case HA_EXTRA_IGNORE_DUP_KEY:       /* Dup keys don't rollback everything*/
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
    DBUG_PRINT("info", ("Ignoring duplicate key"));
    m_ignore_dup_key= TRUE;
    break;
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
    m_ignore_dup_key= FALSE;
    break;
  case HA_EXTRA_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= TRUE;
    break;
  case HA_EXTRA_NO_IGNORE_NO_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY"));
    DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit"));
    m_ignore_no_key= FALSE;
    break;
  case HA_EXTRA_WRITE_CAN_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
    if (!m_has_unique_index ||
        /* 
           Always set if slave, quick fix for bug 27378
           or if manual binlog application, for bug 46662
        */
        applying_binlog(current_thd))
    {
      DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
      m_use_write= TRUE;
    }
    break;
  case HA_EXTRA_WRITE_CANNOT_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
    DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
    m_use_write= FALSE;
    break;
  case HA_EXTRA_DELETE_CANNOT_BATCH:
    DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH"));
    m_delete_cannot_batch= TRUE;
    break;
  case HA_EXTRA_UPDATE_CANNOT_BATCH:
    DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH"));
    m_update_cannot_batch= TRUE;
    break;
  // We don't implement 'KEYREAD'. However, KEYREAD also implies DISABLE_JOINPUSH.
  case HA_EXTRA_KEYREAD:
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
    m_disable_pushed_join= TRUE;
    break;
  case HA_EXTRA_NO_KEYREAD:
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
    m_disable_pushed_join= FALSE;
    break;
  default:
    break;
  }
  
  DBUG_RETURN(0);
}


bool ha_ndbcluster::start_read_removal()
{
  THD *thd= table->in_use;
  DBUG_ENTER("start_read_removal");

  if (uses_blob_value(table->write_set))
  {
    DBUG_PRINT("exit", ("No! Blob field in write_set"));
    DBUG_RETURN(false);
  }

  if (thd->lex->sql_command == SQLCOM_DELETE &&
      table_share->blob_fields)
  {
    DBUG_PRINT("exit", ("No! DELETE from table with blob(s)"));
    DBUG_RETURN(false);
  }

  if (table_share->primary_key == MAX_KEY)
  {
    DBUG_PRINT("exit", ("No! Table with hidden key"));
    DBUG_RETURN(false);
  }

  if (bitmap_is_overlapping(table->write_set, m_pk_bitmap_p))
  {
    DBUG_PRINT("exit", ("No! Updating primary key"));
    DBUG_RETURN(false);
  }

  if (m_has_unique_index)
  {
    for (uint i= 0; i < table_share->keys; i++)
    {
      const KEY* key= table->key_info + i;
      if ((key->flags & HA_NOSAME) &&
          bitmap_is_overlapping(table->write_set,
                                m_key_fields[i]))
      {
        DBUG_PRINT("exit", ("No! Unique key %d is updated", i));
        DBUG_RETURN(false);
      }
    }
  }
  m_read_before_write_removal_possible= TRUE;
  DBUG_PRINT("exit", ("Yes, rbwr is possible!"));
  DBUG_RETURN(true);
}


ha_rows ha_ndbcluster::end_read_removal(void)
{
  DBUG_ENTER("end_read_removal");
  assert(m_read_before_write_removal_possible);
  DBUG_PRINT("info", ("updated: %llu, deleted: %llu",
                      m_rows_updated, m_rows_deleted));
  DBUG_RETURN(m_rows_updated + m_rows_deleted);
}


int ha_ndbcluster::reset()
{
  DBUG_ENTER("ha_ndbcluster::reset");
  if (m_cond)
  {
    m_cond->cond_clear();
  }
  assert(m_active_query == NULL);
  if (m_pushed_join_operation==PUSHED_ROOT)  // Root of pushed query
  {
    delete m_pushed_join_member;             // Also delete QueryDef
  }
  m_pushed_join_member= NULL;
  m_pushed_join_operation= -1;
  m_disable_pushed_join= FALSE;

#if 0
  // Magnus, disable this "hack" until it's possible to test if
  // it's still needed
  /*
    Regular partition pruning will set the bitmap appropriately.
    Some queries, like ALTER TABLE, don't use partition pruning and
    thus the 'used_partitions' bitmap needs to be initialized
  */
  if (m_part_info)
    bitmap_set_all(&m_part_info->used_partitions);
#endif

  /* reset flags set by extra calls */
  m_read_before_write_removal_possible= FALSE;
  m_read_before_write_removal_used= FALSE;
  m_rows_updated= m_rows_deleted= 0;
  m_ignore_dup_key= FALSE;
  m_use_write= FALSE;
  m_ignore_no_key= FALSE;
  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
  m_delete_cannot_batch= FALSE;
  m_update_cannot_batch= FALSE;

  assert(m_is_bulk_delete == false);
  m_is_bulk_delete = false;
  DBUG_RETURN(0);
}


/**
  Start of an insert: remember the number of rows to be inserted. It will
  be used in write_row and get_autoincrement to send an optimal number
  of rows in each roundtrip to the server.

  @param
   rows     number of rows to insert, 0 if unknown
*/

int
ha_ndbcluster::flush_bulk_insert(bool allow_batch)
{
  NdbTransaction *trans= m_thd_ndb->trans;
  DBUG_ENTER("ha_ndbcluster::flush_bulk_insert");
  DBUG_PRINT("info", ("Sending inserts to NDB, rows_inserted: %d", 
                      (int)m_rows_inserted));
  assert(trans);

  
  if (! (m_thd_ndb->trans_options & TNTO_TRANSACTIONS_OFF))
  {
    if (!allow_batch &&
        execute_no_commit(m_thd_ndb, trans, m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
  }
  else
  {
    /*
      signal that transaction has been broken up and hence cannot
      be rolled back
    */
    THD *thd= table->in_use;
    thd->get_transaction()->mark_modified_non_trans_table(Transaction_ctx::SESSION);
    thd->get_transaction()->mark_modified_non_trans_table(Transaction_ctx::STMT);
    if (execute_commit(m_thd_ndb, trans, m_thd_ndb->m_force_send,
                       m_ignore_no_key) != 0)
    {
      no_uncommitted_rows_execute_failure();
      DBUG_RETURN(ndb_err(trans));
    }
    if (trans->restart() != 0)
    {
      assert(0);
      DBUG_RETURN(-1);
    }
  }
  DBUG_RETURN(0);
}

void ha_ndbcluster::start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("start_bulk_insert");
  DBUG_PRINT("enter", ("rows: %d", (int)rows));
  
  m_rows_inserted= (ha_rows) 0;
  if (!m_use_write && m_ignore_dup_key)
  {
    /*
      Compare this condition with the one in write_row():
      we have a situation where peek_indexed_rows() will be called,
      so we cannot batch.
    */
    DBUG_PRINT("info", ("Batching turned off as duplicate key is "
                        "ignored by using peek_row"));
    m_rows_to_insert= 1;
    DBUG_VOID_RETURN;
  }
  if (rows == (ha_rows) 0)
  {
    /* We don't know how many will be inserted, guess */
    m_rows_to_insert=
      (m_autoincrement_prefetch > DEFAULT_AUTO_PREFETCH)
      ? m_autoincrement_prefetch
      : DEFAULT_AUTO_PREFETCH;
    m_autoincrement_prefetch= m_rows_to_insert;
  }
  else
  {
    m_rows_to_insert= rows;
    if (m_autoincrement_prefetch < m_rows_to_insert)
      m_autoincrement_prefetch= m_rows_to_insert;
  }

  DBUG_VOID_RETURN;
}

/**
  End of an insert.
*/
int ha_ndbcluster::end_bulk_insert()
{
  int error= 0;

  DBUG_ENTER("end_bulk_insert");
  // Check if last inserts need to be flushed

  THD *thd= table->in_use;
  Thd_ndb *thd_ndb= m_thd_ndb;
  
  if (!thd_allow_batch(thd) && thd_ndb->m_unsent_bytes)
  {
    bool allow_batch= (thd_ndb->m_handler != 0);
    error= flush_bulk_insert(allow_batch);
    if (error != 0)
      set_my_errno(error);
  }

  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
  DBUG_RETURN(error);
}


int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  DBUG_ENTER("extra_opt");
  DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
  DBUG_RETURN(extra(operation));
}

static const char *ha_ndbcluster_exts[] = {
 ha_ndb_ext,
 NullS
};

const char** ha_ndbcluster::bas_ext() const
{
  return ha_ndbcluster_exts;
}

/**
  How many seeks it will take to read through the table.

  This is to be comparable to the number returned by records_in_range so
  that we can decide if we should scan the table or use keys.
*/

double ha_ndbcluster::scan_time()
{
  DBUG_ENTER("ha_ndbcluster::scan_time()");
  double res= rows2double(stats.records*1000);
  DBUG_PRINT("exit", ("table: %s value: %f", 
                      m_tabname, res));
  DBUG_RETURN(res);
}

/*
  Convert MySQL table locks into locks supported by Ndb Cluster.
  Note that MySQL Cluster currently does not support distributed
  table locks, so to be safe one should set the cluster to Single
  User Mode before relying on table locks when updating tables
  from several MySQL servers.
*/
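/*
  For example, an UPDATE running outside of LOCK TABLES arrives here with
  TL_WRITE; it is downgraded to TL_WRITE_ALLOW_WRITE below so that writers
  on other connections (or other MySQL servers) are not blocked, since the
  actual row locking is done by the NDB kernel.
*/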

THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
                                          THR_LOCK_DATA **to,
                                          enum thr_lock_type lock_type)
{
  DBUG_ENTER("store_lock");
  if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK) 
  {

    /* If we are not doing a LOCK TABLE, then allow multiple
       writers */
    
    /* Since NDB does not currently have table locks
       this is treated as an ordinary lock */

    const bool in_lock_tables = thd_in_lock_tables(thd);
    const uint sql_command = thd_sql_command(thd);
    if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
         lock_type <= TL_WRITE) &&
        !(in_lock_tables && sql_command == SQLCOM_LOCK_TABLES))
      lock_type= TL_WRITE_ALLOW_WRITE;
    
    /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
       MySQL would use the lock TL_READ_NO_INSERT on t2, and that
       would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
       to t2. Convert the lock to a normal read lock to allow
       concurrent inserts to t2. */
    
    if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
      lock_type= TL_READ;

    /**
     * We need locks on the source table when
     *   doing an offline alter...
     * In 5.1 this worked due to TL_WRITE_ALLOW_READ,
     * but that has been removed in 5.5,
     * so simply request TL_WRITE here to get the same effect.
     */
    if (sql_command == SQLCOM_ALTER_TABLE)
      lock_type = TL_WRITE;

    m_lock.type=lock_type;
  }
  *to++= &m_lock;

  DBUG_PRINT("exit", ("lock_type: %d", lock_type));
  
  DBUG_RETURN(to);
}

/*
  As MySQL will execute an external lock for every new table it uses
  we can use this to start the transactions.
  If we are in auto_commit mode we just need to start a transaction
  for the statement; this will be stored in thd_ndb.stmt.
  If not, we have to start a master transaction if one doesn't already
  exist; this will be stored in thd_ndb.all.

  When a table lock is held, one transaction will be started which holds
  the table lock, and for each statement a hupp transaction will be started.
  If we are locking the table then:
  - save the NdbDictionary::Table for easy access
  - save reference to table statistics
  - refresh the list of indexes for the table if needed (if altered)
 */

#ifdef HAVE_NDB_BINLOG
static int ndbcluster_update_apply_status(THD *thd, int do_update)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NDBDICT *dict= ndb->getDictionary();
  const NDBTAB *ndbtab;
  NdbTransaction *trans= thd_ndb->trans;
  ndb->setDatabaseName(NDB_REP_DB);
  Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE);
  if (!(ndbtab= ndbtab_g.get_table()))
  {
    return -1;
  }
  NdbOperation *op= 0;
  int r= 0;
  r|= (op= trans->getNdbOperation(ndbtab)) == 0;
  assert(r == 0);
  if (do_update)
    r|= op->updateTuple();
  else
    r|= op->writeTuple();
  assert(r == 0);
  // server_id
  r|= op->equal(0u, (Uint32)thd->server_id);
  assert(r == 0);
  if (!do_update)
  {
    // epoch
    r|= op->setValue(1u, (Uint64)0);
    assert(r == 0);
  }
  const char* group_master_log_name =
    ndb_mi_get_group_master_log_name();
  const Uint64 group_master_log_pos =
    ndb_mi_get_group_master_log_pos();
  const Uint64 future_event_relay_log_pos =
    ndb_mi_get_future_event_relay_log_pos();
  const Uint64 group_relay_log_pos =
    ndb_mi_get_group_relay_log_pos();

  // log_name
  char tmp_buf[FN_REFLEN];
  ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf,
                   group_master_log_name, (int)strlen(group_master_log_name));
  r|= op->setValue(2u, tmp_buf);
  assert(r == 0);
  // start_pos
  r|= op->setValue(3u, group_master_log_pos);
  assert(r == 0);
  // end_pos
  r|= op->setValue(4u, group_master_log_pos +
                   (future_event_relay_log_pos - group_relay_log_pos));
  assert(r == 0);
  return 0;
}
#endif /* HAVE_NDB_BINLOG */


void
Thd_ndb::transaction_checks()
{
  THD* thd = m_thd;

  if (thd->lex->sql_command == SQLCOM_LOAD)
    trans_options|= TNTO_TRANSACTIONS_OFF;
  else if (!thd->get_transaction()->m_flags.enabled)
    trans_options|= TNTO_TRANSACTIONS_OFF;
  else if (!THDVAR(thd, use_transactions))
    trans_options|= TNTO_TRANSACTIONS_OFF;
  m_force_send= THDVAR(thd, force_send);
  if (!thd->slave_thread)
    m_batch_size= THDVAR(thd, batch_size);
  else
  {
    m_batch_size= THDVAR(NULL, batch_size); /* using global value */
    /* Do not use hinted TC selection in slave thread */
    THDVAR(thd, optimized_node_selection)=
      THDVAR(NULL, optimized_node_selection) & 1; /* using global value */
  }
}


int ha_ndbcluster::start_statement(THD *thd,
                                   Thd_ndb *thd_ndb,
                                   uint table_count)
{
  NdbTransaction *trans= thd_ndb->trans;
  int error;
  DBUG_ENTER("ha_ndbcluster::start_statement");

  m_thd_ndb= thd_ndb;
  m_thd_ndb->transaction_checks();

  if (table_count == 0)
  {
    trans_register_ha(thd, FALSE, ht, NULL);
    if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
    {
      if (!trans)
        trans_register_ha(thd, TRUE, ht, NULL);
      thd_ndb->m_handler= NULL;
    }
    else
    {
      /*
        this is an autocommit, we may keep a reference to the
        handler to be used in the commit phase for optimization
        reasons, deferring execute
      */
      thd_ndb->m_handler= this;
    }
  }
  else
  {
    /*
      there is more than one handler involved, execute deferral
      not possible
    */
    ha_ndbcluster* handler = thd_ndb->m_handler;
    thd_ndb->m_handler= NULL;
    if (handler != NULL)
    {
      /**
       * If we initially believed that this could be run
       *  using execute deferral...but changed our mind,
       *  add the handler to thd_ndb->open_tables like it would
       *  have been done "normally"
       */
      add_handler_to_open_tables(thd, thd_ndb, handler);
    }
  }
  if (!trans && table_count == 0)
  {
    assert(thd_ndb->changed_tables.is_empty() == TRUE);
    thd_ndb->trans_options= 0;

    DBUG_PRINT("trans",("Possibly starting transaction"));
    const uint opti_node_select = THDVAR(thd, optimized_node_selection);
    DBUG_PRINT("enter", ("optimized_node_selection: %u", opti_node_select));
    if (!(opti_node_select & 2) ||
        thd->lex->sql_command == SQLCOM_LOAD)
      if (unlikely(!start_transaction(error)))
        DBUG_RETURN(error);

    thd_ndb->init_open_tables();
    thd_ndb->m_slow_path= FALSE;
    if (!(thd_options(thd) & OPTION_BIN_LOG) ||
        thd->variables.binlog_format == BINLOG_FORMAT_STMT)
    {
      thd_ndb->trans_options|= TNTO_NO_LOGGING;
      thd_ndb->m_slow_path= TRUE;
    }
    else if (thd->slave_thread)
      thd_ndb->m_slow_path= TRUE;
  }
  DBUG_RETURN(0);
}

int
ha_ndbcluster::add_handler_to_open_tables(THD *thd,
                                          Thd_ndb *thd_ndb,
                                          ha_ndbcluster* handler)
{
  DBUG_ENTER("ha_ndbcluster::add_handler_to_open_tables");
  DBUG_PRINT("info", ("Adding %s", handler->m_share->key_string()));

  /**
   * thd_ndb->open_tables is only used if thd_ndb->m_handler is not set
   */
  assert(thd_ndb->m_handler == NULL);
  const void *key= handler->m_share;
  HASH_SEARCH_STATE state;
  THD_NDB_SHARE *thd_ndb_share=
    (THD_NDB_SHARE*)my_hash_first(&thd_ndb->open_tables,
                                  (const uchar *)&key, sizeof(key),
                                  &state);
  while (thd_ndb_share && thd_ndb_share->key != key)
  {
    thd_ndb_share=
      (THD_NDB_SHARE*)my_hash_next(&thd_ndb->open_tables,
                                   (const uchar *)&key, sizeof(key),
                                   &state);
  }
  if (thd_ndb_share == 0)
  {
    thd_ndb_share=
      (THD_NDB_SHARE *) thd->get_transaction()->allocate_memory(sizeof(THD_NDB_SHARE));
    if (!thd_ndb_share)
    {
      mem_alloc_error(sizeof(THD_NDB_SHARE));
      DBUG_RETURN(1);
    }
    thd_ndb_share->key= key;
    thd_ndb_share->stat.last_count= thd_ndb->count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;
    my_hash_insert(&thd_ndb->open_tables, (uchar *)thd_ndb_share);
  }
  else if (thd_ndb_share->stat.last_count != thd_ndb->count)
  {
    thd_ndb_share->stat.last_count= thd_ndb->count;
    thd_ndb_share->stat.no_uncommitted_rows_count= 0;
    thd_ndb_share->stat.records= ~(ha_rows)0;
  }

  handler->m_table_info= &thd_ndb_share->stat;
  DBUG_RETURN(0);
}

int ha_ndbcluster::init_handler_for_statement(THD *thd)
{
  /*
    This is the place to make sure this handler instance
    has a started transaction.
     
    The transaction is started by the first handler on which 
    MySQL Server calls external lock
   
    Other handlers in the same stmt or transaction should use 
    the same NDB transaction. This is done by setting up the m_thd_ndb
    pointer to point to the NDB transaction object. 
   */

  DBUG_ENTER("ha_ndbcluster::init_handler_for_statement");
  Thd_ndb *thd_ndb= m_thd_ndb;
  assert(thd_ndb);

  // store thread specific data first to set the right context
  m_autoincrement_prefetch= THDVAR(thd, autoincrement_prefetch_sz);
  // Start of transaction
  m_rows_changed= 0;
  m_blobs_pending= FALSE;
  release_blobs_buffer();
  m_slow_path= m_thd_ndb->m_slow_path;
#ifdef HAVE_NDB_BINLOG
  if (unlikely(m_slow_path))
  {
    if (m_share == ndb_apply_status_share && thd->slave_thread)
        m_thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS;
  }
#endif

  int ret = 0;
  if (thd_ndb->m_handler == 0)
  {
    assert(m_share);
    ret = add_handler_to_open_tables(thd, thd_ndb, this);
  }
  else
  {
    struct Ndb_local_table_statistics &stat= m_table_info_instance;
    stat.last_count= thd_ndb->count;
    stat.no_uncommitted_rows_count= 0;
    stat.records= ~(ha_rows)0;
    m_table_info= &stat;
  }
  DBUG_RETURN(ret);
}

int ha_ndbcluster::external_lock(THD *thd, int lock_type)
{
  DBUG_ENTER("external_lock");
  if (lock_type != F_UNLCK)
  {
    int error;
    /*
      Check that this handler instance has a connection
      set up to the Ndb object of thd
    */
    if (check_ndb_connection(thd))
      DBUG_RETURN(1);
    Thd_ndb *thd_ndb= get_thd_ndb(thd);

    DBUG_PRINT("enter", ("lock_type != F_UNLCK "
                         "this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
                         "thd_ndb->lock_count: %d",
                         (long) this, (long) thd, (long) thd_ndb,
                         thd_ndb->lock_count));

    if ((error= start_statement(thd, thd_ndb,
                                thd_ndb->lock_count++)))
    {
      thd_ndb->lock_count--;
      DBUG_RETURN(error);
    }
    if ((error= init_handler_for_statement(thd)))
    {
      thd_ndb->lock_count--;
      DBUG_RETURN(error);
    }
    DBUG_RETURN(0);
  }
  else
  {
    Thd_ndb *thd_ndb= m_thd_ndb;
    assert(thd_ndb);

    DBUG_PRINT("enter", ("lock_type == F_UNLCK "
                         "this: 0x%lx  thd: 0x%lx  thd_ndb: %lx  "
                         "thd_ndb->lock_count: %d",
                         (long) this, (long) thd, (long) thd_ndb,
                         thd_ndb->lock_count));

    if (m_rows_changed && global_system_variables.query_cache_type)
    {
      DBUG_PRINT("info", ("Rows has changed"));

      if (thd_ndb->trans &&
          thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      {
        DBUG_PRINT("info", ("Add share to list of changed tables, %p",
                            m_share));
        /* NOTE push_back allocates memory using transactions mem_root! */
        thd_ndb->changed_tables.push_back(get_share(m_share),
                                          thd->get_transaction()->transaction_memroot());
      }

      if (opt_ndb_cache_check_time)
      {
        native_mutex_lock(&m_share->mutex);
        DBUG_PRINT("info", ("Invalidating commit_count"));
        m_share->commit_count= 0;
        m_share->commit_count_lock++;
        native_mutex_unlock(&m_share->mutex);
      }
    }

    if (!--thd_ndb->lock_count)
    {
      DBUG_PRINT("trans", ("Last external_lock"));

      if ((!(thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) &&
          thd_ndb->trans)
      {
        if (thd_ndb->trans)
        {
          /*
            Unlock is done without a transaction commit / rollback.
            This happens if the thread didn't update any rows.
            We must in this case close the transaction to release resources.
          */
          DBUG_PRINT("trans",("ending non-updating transaction"));
          thd_ndb->ndb->closeTransaction(thd_ndb->trans);
          thd_ndb->trans= NULL;
          thd_ndb->m_handler= NULL;
        }
      }
    }
    m_table_info= NULL;

    /*
      This is the place to make sure this handler instance
      is no longer connected to the active transaction.

      And since the handler is no longer part of the transaction 
      it can't have open cursors, ops, queries or blobs pending.
    */
    m_thd_ndb= NULL;    

    assert(m_active_query == NULL);
    if (m_active_query)
      DBUG_PRINT("warning", ("m_active_query != NULL"));
    m_active_query= NULL;

    if (m_active_cursor)
      DBUG_PRINT("warning", ("m_active_cursor != NULL"));
    m_active_cursor= NULL;

    if (m_multi_cursor)
      DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
    m_multi_cursor= NULL;

    if (m_blobs_pending)
      DBUG_PRINT("warning", ("blobs_pending != 0"));
    m_blobs_pending= 0;
    
    DBUG_RETURN(0);
  }
}

/**
  Unlock the last row read in an open scan.
  Rows are unlocked by default in ndb, but
  for SELECT FOR UPDATE and SELECT LOCK IN SHARE MODE
  locks are kept if unlock_row() is not called.
*/

void ha_ndbcluster::unlock_row() 
{
  DBUG_ENTER("unlock_row");

  DBUG_PRINT("info", ("Unlocking row"));
  m_lock_tuple= FALSE;
  DBUG_VOID_RETURN;
}

/**
  Start statement, used when one of the tables is locked and also when
  a stored function is executed.

  start_stmt()
    thd                    Thd object
    lock_type              Lock type on table

  RETURN VALUE
    0                      Success
    >0                     Error code

  DESCRIPTION
    This call indicates the start of a statement when one of the tables in
    the statement is locked. In this case we cannot call external_lock.
    It also implies that external_lock is not called at end of statement.
    Rather, the handlerton commit call (ndbcluster_commit) is made to
    indicate end of transaction. There are thus cases when the commit call
    doesn't actually refer to a commit but only to an end of statement.

    In the case of stored functions, one stored function is treated as one
    statement and the call to commit comes at the end of the stored function.
*/

int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
{
  int error=0;
  Thd_ndb *thd_ndb;
  DBUG_ENTER("start_stmt");
  assert(thd == table->in_use);

  thd_ndb= get_thd_ndb(thd);
  if ((error= start_statement(thd, thd_ndb, thd_ndb->start_stmt_count++)))
    goto error;
  if ((error= init_handler_for_statement(thd)))
    goto error;
  DBUG_RETURN(0);
error:
  thd_ndb->start_stmt_count--;
  DBUG_RETURN(error);
}
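
/*
  Note on the start_transaction_* variants below: start_transaction_row()
  and start_transaction_key() pass a record or key as a hint when starting
  the NDB transaction, which allows the transaction coordinator to be
  placed close to the data (counted in m_transaction_hint_count), while
  the plain start_transaction() starts an unhinted transaction (counted
  in m_transaction_no_hint_count).
*/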

NdbTransaction *
ha_ndbcluster::start_transaction_row(const NdbRecord *ndb_record,
                                     const uchar *record,
                                     int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction_row");
  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  Ndb *ndb= m_thd_ndb->ndb;

  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
  char *buf= (char*)&tmp[0];
  trans= ndb->startTransaction(ndb_record,
                               (const char*)record,
                               buf, sizeof(tmp));

  if (trans)
  {
    m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}

NdbTransaction *
ha_ndbcluster::start_transaction_key(uint inx_no,
                                     const uchar *key_data,
                                     int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction_key");
  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  Ndb *ndb= m_thd_ndb->ndb;
  const NdbRecord *key_rec= m_index[inx_no].ndb_unique_record_key;

  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
  char *buf= (char*)&tmp[0];
  trans= ndb->startTransaction(key_rec,
                               (const char*)key_data,
                               buf, sizeof(tmp));

  if (trans)
  {
    m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}

NdbTransaction *
ha_ndbcluster::start_transaction(int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction");

  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  const uint opti_node_select= THDVAR(table->in_use, optimized_node_selection);
  m_thd_ndb->connection->set_optimized_node_selection(opti_node_select & 1);
  if ((trans= m_thd_ndb->ndb->startTransaction()))
  {
    m_thd_ndb->m_transaction_no_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}
   
NdbTransaction *
ha_ndbcluster::start_transaction_part_id(Uint32 part_id, int &error)
{
  NdbTransaction *trans;
  DBUG_ENTER("ha_ndbcluster::start_transaction_part_id");

  assert(m_thd_ndb);
  assert(m_thd_ndb->trans == NULL);

  m_thd_ndb->transaction_checks();

  if ((trans= m_thd_ndb->ndb->startTransaction(m_table, part_id)))
  {
    m_thd_ndb->m_transaction_hint_count[trans->getConnectedNodeId()]++;
    DBUG_PRINT("info", ("Delayed allocation of TC"));
    DBUG_RETURN(m_thd_ndb->trans= trans);
  }

  ERR_SET(m_thd_ndb->ndb->getNdbError(), error);
  DBUG_RETURN(NULL);
}
   
/**
  Static error print function called from static handler method
  ndbcluster_commit and ndbcluster_rollback.
*/
static void
ndbcluster_print_error(int error, const NdbOperation *error_op)
{
  DBUG_ENTER("ndbcluster_print_error");
  TABLE_SHARE share;
  const char *tab_name= (error_op) ? error_op->getTableName() : "";
  if (tab_name == NULL)
  {
    assert(tab_name != NULL);
    tab_name= "";
  }
  share.db.str= (char*) "";
  share.db.length= 0;
  share.table_name.str= (char *) tab_name;
  share.table_name.length= strlen(tab_name);
  ha_ndbcluster error_handler(ndbcluster_hton, &share);
  error_handler.print_error(error, MYF(0));
  DBUG_VOID_RETURN;
}


/**
  Commit a transaction started in NDB.
*/

int ndbcluster_commit(handlerton *hton, THD *thd, bool all)
{
  int res= 0;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= thd_ndb->trans;
  bool retry_slave_trans = false;
  (void) retry_slave_trans;

  DBUG_ENTER("ndbcluster_commit");
  assert(ndb);
  DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt")));
  thd_ndb->start_stmt_count= 0;
  if (trans == NULL)
  {
    DBUG_PRINT("info", ("trans == NULL"));
    DBUG_RETURN(0);
  }
  if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
  {
    /*
      An oddity in the handler interface is that commit on handlerton
      is called to indicate end of statement only in cases where
      autocommit isn't used and the all flag isn't set.

      We also leave quickly when a transaction hasn't even been started;
      in this case we are safe that no cleanup is needed. In this case
      the MySQL Server could handle the query without contacting the
      NDB kernel.
    */
    thd_ndb->save_point_count++;
    DBUG_PRINT("info", ("Commit before start or end-of-statement only"));
    DBUG_RETURN(0);
  }
  thd_ndb->save_point_count= 0;

#ifdef HAVE_NDB_BINLOG
  if (unlikely(thd_ndb->m_slow_path))
  {
    if (thd->slave_thread)
      ndbcluster_update_apply_status
        (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS);
  }
#endif /* HAVE_NDB_BINLOG */

  if (thd->slave_thread)
  {
#ifdef HAVE_NDB_BINLOG
    /* If this slave transaction has included conflict detecting ops
     * and some defined operations are not yet sent, then perform
     * an execute(NoCommit) before committing, as conflict op handling
     * is done by execute(NoCommit)
     */
    /* TODO : Add as function */
    if (g_ndb_slave_state.conflict_flags & SCS_OPS_DEFINED)
    {
      if (thd_ndb->m_unsent_bytes)
        res = execute_no_commit(thd_ndb, trans, TRUE);
    }

    if (likely(res == 0))
      res = g_ndb_slave_state.atConflictPreCommit(retry_slave_trans);
#endif /* HAVE_NDB_BINLOG */

    if (likely(res == 0))
      res= execute_commit(thd_ndb, trans, 1, TRUE);

    update_slave_api_stats(thd_ndb->ndb);
  }
  else
  {
    if (thd_ndb->m_handler &&
        thd_ndb->m_handler->m_read_before_write_removal_possible)
    {
      /*
        This is an autocommit involving only one table and
        rbwr is on, thus the transaction has already been
        committed in exec_bulk_update() or end_bulk_delete()
      */
      DBUG_PRINT("info", ("autocommit+rbwr, transaction already committed"));
      const NdbTransaction::CommitStatusType commitStatus = trans->commitStatus();
      
      if(commitStatus == NdbTransaction::Committed)
      {
        /* Already committed transaction to save roundtrip */
        assert(get_thd_ndb(current_thd)->m_error == FALSE);
      }
      else if(commitStatus == NdbTransaction::Aborted)
      {
        /* Commit failed before transaction was started */ 
        assert(get_thd_ndb(current_thd)->m_error == TRUE);
      }
      else if(commitStatus == NdbTransaction::NeedAbort)
      {
        /* Commit attempt failed and rollback is needed */
        res = -1; 
        
      }
      else
      {
        /* Commit was never attempted - this should not be possible */
        assert(commitStatus == NdbTransaction::Started || commitStatus == NdbTransaction::NotStarted);
        sql_print_error("found uncommitted autocommit+rbwr transaction, "
                        "commit status: %d", commitStatus);
        abort();
      }
    }
    else
    {
      const bool ignore_error= applying_binlog(thd);
      res= execute_commit(thd_ndb, trans,
                          THDVAR(thd, force_send),
                          ignore_error);
    }
  }

  if (res != 0)
  {
#ifdef HAVE_NDB_BINLOG
    if (retry_slave_trans)
    {
      if (st_ndb_slave_state::MAX_RETRY_TRANS_COUNT >
          g_ndb_slave_state.retry_trans_count++)
      {
        /*
           Warning is necessary to cause retry from slave.cc
           exec_relay_log_event()
        */
        push_warning(thd, Sql_condition::SL_WARNING,
                     ER_SLAVE_SILENT_RETRY_TRANSACTION,
                     "Slave transaction rollback requested");
        /*
          Set retry count to zero to:
          1) Avoid consuming slave-temp-error retry attempts
          2) Ensure no inter-attempt sleep

          Better fix : Save + restore retry count around transactional
          conflict handling
        */
        ndb_mi_set_relay_log_trans_retries(0);
      }
      else
      {
        /*
           Too many retries, print error and exit - normal
           too many retries mechanism will cause exit
         */
        sql_print_error("Ndb slave retried transaction %u time(s) in vain.  Giving up.",
                        st_ndb_slave_state::MAX_RETRY_TRANS_COUNT);
      }
      res= ER_GET_TEMPORARY_ERRMSG;
    }
    else
#endif
    {
      const NdbError err= trans->getNdbError();
      const NdbOperation *error_op= trans->getNdbErrorOperation();
      res= ndb_to_mysql_error(&err);
      if (res != -1)
        ndbcluster_print_error(res, error_op);
    }
  }
  else
  {
    /* Update shared statistics for tables inserted into / deleted from */
    if (thd_ndb->m_handler &&      // Autocommit Txn
        thd_ndb->m_handler->m_share &&
        thd_ndb->m_handler->m_table_info)
    {
      modify_shared_stats(thd_ndb->m_handler->m_share, thd_ndb->m_handler->m_table_info);
    }

    /* Manual commit: Update all affected NDB_SHAREs found in 'open_tables' */
    for (uint i= 0; i<thd_ndb->open_tables.records; i++)
    {
      THD_NDB_SHARE *thd_share=
        (THD_NDB_SHARE*)my_hash_element(&thd_ndb->open_tables, i);
      modify_shared_stats((NDB_SHARE*)thd_share->key, &thd_share->stat);
    }
  }

  ndb->closeTransaction(trans);
  thd_ndb->trans= NULL;
  thd_ndb->m_handler= NULL;

  /* Clear commit_count for tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    DBUG_PRINT("info", ("Remove share to list of changed tables, %p",
                        share));
    native_mutex_lock(&share->mutex);
    DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu",
                        share->table_name, (ulong) share->commit_count));
    share->commit_count= 0;
    share->commit_count_lock++;
    native_mutex_unlock(&share->mutex);
    free_share(&share);
  }
  thd_ndb->changed_tables.empty();

  DBUG_RETURN(res);
}


/**
  Rollback a transaction started in NDB.
*/

static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all)
{
  int res= 0;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= thd_ndb->trans;

  DBUG_ENTER("ndbcluster_rollback");
  DBUG_PRINT("enter", ("all: %d  thd_ndb->save_point_count: %d",
                       all, thd_ndb->save_point_count));
  assert(ndb);
  thd_ndb->start_stmt_count= 0;
  if (trans == NULL)
  {
    /* Ignore end-of-statement until real rollback or commit is called */
    DBUG_PRINT("info", ("trans == NULL"));
    DBUG_RETURN(0);
  }
  if (!all && (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
      (thd_ndb->save_point_count > 0))
  {
    /*
      Ignore end-of-statement until real rollback or commit is called
      as ndb does not support statement rollback
      - mark that the rollback was unsuccessful; this will cause a full
      rollback of the transaction
    */
    DBUG_PRINT("info", ("Rollback before start or end-of-statement only"));
    thd_mark_transaction_to_rollback(thd, 1);
    my_error(ER_WARN_ENGINE_TRANSACTION_ROLLBACK, MYF(0), "NDB");
    DBUG_RETURN(0);
  }
  thd_ndb->save_point_count= 0;
  if (thd->slave_thread)
    g_ndb_slave_state.atTransactionAbort();
  thd_ndb->m_unsent_bytes= 0;
  thd_ndb->m_execute_count++;
  DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
  if (trans->execute(NdbTransaction::Rollback) != 0)
  {
    const NdbError err= trans->getNdbError();
    const NdbOperation *error_op= trans->getNdbErrorOperation();
    res= ndb_to_mysql_error(&err);
    if (res != -1) 
      ndbcluster_print_error(res, error_op);
  }
  ndb->closeTransaction(trans);
  thd_ndb->trans= NULL;
  thd_ndb->m_handler= NULL;

  /* Clear list of tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    DBUG_PRINT("info", ("Remove share to list of changed tables, %p",
                        share));
    free_share(&share);
  }
  thd_ndb->changed_tables.empty();

  if (thd->slave_thread)
    update_slave_api_stats(thd_ndb->ndb);

  DBUG_RETURN(res);
}

/**
 * Support for create table/column modifiers
 *   by exploiting the comment field
 */
struct NDB_Modifier
{
  enum { M_BOOL } m_type;
  const char * m_name;
  size_t m_name_len;
  bool m_found;
  union {
    bool m_val_bool;
#ifdef TODO__
    int m_val_int;
    struct {
      const char * str;
      size_t len;
    } m_val_str;
#endif
  };
};

static const
struct NDB_Modifier ndb_table_modifiers[] =
{
  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("NOLOGGING"), 0, {0} },
  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
};

static const
struct NDB_Modifier ndb_column_modifiers[] =
{
  { NDB_Modifier::M_BOOL, STRING_WITH_LEN("MAX_BLOB_PART_SIZE"), 0, {0} },
  { NDB_Modifier::M_BOOL, 0, 0, 0, {0} }
};

/**
 * NDB_Modifiers
 *
 * This class implements a simple parser for getting modifiers out
 *   of a string (e.g. a comment field)
 */
class NDB_Modifiers
{
public:
  NDB_Modifiers(const NDB_Modifier modifiers[]);
  ~NDB_Modifiers();

  /**
   * parse string with length (not necessarily NULL terminated)
   */
  int parse(THD* thd, const char * prefix, const char * str, size_t strlen);

  /**
   * Get modifier...returns NULL if unknown
   */
  const NDB_Modifier * get(const char * name) const;
private:
  uint m_len;
  struct NDB_Modifier * m_modifiers;

  int parse_modifier(THD *thd, const char * prefix,
                     struct NDB_Modifier* m, const char * str);
};
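
/*
  Usage sketch (illustrative; the local variable names are hypothetical):

    NDB_Modifiers table_modifiers(ndb_table_modifiers);
    table_modifiers.parse(thd, "NDB_TABLE=", comment_str, comment_len);
    const NDB_Modifier *mod = table_modifiers.get("NOLOGGING");
    if (mod->m_found && mod->m_val_bool)
      ... // NOLOGGING was specified in the comment

  The prefix string ("NDB_TABLE=" above) is chosen by the caller.
*/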

static
bool
end_of_token(const char * str)
{
  return str[0] == 0 || str[0] == ' ' || str[0] == ',';
}

NDB_Modifiers::NDB_Modifiers(const NDB_Modifier modifiers[])
{
  for (m_len = 0; modifiers[m_len].m_name != 0; m_len++)
  {}
  m_modifiers = new NDB_Modifier[m_len];
  memcpy(m_modifiers, modifiers, m_len * sizeof(NDB_Modifier));
}

NDB_Modifiers::~NDB_Modifiers()
{
  delete [] m_modifiers;
}

int
NDB_Modifiers::parse_modifier(THD *thd,
                              const char * prefix,
                              struct NDB_Modifier* m,
                              const char * str)
{
  if (m->m_found)
  {
    push_warning_printf(thd, Sql_condition::SL_WARNING,
                        ER_ILLEGAL_HA_CREATE_OPTION,
                        "%s : modifier %s specified twice",
                        prefix, m->m_name);
  }

  switch(m->m_type){
  case NDB_Modifier::M_BOOL:
    if (end_of_token(str))
    {
      m->m_val_bool = true;
      goto found;
    }
    if (str[0] != '=')
      break;

    str++;
    if (str[0] == '1' && end_of_token(str+1))
    {
      m->m_val_bool = true;
      goto found;
    }

    if (str[0] == '0' && end_of_token(str+1))
    {
      m->m_val_bool = false;
      goto found;
    }
  }

  {
    const char * end = strpbrk(str, " ,");
    if (end)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "%s : invalid value '%.*s' for %s",
                          prefix, (int)(end - str), str, m->m_name);
    }
    else
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "%s : invalid value '%s' for %s",
                          prefix, str, m->m_name);
    }
  }
  return -1;
found:
  m->m_found = true;
  return 0;
}

int
NDB_Modifiers::parse(THD *thd,
                     const char * prefix,
                     const char * _source,
                     size_t _source_len)
{
  if (_source == 0 || _source_len == 0)
    return 0;

  const char * source = 0;

  /**
   * Check if _source is NULL-terminated
   */
  for (size_t i = 0; i<_source_len; i++)
  {
    if (_source[i] == 0)
    {
      source = _source;
      break;
    }
  }

  if (source == 0)
  {
    /**
     * Make a NULL-terminated copy so that the strXXX-functions are safe
     */
    char * tmp = new char[_source_len+1];
    if (tmp == 0)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "%s : unable to parse due to out of memory",
                          prefix);
      return -1;
    }
    memcpy(tmp, _source, _source_len);
    tmp[_source_len] = 0;
    source = tmp;
  }

  const char * pos = source;
  if ((pos = strstr(pos, prefix)) == 0)
  {
    if (source != _source)
      delete [] source;
    return 0;
  }

  pos += strlen(prefix);

  while (pos && pos[0] != 0 && pos[0] != ' ')
  {
    const char * end = strpbrk(pos, " ,"); // end of current modifier

    for (uint i = 0; i < m_len; i++)
    {
      size_t l = m_modifiers[i].m_name_len;
      if (strncmp(pos, m_modifiers[i].m_name, l) == 0)
      {
        /**
         * Found modifier...
         */

        if (! (end_of_token(pos + l) || pos[l] == '='))
          goto unknown;

        pos += l;
        int res = parse_modifier(thd, prefix, m_modifiers+i, pos);

        if (res == -1)
        {
          /**
           * We continue parsing even if the modifier had an error
           */
        }

        goto next;
      }
    }

    {
  unknown:
      if (end)
      {
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA_CREATE_OPTION,
                            "%s : unknown modifier: %.*s",
                            prefix, (int)(end - pos), pos);
      }
      else
      {
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA_CREATE_OPTION,
                            "%s : unknown modifier: %s",
                            prefix, pos);
      }
    }

next:
    pos = end;
    if (pos && pos[0] == ',')
      pos++;
  }

  if (source != _source)
    delete [] source;

  return 0;
}

const NDB_Modifier *
NDB_Modifiers::get(const char * name) const
{
  for (uint i = 0; i < m_len; i++)
  {
    if (strcmp(name, m_modifiers[i].m_name) == 0)
    {
      return m_modifiers + i;
    }
  }
  return 0;
}

/**
  Define NDB column based on Field.

  Not a member of ha_ndbcluster because NDBCOL cannot be declared there.

  MySQL text types with character set "binary" are mapped to true
  NDB binary types without a character set.

  Blobs are V2 and striping from mysql level is not supported
  due to lack of syntax and lack of support for partitioning.

  @return
    Returns 0 or mysql error code.
*/
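
/*
  For example (an illustrative reading of the switch statement below): a
  column declared as VARCHAR(100) with character set "binary" maps to
  NDBCOL::Varbinary (one length byte) or NDBCOL::Longvarbinary (two length
  bytes), while the same column with a real character set maps to
  NDBCOL::Varchar/Longvarchar with the character set attached through
  setCharset().
*/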

static bool
ndb_blob_striping()
{
#ifndef NDEBUG
  const char* p= getenv("NDB_BLOB_STRIPING");
  if (p != 0 && *p != 0 && *p != '0' && *p != 'n' && *p != 'N')
    return true;
#endif
  return false;
}

#if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = 2013;
#else
const Uint32 OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS = NDB_MAX_TUPLE_SIZE_IN_WORDS;
#endif

static int
create_ndb_column(THD *thd,
                  NDBCOL &col,
                  Field *field,
                  HA_CREATE_INFO *create_info,
                  column_format_type default_format= COLUMN_FORMAT_TYPE_DEFAULT)
{
  NDBCOL::StorageType type= NDBCOL::StorageTypeMemory;
  bool dynamic= FALSE;

  char buf[MAX_ATTR_DEFAULT_VALUE_SIZE];
  DBUG_ENTER("create_ndb_column");
  // Set name
  if (col.setName(field->field_name))
  {
    set_my_errno(errno);
    DBUG_RETURN(errno);
  }
  // Get char set
  CHARSET_INFO *cs= const_cast<CHARSET_INFO*>(field->charset());
  // Set type and sizes
  const enum enum_field_types mysql_type= field->real_type();

  NDB_Modifiers column_modifiers(ndb_column_modifiers);
  column_modifiers.parse(thd, "NDB_COLUMN=",
                         field->comment.str,
                         field->comment.length);

  const NDB_Modifier * mod_maxblob = column_modifiers.get("MAX_BLOB_PART_SIZE");

  {
    /* Clear default value (col obj is reused for whole table def) */
    col.setDefaultValue(NULL, 0); 

    /* If the data nodes are capable then set native 
     * default.
     */
    bool nativeDefaults =
      ! (thd &&
         (! ndb_native_default_support(get_thd_ndb(thd)->
                                       ndb->getMinDbNodeVersion())));

    if (likely( nativeDefaults ))
    {
      if ((!(field->flags & PRI_KEY_FLAG) ) &&
          type_supports_default_value(mysql_type))
      {
        if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
        {
          my_ptrdiff_t src_offset= field->table->default_values_offset();
          if ((! field->is_real_null(src_offset)) ||
              ((field->flags & NOT_NULL_FLAG)))
          {
            /* Set a non-null native default */
            memset(buf, 0, MAX_ATTR_DEFAULT_VALUE_SIZE);
            get_default_value(buf, field);

            /* For bit columns, the default length is rounded up to the
               nearest word, ensuring all data is sent
            */
            Uint32 defaultLen = field_used_length(field);
            if(field->type() == MYSQL_TYPE_BIT)
              defaultLen = ((defaultLen + 3) /4) * 4;
            col.setDefaultValue(buf, defaultLen);
          }
        }
      }
    }
  }
  switch (mysql_type) {
  // Numeric types
  case MYSQL_TYPE_TINY:        
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Tinyunsigned);
    else
      col.setType(NDBCOL::Tinyint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_SHORT:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Smallunsigned);
    else
      col.setType(NDBCOL::Smallint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Unsigned);
    else
      col.setType(NDBCOL::Int);
    col.setLength(1);
    break;
  case MYSQL_TYPE_INT24:       
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Mediumunsigned);
    else
      col.setType(NDBCOL::Mediumint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONGLONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Bigunsigned);
    else
      col.setType(NDBCOL::Bigint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_FLOAT:
    col.setType(NDBCOL::Float);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DOUBLE:
    col.setType(NDBCOL::Double);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DECIMAL:    
    {
      Field_decimal *f= (Field_decimal*)field;
      uint precision= f->pack_length();
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Olddecimalunsigned);
        precision-= (scale > 0);
      }
      else
      {
        col.setType(NDBCOL::Olddecimal);
        precision-= 1 + (scale > 0);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  case MYSQL_TYPE_NEWDECIMAL:    
    {
      Field_new_decimal *f= (Field_new_decimal*)field;
      uint precision= f->precision;
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Decimalunsigned);
      }
      else
      {
        col.setType(NDBCOL::Decimal);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
  // Date types
  case MYSQL_TYPE_DATETIME:    
    col.setType(NDBCOL::Datetime);
    col.setLength(1);
    break;
  case MYSQL_TYPE_DATETIME2:    
    {
      Field_datetimef *f= (Field_datetimef*)field;
      uint prec= f->decimals();
      col.setType(NDBCOL::Datetime2);
      col.setLength(1);
      col.setPrecision(prec);
    }
    break;
  case MYSQL_TYPE_DATE: // ?
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_NEWDATE:
    col.setType(NDBCOL::Date);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIME:        
    col.setType(NDBCOL::Time);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIME2:        
    {
      Field_timef *f= (Field_timef*)field;
      uint prec= f->decimals();
      col.setType(NDBCOL::Time2);
      col.setLength(1);
      col.setPrecision(prec);
    }
    break;
  case MYSQL_TYPE_YEAR:
    col.setType(NDBCOL::Year);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    col.setType(NDBCOL::Timestamp);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP2:
    {
      Field_timestampf *f= (Field_timestampf*)field;
      uint prec= f->decimals();
      col.setType(NDBCOL::Timestamp2);
      col.setLength(1);
      col.setPrecision(prec);
    }
    break;
  // Char types
  case MYSQL_TYPE_STRING:      
    if (field->pack_length() == 0)
    {
      col.setType(NDBCOL::Bit);
      col.setLength(1);
    }
    else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
    {
      col.setType(NDBCOL::Binary);
      col.setLength(field->pack_length());
    }
    else
    {
      col.setType(NDBCOL::Char);
      col.setCharset(cs);
      col.setLength(field->pack_length());
    }
    break;
  case MYSQL_TYPE_VAR_STRING: // ?
  case MYSQL_TYPE_VARCHAR:
    {
      Field_varstring* f= (Field_varstring*)field;
      if (f->length_bytes == 1)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Varbinary);
        else {
          col.setType(NDBCOL::Varchar);
          col.setCharset(cs);
        }
      }
      else if (f->length_bytes == 2)
      {
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
          col.setType(NDBCOL::Longvarbinary);
        else {
          col.setType(NDBCOL::Longvarchar);
          col.setCharset(cs);
        }
      }
      else
      {
        DBUG_RETURN(HA_ERR_UNSUPPORTED);
      }
      col.setLength(field->field_length);
    }
    break;
  // Blob types (all come in as MYSQL_TYPE_BLOB)
  mysql_type_tiny_blob:
  case MYSQL_TYPE_TINY_BLOB:
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    // No parts
    col.setPartSize(0);
    col.setStripeSize(ndb_blob_striping() ? 0 : 0);
    break;
  //mysql_type_blob:
  case MYSQL_TYPE_GEOMETRY:
  case MYSQL_TYPE_BLOB:    
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    {
      Field_blob *field_blob= (Field_blob *)field;
      /*
       * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, blob, medium.
       * Tinyblob gets no blob parts.  The other cases are just a crude
       * way to control part size and striping.
       *
       * In mysql blob(256) is promoted to blob(65535) so it does not
       * in fact fit "inline" in NDB.
       */
      if (field_blob->max_data_length() < (1 << 8))
        goto mysql_type_tiny_blob;
      else if (field_blob->max_data_length() < (1 << 16))
      {
        col.setInlineSize(256);
        col.setPartSize(2000);
        col.setStripeSize(ndb_blob_striping() ? 16 : 0);
        if (mod_maxblob->m_found)
        {
          col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
        }
      }
      else if (field_blob->max_data_length() < (1 << 24))
        goto mysql_type_medium_blob;
      else
        goto mysql_type_long_blob;
    }
    break;
  mysql_type_medium_blob:
  case MYSQL_TYPE_MEDIUM_BLOB:   
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    col.setPartSize(4000);
    col.setStripeSize(ndb_blob_striping() ? 8 : 0);
    if (mod_maxblob->m_found)
    {
      col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    }
    break;
  mysql_type_long_blob:
  case MYSQL_TYPE_LONG_BLOB:  
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
      col.setType(NDBCOL::Blob);
    else {
      col.setType(NDBCOL::Text);
      col.setCharset(cs);
    }
    col.setInlineSize(256);
    col.setPartSize(4 * (OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    col.setStripeSize(ndb_blob_striping() ? 4 : 0);
    if (mod_maxblob->m_found)
    {
      col.setPartSize(4 * (NDB_MAX_TUPLE_SIZE_IN_WORDS - /* safety */ 13));
    }
    break;

  // MySQL 5.7 binary-encoded JSON type
  case MYSQL_TYPE_JSON:
  {
    /*
      JSON columns are just like LONG BLOB columns except for inline size
      and part size. Inline size is chosen to accommodate a large number
      of embedded json documents without spilling over to the part table.
      The tradeoff is that only three JSON columns can be defined in a table
      due to the large inline size. Part size is chosen to optimize use of
      pages in the part table. Note that much of the JSON functionality is
      available by storing JSON documents in VARCHAR columns, including
      extracting keys from documents to be used as indexes.
     */
    const int NDB_JSON_INLINE_SIZE = 4000;
    const int NDB_JSON_PART_SIZE = 8100;

    col.setType(NDBCOL::Blob);
    col.setInlineSize(NDB_JSON_INLINE_SIZE);
    col.setPartSize(NDB_JSON_PART_SIZE);
    col.setStripeSize(ndb_blob_striping() ? 16 : 0);
    break;
  }
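
  /*
    Illustrative arithmetic (assuming the roughly 14000-byte maximum row
    size of this NDB version): three JSON columns contribute
    3 * 4000 = 12000 bytes of inline data, which still fits in a row,
    while a fourth would push the inline data alone past the limit -
    hence the "only three JSON columns" restriction noted above.
  */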

  // Other types
  case MYSQL_TYPE_ENUM:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_SET:         
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_BIT:
  {
    int no_of_bits= field->field_length;
    col.setType(NDBCOL::Bit);
    if (!no_of_bits)
      col.setLength(1);
    else
      col.setLength(no_of_bits);
    break;
  }
  case MYSQL_TYPE_NULL:        
    goto mysql_type_unsupported;
  mysql_type_unsupported:
  default:
    DBUG_RETURN(HA_ERR_UNSUPPORTED);
  }
  // Set nullable and pk
  col.setNullable(field->maybe_null());
  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
  if ((field->flags & FIELD_IN_PART_FUNC_FLAG) != 0)
  {
    col.setPartitionKey(TRUE);
  }

  // Set autoincrement
  if (field->flags & AUTO_INCREMENT_FLAG) 
  {
    col.setAutoIncrement(TRUE);
    ulonglong value= create_info->auto_increment_value ?
      create_info->auto_increment_value : (ulonglong) 1;
    DBUG_PRINT("info", ("Autoincrement key, initial: %llu", value));
    col.setAutoIncrementInitialValue(value);
  }
  else
    col.setAutoIncrement(FALSE);

  DBUG_PRINT("info", ("storage: %u  format: %u  ",
                      field->field_storage_type(),
                      field->column_format()));
  switch (field->field_storage_type()) {
  case(HA_SM_DEFAULT):
  default:
    if (create_info->storage_media == HA_SM_DISK)
      type= NDBCOL::StorageTypeDisk;
    else
      type= NDBCOL::StorageTypeMemory;
    break;
  case(HA_SM_DISK):
    type= NDBCOL::StorageTypeDisk;
    break;
  case(HA_SM_MEMORY):
    type= NDBCOL::StorageTypeMemory;
    break;
  }

  switch (field->column_format()) {
  case(COLUMN_FORMAT_TYPE_FIXED):
    dynamic= FALSE;
    break;
  case(COLUMN_FORMAT_TYPE_DYNAMIC):
    dynamic= TRUE;
    break;
  case(COLUMN_FORMAT_TYPE_DEFAULT):
  default:
    if (create_info->row_type == ROW_TYPE_DEFAULT)
      dynamic= default_format;
    else
      dynamic= (create_info->row_type == ROW_TYPE_DYNAMIC);
    break;
  }
  DBUG_PRINT("info", ("Column %s is declared %s", field->field_name,
                      (dynamic) ? "dynamic" : "static"));
  if (type == NDBCOL::StorageTypeDisk)
  {
    if (dynamic)
    {
      DBUG_PRINT("info", ("Dynamic disk stored column %s changed to static",
                          field->field_name));
      dynamic= false;
    }

    if (thd && field->column_format() == COLUMN_FORMAT_TYPE_DYNAMIC)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "DYNAMIC column %s with "
                          "STORAGE DISK is not supported, "
                          "column will become FIXED",
                          field->field_name);
    }
  }

  switch (create_info->row_type) {
  case ROW_TYPE_FIXED:
    if (thd && (dynamic || field_type_forces_var_part(field->type())))
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          "Row format FIXED incompatible with "
                          "dynamic attribute %s",
                          field->field_name);
    }
    break;
  case ROW_TYPE_DYNAMIC:
    /*
      Future: make columns dynamic in this case
    */
    break;
  default:
    break;
  }

  DBUG_PRINT("info", ("Format %s, Storage %s", (dynamic)?"dynamic":"fixed",(type == NDBCOL::StorageTypeDisk)?"disk":"memory"));
  col.setStorageType(type);
  col.setDynamic(dynamic);

  DBUG_RETURN(0);
}

void ha_ndbcluster::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_ndbcluster::update_create_info");
  THD *thd= current_thd;
  const NDBTAB *ndbtab= m_table;
  Ndb *ndb= check_ndb_in_thd(thd);

  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
  {
    /*
      Find any initial auto_increment value
    */
    for (uint i= 0; i < table->s->fields; i++) 
    {
      Field *field= table->field[i];
      if (field->flags & AUTO_INCREMENT_FLAG)
      {
        ulonglong auto_value;
        uint retries= NDB_AUTO_INCREMENT_RETRIES;
        int retry_sleep= 30; /* 30 milliseconds between retries */
        for (;;)
        {
          Ndb_tuple_id_range_guard g(m_share);
          if (ndb->readAutoIncrementValue(ndbtab, g.range, auto_value))
          {
            if (--retries && !thd->killed &&
                ndb->getNdbError().status == NdbError::TemporaryError)
            {
              do_retry_sleep(retry_sleep);
              continue;
            }
            const NdbError err= ndb->getNdbError();
            sql_print_error("Error %lu in ::update_create_info(): %s",
                            (ulong) err.code, err.message);
            DBUG_VOID_RETURN;
          }
          break;
        }
        if (auto_value > 1)
        {
          create_info->auto_increment_value= auto_value;
        }
        break;
      }
    }
  }

  /*
    FK data is handled in get_metadata and release_metadata but
    for some reason it is not enough
  */
  if (1)
  {
    int error= get_fk_data(thd, ndb);
    if (error != 0)
    {
      sql_print_error("update_create_info: get FK data: error %d", error);
      DBUG_VOID_RETURN;
    }
  }

  DBUG_VOID_RETURN;
}

/*
  Create a table in NDB Cluster
 */
static uint get_no_fragments(ulonglong max_rows)
{
  ulonglong acc_row_size= 25 + /*safety margin*/ 2;
  ulonglong acc_fragment_size= 512*1024*1024;
  return uint((max_rows*acc_row_size)/acc_fragment_size)+1;
}
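
/*
  Worked example (illustrative figures only): with max_rows = 100,000,000
  the assumed ACC usage is 100,000,000 * 27 bytes ~= 2.7 GB, and with
  512 MB per fragment this gives uint(2.7e9 / 536870912) + 1 = 6 fragments.
*/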


/*
  Routine to adjust the default number of partitions to always be a multiple
  of the number of nodes, and never more than 4 times the number of nodes.
*/
static
bool
adjusted_frag_count(Ndb* ndb,
                    uint requested_frags,
                    uint &reported_frags)
{
  unsigned no_nodes= g_ndb_cluster_connection->no_db_nodes();
  unsigned no_replicas= no_nodes == 1 ? 1 : 2;

  unsigned no_threads= 1;
  const unsigned no_nodegroups= g_ndb_cluster_connection->max_nodegroup() + 1;

  {
    /**
     * Use SYSTAB_0 to get #replicas, and to guess #threads
     */
    char dbname[FN_HEADLEN+1];
    dbname[FN_HEADLEN]= 0;
    my_stpnmov(dbname, ndb->getDatabaseName(), sizeof(dbname) - 1);
    ndb->setDatabaseName("sys");
    Ndb_table_guard ndbtab_g(ndb->getDictionary(), "SYSTAB_0");
    const NdbDictionary::Table * tab = ndbtab_g.get_table();
    if (tab)
    {
      no_replicas= ndbtab_g.get_table()->getReplicaCount();

      /**
       * Guess #threads
       */
      {
        const Uint32 frags = tab->getFragmentCount();
        Uint32 node = 0;
        Uint32 cnt = 0;
        for (Uint32 i = 0; i<frags; i++)
        {
          Uint32 replicas[4];
          if (tab->getFragmentNodes(i, replicas, NDB_ARRAY_SIZE(replicas)))
          {
            if (node == replicas[0] || node == 0)
            {
              node = replicas[0];
              cnt ++;
            }
          }
        }
        no_threads = cnt; // Number of primary replicas on one node
      }
    }
    ndb->setDatabaseName(dbname);
  }

  const unsigned usable_nodes = no_replicas * no_nodegroups;
  const uint max_replicas = 8 * usable_nodes * no_threads;

  reported_frags = usable_nodes * no_threads; // Start with 1 fragment per thread
  Uint32 replicas = reported_frags * no_replicas;

  /**
   * Increase the fragment count until the requested number is reached,
   * without exceeding max_replicas
   */
  while (reported_frags < requested_frags &&
         (replicas + usable_nodes * no_threads * no_replicas) <= max_replicas)
  {
    reported_frags += usable_nodes * no_threads;
    replicas += usable_nodes * no_threads * no_replicas;
  }

  return (reported_frags < requested_frags);
}
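
/*
  Worked example (all figures assumed for illustration): with 2 replicas,
  2 node groups and 4 threads guessed from SYSTAB_0, usable_nodes = 4 and
  the fragment count starts at 4 * 4 = 16, growing in steps of 16
  (16, 32, 48, ...) until the requested count is reached or the replica
  budget of 8 * 4 * 4 = 128 would be exceeded. For requested_frags = 40
  this yields reported_frags = 48 and the function returns false.
*/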


extern bool ndb_fk_util_truncate_allowed(THD* thd,
                                         NdbDictionary::Dictionary* dict,
                                         const char* db,
                                         const NdbDictionary::Table* tab,
                                         bool& allow);

/*
  Forward declaration of the utility functions used
  when creating partitioned tables
*/
static int
create_table_set_up_partition_info(HA_CREATE_INFO* create_info,
                                   partition_info *part_info,
                                   NdbDictionary::Table&);
static int
create_table_set_range_data(const partition_info* part_info,
                            NdbDictionary::Table&);
static int
create_table_set_list_data(const partition_info* part_info,
                           NdbDictionary::Table&);


/**
  Create a table in NDB Cluster
*/

int ha_ndbcluster::create(const char *name, 
                          TABLE *form, 
                          HA_CREATE_INFO *create_info)
{
  THD *thd= current_thd;
  NDBTAB tab;
  NDBCOL col;
  size_t pack_length, length;
  uint i, pk_length= 0;
  uchar *data= NULL, *pack_data= NULL;
  bool create_temporary= (create_info->options & HA_LEX_CREATE_TMP_TABLE);
  bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
  bool is_alter= (thd->lex->sql_command == SQLCOM_ALTER_TABLE);
  bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE);
  bool use_disk= FALSE;
  NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked;
  bool ndb_sys_table= FALSE;
  int result= 0;
  NdbDictionary::ObjectId objId;
  Ndb_fk_list fk_list_for_truncate;

  DBUG_ENTER("ha_ndbcluster::create");
  DBUG_PRINT("enter", ("name: %s", name));

  if (create_temporary)
  {
    /*
      Ndb does not support temporary tables
     */
    set_my_errno(ER_ILLEGAL_HA_CREATE_OPTION);
    DBUG_PRINT("info", ("Ndb doesn't support temporary tables"));
    push_warning_printf(thd, Sql_condition::SL_WARNING,
                        ER_ILLEGAL_HA_CREATE_OPTION,
                        "Ndb doesn't support temporary tables");
    DBUG_RETURN(my_errno());
  }

  assert(*fn_rext((char*)name) == 0);
  set_dbname(name);
  set_tabname(name);
  
  /*
    Check that database name and table name will fit within limits
  */
  if (strlen(m_dbname) > NDB_MAX_DDL_NAME_BYTESIZE ||
      strlen(m_tabname) > NDB_MAX_DDL_NAME_BYTESIZE)
  {
    char *invalid_identifier=
        (strlen(m_dbname) > NDB_MAX_DDL_NAME_BYTESIZE)?m_dbname:m_tabname;
    push_warning_printf(thd, Sql_condition::SL_WARNING,
                        ER_TOO_LONG_IDENT,
                        "Ndb has an internal limit of %u bytes on the size of schema identifiers",
                        NDB_MAX_DDL_NAME_BYTESIZE);
    my_error(ER_TOO_LONG_IDENT, MYF(0), invalid_identifier);
    DBUG_RETURN(HA_WRONG_CREATE_OPTION);
  }

  set_my_errno(check_ndb_connection(thd));
  if (my_errno())
    DBUG_RETURN(my_errno());
  
  Ndb *ndb= get_ndb(thd);
  NDBDICT *dict= ndb->getDictionary();

  table= form;
  if (create_from_engine)
  {
    /*
      Table already exists in NDB and frm file has been created by 
      caller.
      Do Ndb specific stuff, such as create a .ndb file
    */
    set_my_errno(write_ndb_file(name));
    if (my_errno())
      DBUG_RETURN(my_errno());

    ndbcluster_create_binlog_setup(thd, ndb, name, (uint)strlen(name),
                                   m_dbname, m_tabname, form);
    if (my_errno() == HA_ERR_TABLE_EXIST)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_TABLE_EXISTS_ERROR,
                          "Failed to setup replication of table %s.%s",
                          m_dbname, m_tabname);
      set_my_errno(0);
    }


    DBUG_RETURN(my_errno());
  }

  Thd_ndb *thd_ndb= get_thd_ndb(thd);

  if (!((thd_ndb->options & TNO_NO_LOCK_SCHEMA_OP) ||
        thd_ndb->has_required_global_schema_lock("ha_ndbcluster::create")))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);


  if (!ndb_schema_dist_is_ready())
  {
    /*
      Don't allow table creation unless schema distribution is ready
      (unless it is the creation of the schema distribution table itself)
    */
    if (!(strcmp(m_dbname, NDB_REP_DB) == 0 &&
          strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0))
    {
      DBUG_PRINT("info", ("Schema distribution table not setup"));
      DBUG_RETURN(HA_ERR_NO_CONNECTION);
    }
    single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite;
    ndb_sys_table= TRUE;
  }

  if (!ndb_apply_status_share)
  {
    if ((strcmp(m_dbname, NDB_REP_DB) == 0 &&
         strcmp(m_tabname, NDB_APPLY_TABLE) == 0))
    {
      ndb_sys_table= TRUE;
    }
  }

  if (is_truncate)
  {
    Ndb_table_guard ndbtab_g(dict);
    ndbtab_g.init(m_tabname);
    if (!ndbtab_g.get_table())
      ERR_RETURN(dict->getNdbError());

    /*
      Don't allow truncate on a table which is a foreign key parent.
      This is kind of a kludge to get legacy compatibility behaviour
      but it also reduces the complexity involved in rewriting
      fks during this "recreate".
     */
    bool allow;
    if (!ndb_fk_util_truncate_allowed(thd, dict, m_dbname,
                                      ndbtab_g.get_table(), allow))
    {
      DBUG_RETURN(HA_ERR_NO_CONNECTION);
    }
    if (!allow)
    {
      my_error(ER_TRUNCATE_ILLEGAL_FK, MYF(0), "");
      DBUG_RETURN(1);
    }

    /* save the foreign key information in fk_list */
    int err;
    if ((err= get_fk_data_for_truncate(dict, ndbtab_g.get_table(),
                                       fk_list_for_truncate)))
      DBUG_RETURN(err);

    DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
    if ((err= delete_table(name)))
      DBUG_RETURN(err);
    ndbtab_g.reinit();
  }

  NDB_Modifiers table_modifiers(ndb_table_modifiers);
  table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
                        create_info->comment.length);
  const NDB_Modifier * mod_nologging = table_modifiers.get("NOLOGGING");

#ifdef HAVE_NDB_BINLOG
  /* Read ndb_replication entry for this table, if any */
  Uint32 binlog_flags;
  const st_conflict_fn_def* conflict_fn= NULL;
  st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
  Uint32 num_args = MAX_CONFLICT_ARGS;

  int rep_read_rc= ndbcluster_get_binlog_replication_info(thd,
                                                          ndb,
                                                          m_dbname,
                                                          m_tabname,
                                                          ::server_id,
                                                          &binlog_flags,
                                                          &conflict_fn,
                                                          args,
                                                          &num_args);
  if (rep_read_rc != 0)
  {
    DBUG_RETURN(rep_read_rc);
  }

  /* Reset database name */
  ndb->setDatabaseName(m_dbname);

  /* TODO : Add as per conflict function 'virtual' */
  /* Use ndb_replication information as required */
  if (conflict_fn != NULL)
  {
    switch(conflict_fn->type)
    {
    case CFT_NDB_EPOCH:
    case CFT_NDB_EPOCH_TRANS:
    case CFT_NDB_EPOCH2:
    case CFT_NDB_EPOCH2_TRANS:
    {
      /* Default 6 extra Gci bits allows 2^6 == 64
       * epochs / saveGCP, a comfortable default
       */
      Uint32 numExtraGciBits = 6;
      Uint32 numExtraAuthorBits = 1;

      if ((num_args == 1) &&
          (args[0].type == CFAT_EXTRA_GCI_BITS))
      {
        numExtraGciBits = args[0].extraGciBits;
      }
      DBUG_PRINT("info", ("Setting ExtraRowGciBits to %u, "
                          "ExtraAuthorBits to %u",
                          numExtraGciBits,
                          numExtraAuthorBits));

      tab.setExtraRowGciBits(numExtraGciBits);
      tab.setExtraRowAuthorBits(numExtraAuthorBits);
    }
    default:
      break;
    }
  }
#endif

  if ((dict->beginSchemaTrans() == -1))
  {
    DBUG_PRINT("info", ("Failed to start schema transaction"));
    goto err_return;
  }
  DBUG_PRINT("info", ("Started schema transaction"));

  DBUG_PRINT("table", ("name: %s", m_tabname));  
  if (tab.setName(m_tabname))
  {
    set_my_errno(errno);
    goto abort;
  }
  if (!ndb_sys_table)
  {
    if (THDVAR(thd, table_temporary))
    {
#ifdef DOES_NOT_WORK_CURRENTLY
      tab.setTemporary(TRUE);
#endif
      tab.setLogging(FALSE);
    }
    else if (THDVAR(thd, table_no_logging))
    {
      tab.setLogging(FALSE);
    }

    if (mod_nologging->m_found)
    {
      tab.setLogging(!mod_nologging->m_val_bool);
    }
  }
  tab.setSingleUserMode(single_user_mode);

  // Save frm data for this table
  if (readfrm(name, &data, &length))
  {
    result= 1;
    goto abort_return;
  }
  if (packfrm(data, length, &pack_data, &pack_length))
  {
    my_free((char*)data, MYF(0));
    result= 2;
    goto abort_return;
  }
  DBUG_PRINT("info",
             ("setFrm data: 0x%lx  len: %lu", (long) pack_data,
              (ulong) pack_length));
  tab.setFrm(pack_data, Uint32(pack_length));      
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));
  
  /*
    Handle table row type

    Default is to let table rows have var part reference so that online 
    add column can be performed in the future.  Explicitly setting row 
    type to fixed will omit var part reference, which will save data 
    memory in ndb, but at the cost of not being able to online add 
    column to this table
  */
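  /*
    For example (illustrative): a table created with
      CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=NDBCLUSTER
        ROW_FORMAT=FIXED;
    gets setForceVarPart(FALSE) below and thus cannot have columns added
    online later, whereas the default row format keeps the var part
    reference and allows it.
  */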
  switch (create_info->row_type) {
  case ROW_TYPE_FIXED:
    tab.setForceVarPart(FALSE);
    break;
  case ROW_TYPE_DYNAMIC:
    /* fall through, treat as default */
  default:
    /* fall through, treat as default */
  case ROW_TYPE_DEFAULT:
    tab.setForceVarPart(TRUE);
    break;
  }

  /*
    Setup columns
  */
  my_bitmap_map *old_map;
  {
    restore_record(form, s->default_values);
    old_map= tmp_use_all_columns(form, form->read_set);
  }

  for (i= 0; i < form->s->fields; i++) 
  {
    Field *field= form->field[i];
    DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d",
                        field->field_name, field->real_type(),
                        field->pack_length()));
    set_my_errno(create_ndb_column(thd, col, field, create_info));
    if (my_errno())
      goto abort;

    if (!use_disk &&
        col.getStorageType() == NDBCOL::StorageTypeDisk)
      use_disk= TRUE;

    if (tab.addColumn(col))
    {
      set_my_errno(errno);
      goto abort;
    }
    if (col.getPrimaryKey())
      pk_length += (field->pack_length() + 3) / 4;
  }

  tmp_restore_column_map(form->read_set, old_map);
  if (use_disk)
  { 
    tab.setLogging(TRUE);
    tab.setTemporary(FALSE);
    if (create_info->tablespace)
      tab.setTablespaceName(create_info->tablespace);
    else
      tab.setTablespaceName("DEFAULT-TS");
  }

  // Save the table level storage media setting
  switch(create_info->storage_media)
  {
    case HA_SM_DISK:
      tab.setStorageType(NdbDictionary::Column::StorageTypeDisk);
      break;
    case HA_SM_DEFAULT:
      tab.setStorageType(NdbDictionary::Column::StorageTypeDefault);
      break;
    case HA_SM_MEMORY:
      tab.setStorageType(NdbDictionary::Column::StorageTypeMemory);
      break;
  }

  DBUG_PRINT("info", ("Table %s is %s stored with tablespace %s",
                      m_tabname,
                      (use_disk) ? "disk" : "memory",
                      (use_disk) ? tab.getTablespaceName() : "N/A"));
 
  KEY* key_info;
  for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++)
  {
    KEY_PART_INFO *key_part= key_info->key_part;
    KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
    for (; key_part != end; key_part++)
    {
      if (key_part->field->field_storage_type() == HA_SM_DISK)
      {
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_ILLEGAL_HA_CREATE_OPTION,
                            ER(ER_ILLEGAL_HA_CREATE_OPTION),
                            ndbcluster_hton_name,
                            "Index on field "
                            "declared with "
                            "STORAGE DISK is not supported");
        result= HA_ERR_UNSUPPORTED;
        goto abort_return;
      }
      tab.getColumn(key_part->fieldnr-1)->setStorageType(
                             NdbDictionary::Column::StorageTypeMemory);
    }
  }

  // No primary key, create shadow key as 64 bit, auto increment  
  if (form->s->primary_key == MAX_KEY) 
  {
    DBUG_PRINT("info", ("Generating shadow key"));
    if (col.setName("$PK"))
    {
      set_my_errno(errno);
      goto abort;
    }
    col.setType(NdbDictionary::Column::Bigunsigned);
    col.setLength(1);
    col.setNullable(FALSE);
    col.setPrimaryKey(TRUE);
    col.setAutoIncrement(TRUE);
    col.setDefaultValue(NULL, 0);
    if (tab.addColumn(col))
    {
      set_my_errno(errno);
      goto abort;
    }
    pk_length += 2;
  }
 
  // Make sure that blob tables don't have too big part size
  for (i= 0; i < form->s->fields; i++) 
  {
    /**
     * The extra +7 consists of:
     * 2 - words from the pk in the blob table
     * 5 - extra words added by tup/dict??
     */

    // To be upgrade/downgrade safe...we currently use
    // old NDB_MAX_TUPLE_SIZE_IN_WORDS, unless MAX_BLOB_PART_SIZE is set
    switch (form->field[i]->real_type()) {
    case MYSQL_TYPE_GEOMETRY:
    case MYSQL_TYPE_BLOB:    
    case MYSQL_TYPE_MEDIUM_BLOB:   
    case MYSQL_TYPE_LONG_BLOB: 
    case MYSQL_TYPE_JSON:
    {
      NdbDictionary::Column * column= tab.getColumn(i);
      unsigned size= pk_length + (column->getPartSize()+3)/4 + 7;
      unsigned ndb_max= OLD_NDB_MAX_TUPLE_SIZE_IN_WORDS;
      if (column->getPartSize() > (int)(4 * ndb_max))
        ndb_max= NDB_MAX_TUPLE_SIZE_IN_WORDS; // MAX_BLOB_PART_SIZE

      if (size > ndb_max &&
          (pk_length+7) < ndb_max)
      {
        size= ndb_max - pk_length - 7;
        column->setPartSize(4*size);
      }
      /**
       * If size > NDB_MAX and pk_length+7 >= NDB_MAX
       *   then the table can't be created anyway, so skip
       *   changing part size, and have error later
       */ 
    }
    default:
      break;
    }
  }

  // Assume that table_share->max/min_rows equals create_info->max/min_rows,
  // although this is CREATE so create_info should be used
  assert(create_info->max_rows == table_share->max_rows);
  assert(create_info->min_rows == table_share->min_rows);

  // Check partition info
  set_my_errno(create_table_set_up_partition_info(create_info,
                                                  form->part_info,
                                                  tab));
  if (my_errno())
    goto abort;

  if (tab.getFragmentType() == NDBTAB::HashMapPartition && 
      tab.getDefaultNoPartitionsFlag() &&
      (create_info->max_rows != 0 || create_info->min_rows != 0))
  {
    ulonglong rows= create_info->max_rows >= create_info->min_rows ? 
      create_info->max_rows : 
      create_info->min_rows;
    uint no_fragments= get_no_fragments(rows);
    uint reported_frags= no_fragments;
    if (adjusted_frag_count(ndb, no_fragments, reported_frags))
    {
      push_warning(current_thd,
                   Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
                   "Ndb might have problems storing the max amount "
                   "of rows specified");
    }
    tab.setFragmentCount(reported_frags);
    tab.setDefaultNoPartitionsFlag(false);
    tab.setFragmentData(0, 0);
  }

  // Check for HashMap
  if (tab.getFragmentType() == NDBTAB::HashMapPartition && 
      tab.getDefaultNoPartitionsFlag())
  {
    tab.setFragmentCount(0);
    tab.setFragmentData(0, 0);
  }
  else if (tab.getFragmentType() == NDBTAB::HashMapPartition)
  {
    NdbDictionary::HashMap hm;
    int res= dict->getDefaultHashMap(hm, tab.getFragmentCount());
    if (res == -1)
    {
      res= dict->initDefaultHashMap(hm, tab.getFragmentCount());
      if (res == -1)
      {
        const NdbError err= dict->getNdbError();
        set_my_errno(ndb_to_mysql_error(&err));
        goto abort;
      }

      res= dict->createHashMap(hm);
      if (res == -1)
      {
        const NdbError err= dict->getNdbError();
        set_my_errno(ndb_to_mysql_error(&err));
        goto abort;
      }
    }
  }

  // Create the table in NDB     
  if (dict->createTable(tab, &objId) != 0)
  {
    const NdbError err= dict->getNdbError();
    set_my_errno(ndb_to_mysql_error(&err));
    goto abort;
  }

  DBUG_PRINT("info", ("Table %s/%s created successfully", 
                      m_dbname, m_tabname));

  // Create secondary indexes
  tab.assignObjId(objId);
  m_table= &tab;
  set_my_errno(create_indexes(thd, ndb, form));

  if (!is_truncate && my_errno() == 0)
  {
    set_my_errno(create_fks(thd, ndb));
  }

  if (is_alter && my_errno() == 0)
  {
    /**
     * MySQL doesn't know/care about FKs,
     *   so we need to copy the old ones ourselves
     */
    set_my_errno(copy_fk_for_offline_alter(thd, ndb, &tab));
  }

  if (!fk_list_for_truncate.is_empty() && my_errno() == 0)
  {
    /*
     Create FKs for the new table from the list retrieved from the
     old table (for TRUNCATE TABLE).
     */
    set_my_errno(recreate_fk_for_truncate(thd, ndb, tab.getName(),
                                          fk_list_for_truncate));
  }

  m_table= 0;

  if (!my_errno())
  {
    /*
     * All steps have succeeded, try and commit schema transaction
     */
    if (dict->endSchemaTrans() == -1)
      goto err_return;
    set_my_errno(write_ndb_file(name));
  }
  else
  {
abort:
/*
 *  Some step during table creation failed, abort schema transaction
 */
    DBUG_PRINT("info", ("Aborting schema transaction due to error %i",
                        my_errno()));
    if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
        == -1)
      DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
                          dict->getNdbError().code));
    m_table= 0;

    {
      // Flush the table out of ndbapi's dictionary cache
      Ndb_table_guard ndbtab_g(dict);
      ndbtab_g.init(m_tabname);
      ndbtab_g.invalidate();
    }

    DBUG_RETURN(my_errno());
abort_return:
    DBUG_PRINT("info", ("Aborting schema transaction"));
    if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
        == -1)
      DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
                          dict->getNdbError().code));
    DBUG_RETURN(result);
err_return:
    m_table= 0;
    ERR_RETURN(dict->getNdbError());
  }

  /**
   * createTable/index schema transaction OK
   */
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  m_table= ndbtab_g.get_table();

  if (my_errno())
  {
    /*
      Failed to create an index,
      drop the table (and all its indexes)
    */
    while (!thd->killed)
    {
      if (dict->beginSchemaTrans() == -1)
        goto cleanup_failed;
      if (dict->dropTableGlobal(*m_table))
      {
        switch (dict->getNdbError().status)
        {
        case NdbError::TemporaryError:
          if (!thd->killed) 
          {
            if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
                == -1)
              DBUG_PRINT("info", ("Failed to abort schema transaction, %i",
                                  dict->getNdbError().code));
            goto cleanup_failed;
          }
          break;
        default:
          break;
        }
      }
      if (dict->endSchemaTrans() == -1)
      {
cleanup_failed:
        DBUG_PRINT("info", ("Could not cleanup failed create %i",
                          dict->getNdbError().code));
        continue; // retry indefinitely
      }
      break;
    }
    m_table = 0;
    DBUG_RETURN(my_errno());
  }
  else // if (!my_errno)
  {
    NDB_SHARE *share= 0;
    native_mutex_lock(&ndbcluster_mutex);
    /*
      First make sure we get a "fresh" share here, not an old trailing one...
    */
    {
      uint length= (uint) strlen(name);
      if ((share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
                                              (const uchar*) name, length)))
        handle_trailing_share(thd, share);
    }
    /*
      get a new share
    */

    /* ndb_share reference create */
    if (!(share= get_share(name, form, TRUE, TRUE)))
    {
      sql_print_error("NDB: allocating table share for %s failed", name);
      /* my_errno is set */
    }
    else
    {
      DBUG_PRINT("NDB_SHARE", ("%s binlog create  use_count: %u",
                               share->key_string(), share->use_count));
    }
    native_mutex_unlock(&ndbcluster_mutex);

    while (!IS_TMP_PREFIX(m_tabname))
    {
#ifdef HAVE_NDB_BINLOG
      if (share)
      {
        /* Set the Binlogging information we retrieved above */
        ndbcluster_apply_binlog_replication_info(thd,
                                                 share,
                                                 m_table,
                                                 conflict_fn,
                                                 args,
                                                 num_args,
                                                 TRUE, /* Do set binlog flags */
                                                 binlog_flags);
      }
#endif
      String event_name(INJECTOR_EVENT_LEN);
      ndb_rep_event_name(&event_name, m_dbname, m_tabname,
                         get_binlog_full(share));
      int do_event_op= ndb_binlog_running;

      if (!ndb_schema_dist_is_ready() &&
          strcmp(share->db, NDB_REP_DB) == 0 &&
          strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0)
        do_event_op= 1;

      /*
        Always create an event for the table, as other mysql servers
        expect it to be there.
      */
      if (!Ndb_dist_priv_util::is_distributed_priv_table(m_dbname, m_tabname) &&
          !ndbcluster_create_event(thd, ndb, m_table, event_name.c_ptr(), share,
                                   do_event_op ? 2 : 1/* push warning */))
      {
        if (opt_ndb_extra_logging)
          sql_print_information("NDB Binlog: CREATE TABLE Event: %s",
                                event_name.c_ptr());

        if (ndbcluster_create_event_ops(thd, share,
                                        m_table, event_name.c_ptr()))
        {
          sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations."
                          " Event: %s", name);
          /* a warning has been issued to the client */
        }
      }
      /*
        warning has been issued if ndbcluster_create_event failed
        and (share && do_event_op)
      */
      if (share && !do_event_op)
        set_binlog_nologging(share);
      ndbcluster_log_schema_op(thd,
                               thd->query().str, thd->query().length,
                               share->db, share->table_name,
                               m_table->getObjectId(),
                               m_table->getObjectVersion(),
                               (is_truncate) ?
			       SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE, 
			       NULL, NULL);
      break;
    }
  }

  m_table= 0;
  DBUG_RETURN(my_errno());
}


int ha_ndbcluster::create_index(THD *thd, const char *name, KEY *key_info, 
                                NDB_INDEX_TYPE idx_type, uint idx_no) const
{
  int error= 0;
  char unique_name[FN_LEN + 1];
  static const char* unique_suffix= "$unique";
  DBUG_ENTER("ha_ndbcluster::create_index");
  DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name));  

  if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
  {
    strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS);
    DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
                        unique_name, idx_no));
  }
    
  switch (idx_type){
  case PRIMARY_KEY_INDEX:
    // Do nothing, already created
    break;
  case PRIMARY_KEY_ORDERED_INDEX:
    error= create_ordered_index(thd, name, key_info);
    break;
  case UNIQUE_ORDERED_INDEX:
    if (!(error= create_ordered_index(thd, name, key_info)))
      error= create_unique_index(thd, unique_name, key_info);
    break;
  case UNIQUE_INDEX:
    if (check_index_fields_not_null(key_info))
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
			  ER_NULL_COLUMN_IN_INDEX,
			  "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan");
    }
    error= create_unique_index(thd, unique_name, key_info);
    break;
  case ORDERED_INDEX:
    if (key_info->algorithm == HA_KEY_ALG_HASH)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
			  ER_ILLEGAL_HA_CREATE_OPTION,
			  ER(ER_ILLEGAL_HA_CREATE_OPTION),
			  ndbcluster_hton_name,
			  "Ndb does not support non-unique "
			  "hash based indexes");
      error= HA_ERR_UNSUPPORTED;
      break;
    }
    error= create_ordered_index(thd, name, key_info);
    break;
  default:
    assert(FALSE);
    break;
  }
  
  DBUG_RETURN(error);
}

int ha_ndbcluster::create_ordered_index(THD *thd, const char *name, 
                                        KEY *key_info) const
{
  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
  DBUG_RETURN(create_ndb_index(thd, name, key_info, FALSE));
}

int ha_ndbcluster::create_unique_index(THD *thd, const char *name, 
                                       KEY *key_info) const
{

  DBUG_ENTER("ha_ndbcluster::create_unique_index");
  DBUG_RETURN(create_ndb_index(thd, name, key_info, TRUE));
}


/**
  Create an index in NDB Cluster.

  @todo
    Only temporary ordered indexes supported
*/

int ha_ndbcluster::create_ndb_index(THD *thd, const char *name, 
                                    KEY *key_info,
                                    bool unique) const
{
  char index_name[FN_LEN + 1];
  Ndb *ndb= get_ndb(thd);
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
  KEY_PART_INFO *key_part= key_info->key_part;
  KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts;
  
  DBUG_ENTER("ha_ndbcluster::create_index");
  DBUG_PRINT("enter", ("name: %s ", name));

  ndb_protect_char(name, index_name, sizeof(index_name) - 1, '/');
  DBUG_PRINT("info", ("index name: %s ", index_name));

  NdbDictionary::Index ndb_index(index_name);
  if (unique)
    ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
  else 
  {
    ndb_index.setType(NdbDictionary::Index::OrderedIndex);
    // TODO Only temporary ordered indexes supported
    ndb_index.setLogging(FALSE); 
  }
  if (!m_table->getLogging())
    ndb_index.setLogging(FALSE); 
  if (((NDBTAB*)m_table)->getTemporary())
    ndb_index.setTemporary(TRUE); 
  if (ndb_index.setTable(m_tabname))
  {
    set_my_errno(errno);
    DBUG_RETURN(errno);
  }

  for (; key_part != end; key_part++) 
  {
    Field *field= key_part->field;
    if (field->field_storage_type() == HA_SM_DISK)
    {
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          ER(ER_ILLEGAL_HA_CREATE_OPTION),
                          ndbcluster_hton_name,
                          "Index on field "
                          "declared with "
                          "STORAGE DISK is not supported");
      DBUG_RETURN(HA_ERR_UNSUPPORTED);
    }
    DBUG_PRINT("info", ("attr: %s", field->field_name));
    if (ndb_index.addColumnName(field->field_name))
    {
      set_my_errno(errno);
      DBUG_RETURN(errno);
    }
  }
  
  if (dict->createIndex(ndb_index, *m_table))
    ERR_RETURN(dict->getNdbError());

  // Success
  DBUG_PRINT("info", ("Created index %s", name));
  DBUG_RETURN(0);  
}

/*
 Prepare for an on-line alter table
*/ 
void ha_ndbcluster::prepare_for_alter()
{
  /* ndb_share reference schema */
  ndbcluster_get_share(m_share); // Increase ref_count
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema  use_count: %u",
                           m_share->key_string(), m_share->use_count));
  set_ndb_share_state(m_share, NSS_ALTERED);
}

/*
  Add an index on-line to a table
*/
/*
int ha_ndbcluster::add_index(TABLE *table_arg, 
                             KEY *key_info, uint num_of_keys,
                             handler_add_index **add)
{
  // TODO: As we don't yet implement ::final_add_index(),
  // we don't need a handler_add_index object either..?
  *add= NULL; // new handler_add_index(table_arg, key_info, num_of_keys);
  return add_index_impl(current_thd, table_arg, key_info, num_of_keys);
}
*/

int ha_ndbcluster::add_index_impl(THD *thd, TABLE *table_arg, 
                                  KEY *key_info, uint num_of_keys)
{
  int error= 0;
  uint idx;
  DBUG_ENTER("ha_ndbcluster::add_index");
  DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str));
  assert(m_share->state == NSS_ALTERED);

  for (idx= 0; idx < num_of_keys; idx++)
  {
    KEY *key= key_info + idx;
    KEY_PART_INFO *key_part= key->key_part;
    KEY_PART_INFO *end= key_part + key->user_defined_key_parts;
    NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false);
    DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name));
    // Add fields to key_part struct
    for (; key_part != end; key_part++)
      key_part->field= table->field[key_part->fieldnr];
    // Check index type
    // Create index in ndb
    if((error= create_index(thd, key_info[idx].name, key, idx_type, idx)))
      break;
  }
  DBUG_RETURN(error);  
}

/*
  Mark one or several indexes for deletion and
  renumber the remaining indexes
*/
int ha_ndbcluster::prepare_drop_index(TABLE *table_arg, 
                                      uint *key_num, uint num_of_keys)
{
  DBUG_ENTER("ha_ndbcluster::prepare_drop_index");
  assert(m_share->state == NSS_ALTERED);
  // Mark indexes for deletion
  uint idx;
  for (idx= 0; idx < num_of_keys; idx++)
  {
    DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num));
    uint i = *key_num++;
    m_index[i].status= TO_BE_DROPPED;
    // Prepare delete of index stat entry
    if (m_index[i].type == PRIMARY_KEY_ORDERED_INDEX ||
        m_index[i].type == UNIQUE_ORDERED_INDEX ||
        m_index[i].type == ORDERED_INDEX)
    {
      const NdbDictionary::Index *index= m_index[i].index;
      if (index) // safety
      {
        int index_id= index->getObjectId();
        int index_version= index->getObjectVersion();
        ndb_index_stat_free(m_share, index_id, index_version);
      }
    }
  }
  // Renumber indexes
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  renumber_indexes(ndb, table_arg);
  DBUG_RETURN(0);
}
 
/*
  Really drop all indexes marked for deletion
*/
int ha_ndbcluster::final_drop_index(TABLE *table_arg)
{
  int error;
  DBUG_ENTER("ha_ndbcluster::final_drop_index");
  // Really drop indexes
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  error= drop_indexes(ndb, table_arg);
  DBUG_RETURN(error);
}


extern void ndb_fk_util_resolve_mock_tables(THD* thd,
                                            NdbDictionary::Dictionary* dict,
                                            const char* new_parent_db,
                                            const char* new_parent_name);


int
ha_ndbcluster::rename_table_impl(THD* thd, Ndb* ndb,
                                 const NdbDictionary::Table* orig_tab,
                                 const char* from, const char* to,
                                 const char* old_dbname,
                                 const char* old_tabname,
                                 const char* new_dbname,
                                 const char* new_tabname,
                                 bool real_rename,
                                 const char* real_rename_db,
                                 const char* real_rename_name,
                                 bool real_rename_log_on_participant,
                                 bool drop_events,
                                 bool create_events,
                                 bool commit_alter)
{
  DBUG_ENTER("ha_ndbcluster::rename_table_impl");
  DBUG_PRINT("info", ("real_rename: %d", real_rename));
  DBUG_PRINT("info", ("real_rename_db: '%s'", real_rename_db));
  DBUG_PRINT("info", ("real_rename_name: '%s'", real_rename_name));
  DBUG_PRINT("info", ("real_rename_log_on_participant: %d",
                      real_rename_log_on_participant));
  // Verify default values of real_rename related parameters
  assert(real_rename ||
         (real_rename_db == NULL &&
          real_rename_name == NULL &&
          real_rename_log_on_participant == false));

  DBUG_PRINT("info", ("drop_events: %d", drop_events));
  DBUG_PRINT("info", ("create_events: %d", create_events));
  DBUG_PRINT("info", ("commit_alter: %d", commit_alter));

  NDBDICT* dict = ndb->getDictionary();
  NDBDICT::List index_list;
  if (my_strcasecmp(system_charset_info, new_dbname, old_dbname))
  {
    // When moving tables between databases the indexes need to be
    // recreated; save the list of indexes before the rename so that
    // they can be recreated afterwards
    dict->listIndexes(index_list, *orig_tab);
  }

  // Change current database to that of target table
  if (ndb->setDatabaseName(new_dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }

  const int ndb_table_id= orig_tab->getObjectId();
  const int ndb_table_version= orig_tab->getObjectVersion();

  Ndb_share_temp_ref share(from);
  if (real_rename)
  {
    /*
      Prepare the rename on the participant, i.e. make the participant
      save the final table name in the NDB_SHARE of the table to be renamed.

      NOTE! The tricky thing here is that the NDB_SHARE hasn't yet been
      renamed on the participant and thus you have to use the original
      table name when communicating with the participant, otherwise it
      will not find the share where to stash the final table name.

      Also note that the main reason for doing this prepare phase
      (which the participant can't refuse) is due to lack of placeholders
      available in the schema dist protocol. There are simply not
      enough placeholders available to transfer all required parameters
      at once.
   */
    ndbcluster_log_schema_op(thd, to, (int)strlen(to),
                             real_rename_db, real_rename_name,
                             ndb_table_id, ndb_table_version,
                             SOT_RENAME_TABLE_PREPARE,
                             new_dbname /* unused */,
                             new_tabname /* unused */);
  }
  NDB_SHARE_KEY* old_key = share->key; // Save current key
  NDB_SHARE_KEY* new_key = NDB_SHARE::create_key(to);
  (void)ndbcluster_rename_share(thd, share, new_key);

  NdbDictionary::Table new_tab= *orig_tab;
  new_tab.setName(new_tabname);
  if (dict->alterTableGlobal(*orig_tab, new_tab) != 0)
  {
    const NdbError ndb_error= dict->getNdbError();
    // Rename the share back to old_key
    (void)ndbcluster_rename_share(thd, share, old_key);
    // Release the unused new_key
    NDB_SHARE::free_key(new_key);
    ERR_RETURN(ndb_error);
  }
  // Release the unused old_key
  NDB_SHARE::free_key(old_key);

  ndb_fk_util_resolve_mock_tables(thd, ndb->getDictionary(),
                                  new_dbname, new_tabname);

  {
    // Rename .ndb file
    int result;
    if ((result= handler::rename_table(from, to)))
    {
      // ToDo in 4.1 should rollback alter table...

      DBUG_RETURN(result);
    }
  }

  /* handle old table */
  if (drop_events)
  {
    ndbcluster_drop_event(thd, ndb, share,
                          old_dbname, old_tabname);
  }

  if (create_events)
  {
    Ndb_table_guard ndbtab_g2(dict, new_tabname);
    const NDBTAB *ndbtab= ndbtab_g2.get_table();
#ifdef HAVE_NDB_BINLOG
    ndbcluster_read_binlog_replication(thd, ndb, share, ndbtab,
                                       ::server_id, TRUE);
#endif
    /* always create an event for the table */
    String event_name(INJECTOR_EVENT_LEN);
    ndb_rep_event_name(&event_name, new_dbname, new_tabname,
                       get_binlog_full(share));

    if (!Ndb_dist_priv_util::is_distributed_priv_table(new_dbname,
                                                       new_tabname) &&
        !ndbcluster_create_event(thd, ndb, ndbtab, event_name.c_ptr(), share,
                                 ndb_binlog_running ? 2 : 1/* push warning */))
    {
      if (opt_ndb_extra_logging)
        sql_print_information("NDB Binlog: RENAME Event: %s",
                              event_name.c_ptr());
      if (share->op == 0 &&
          ndbcluster_create_event_ops(thd, share, ndbtab, event_name.c_ptr()))
      {
        sql_print_error("NDB Binlog: FAILED create event operations "
                        "during RENAME. Event %s", event_name.c_ptr());
        /* a warning has been issued to the client */
      }
    }
    /*
      A warning has been issued if ndbcluster_create_event failed
      and ndb_binlog_running is set.
    */
  }

  if (real_rename)
  {
    /*
      Commit the "real" rename table on the participant, i.e. make the
      participant extract the original table name which it got in prepare.

      NOTE! The tricky thing also here is that the NDB_SHARE hasn't yet been
      renamed on the participant, thus the original table name has to be
      used when communicating with the participant, otherwise it will not
      find the share where the final table name has been stashed.

      Also note the special flag which controls whether or not this query
      is written to the binlog on the participants.
    */
    ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
                             real_rename_db, real_rename_name,
                             ndb_table_id, ndb_table_version,
                             SOT_RENAME_TABLE,
                             new_dbname, new_tabname,
                             real_rename_log_on_participant);
  }

  if (commit_alter)
  {
    /* final phase of offline alter table */
    ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
                             new_dbname, new_tabname,
                             ndb_table_id, ndb_table_version,
                             SOT_ALTER_TABLE_COMMIT,
                             NULL, NULL);
  }

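  // If the table was moved to another database, recreate its indexes on the
  // renamed table and drop the old ones (indexes in the NDB system database
  // are skipped).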
  for (unsigned i = 0; i < index_list.count; i++)
  {
    NDBDICT::List::Element& index_el = index_list.elements[i];
    // Recreate any indexes not stored in the system database
    if (my_strcasecmp(system_charset_info,
                      index_el.database, NDB_SYSTEM_DATABASE))
    {
      // Get old index
      ndb->setDatabaseName(old_dbname);
      const NDBINDEX * index= dict->getIndexGlobal(index_el.name,  new_tab);
      DBUG_PRINT("info", ("Creating index %s/%s",
                          index_el.database, index->getName()));
      // Create the same "old" index on new tab
      dict->createIndex(*index, new_tab);
      DBUG_PRINT("info", ("Dropping index %s/%s",
                          index_el.database, index->getName()));
      // Drop old index
      ndb->setDatabaseName(old_dbname);
      dict->dropIndexGlobal(*index);
    }
  }
  DBUG_RETURN(0);
}


/**
  Rename a table in NDB and on the participating mysqld(s)
*/

int ha_ndbcluster::rename_table(const char *from, const char *to)
{
  THD *thd= current_thd;
  char old_dbname[FN_HEADLEN];
  char new_dbname[FN_HEADLEN];
  char new_tabname[FN_HEADLEN];

  DBUG_ENTER("ha_ndbcluster::rename_table");
  DBUG_PRINT("info", ("Renaming %s to %s", from, to));

  /*
    ALTER ... RENAME combined with other changes is currently not supported
    by Ndb due to
    Bug #16021021 ALTER ... RENAME FAILS TO RENAME ON PARTICIPANT MYSQLD

    Check if the command is an ALTER with ALTER_RENAME plus some other
    alter flag set.
  */
  if (thd->lex->sql_command == SQLCOM_ALTER_TABLE)
  {
    Alter_info *alter_info= &(thd->lex->alter_info);
    uint flags= alter_info->flags;

    if (flags & Alter_info::ALTER_RENAME && flags & ~Alter_info::ALTER_RENAME)
    {
      my_error(ER_NOT_SUPPORTED_YET, MYF(0), thd->query().str);
      DBUG_RETURN(ER_NOT_SUPPORTED_YET);
    }
  }

  set_dbname(from, old_dbname);
  set_dbname(to, new_dbname);
  set_tabname(from);
  set_tabname(to, new_tabname);

  DBUG_PRINT("info", ("old_tabname: '%s'", m_tabname));
  DBUG_PRINT("info", ("new_tabname: '%s'", new_tabname));

  /* Check that the new table or database name does not exceed max limit */
  if (strlen(new_dbname) > NDB_MAX_DDL_NAME_BYTESIZE ||
       strlen(new_tabname) > NDB_MAX_DDL_NAME_BYTESIZE)
  {
    char *invalid_identifier=
        (strlen(new_dbname) > NDB_MAX_DDL_NAME_BYTESIZE) ?
          new_dbname : new_tabname;
    push_warning_printf(thd, Sql_condition::SL_WARNING,
                        ER_TOO_LONG_IDENT,
                        "Ndb has an internal limit of %u bytes on the "\
                        "size of schema identifiers",
                        NDB_MAX_DDL_NAME_BYTESIZE);
    my_error(ER_TOO_LONG_IDENT, MYF(0), invalid_identifier);
    DBUG_RETURN(HA_WRONG_CREATE_OPTION);
  }

  if (check_ndb_connection(thd))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  Thd_ndb *thd_ndb= thd_get_thd_ndb(thd);
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::rename_table"))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  // Open the table which is to be renamed (i.e. the old table)
  Ndb *ndb= get_ndb(thd);
  ndb->setDatabaseName(old_dbname);
  NDBDICT *dict= ndb->getDictionary();
  Ndb_table_guard ndbtab_g(dict, m_tabname);
  const NDBTAB *orig_tab;
  if (!(orig_tab= ndbtab_g.get_table()))
    ERR_RETURN(dict->getNdbError());
  DBUG_PRINT("info", ("NDB table name: '%s'", orig_tab->getName()));

  // Magically detect if this is a rename or some form of alter
  // and decide which actions need to be performed
  const bool old_is_temp = IS_TMP_PREFIX(m_tabname);
  const bool new_is_temp = IS_TMP_PREFIX(new_tabname);
  switch (thd_sql_command(thd))
  {
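  // Which combination of real_rename / drop_events / create_events /
  // commit_alter to pass to rename_table_impl() depends on the SQL command
  // and on whether the source and/or target name is a temporary #sql name.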
  case SQLCOM_DROP_INDEX:
  case SQLCOM_CREATE_INDEX:
    DBUG_PRINT("info", ("CREATE or DROP INDEX as copying ALTER"));
    // fallthrough
  case SQLCOM_ALTER_TABLE:
    DBUG_PRINT("info", ("SQLCOM_ALTER_TABLE"));

    if (!new_is_temp && !old_is_temp)
    {
      /*
        This is a rename directly from real to real name which occurs:
        1) when the ALTER is a "simple" RENAME, i.e. only consists of RENAME
           and/or enable/disable indexes
        2) as part of an inplace ALTER .. RENAME
      */
      DBUG_PRINT("info", ("simple rename detected"));
      DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
                                    old_dbname, m_tabname,
                                    new_dbname, new_tabname,
                                    true, // real_rename
                                    old_dbname, // real_rename_db
                                    m_tabname, // real_rename_name
                                    true, // real_rename_log_on_participants
                                    true, // drop_events
                                    true, // create events
                                    false)); // commit_alter
    }

    // Make sure that inplace was not requested
    assert(thd->lex->alter_info.requested_algorithm !=
           Alter_info::ALTER_TABLE_ALGORITHM_INPLACE);

    /*
      This is a copying alter table which is implemented as
      1) Create destination table with temporary name
          -> ha_ndbcluster::create_table('#sql_75636-87')
          There are now the source table and one with temporary name:
             [t1] + [#sql_75636-87]
      2) Copy data from source table to destination table.
      3) Backup the source table by renaming it to another temporary name.
          -> ha_ndbcluster::rename_table('t1', '#sql_86545-98')
          There are now two temporarily named tables:
            [#sql_86545-98] + [#sql_75636-87]
      4) Rename the destination table to its real name.
          -> ha_ndbcluster::rename_table('#sql_75636-87', 't1')
      5) Drop the source table.
    */

    if (new_is_temp)
    {
      /*
        This is an alter table which renames the real name to a temp name,
        i.e. step 3) above, and is the first of two rename_table() calls.
        Drop events from the table.
      */
      DBUG_PRINT("info", ("real -> temp"));
      DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
                                    old_dbname, m_tabname,
                                    new_dbname, new_tabname,
                                    false, // real_rename
                                    NULL, // real_rename_db
                                    NULL, // real_rename_name
                                    false, // real_rename_log_on_participants
                                    true, // drop_events
                                    false, // create events
                                    false)); // commit_alter
    }

    if (old_is_temp)
    {
      /*
        This is an alter table which renames a temp name to the real name,
        i.e. step 4) above, and is the second call to rename_table().
        Create new events and commit the alter so that participants are
        made aware that the table changed and can reopen the table.
      */
      DBUG_PRINT("info", ("temp -> real"));

      /*
        Detect the special case which occurs when the table is both
        altered and renamed.

        The important thing here is to remember to also rename the table
        on all participants so they will find the table when the alter is
        completed. This is slightly problematic since their table is
        renamed directly from real to real name, while the mysqld which
        performs the alter renames from temp to real name. Fortunately
        it's possible to look up the original table name via the THD.
      */
      const char* orig_name = thd->lex->select_lex->table_list.first->table_name;
      const char* orig_db = thd->lex->select_lex->table_list.first->db;
      if (thd->lex->alter_info.flags & Alter_info::ALTER_RENAME &&
          (my_strcasecmp(system_charset_info, orig_db, new_dbname) ||
           my_strcasecmp(system_charset_info, orig_name, new_tabname)))
      {
        DBUG_PRINT("info", ("ALTER with RENAME detected"));
        /*
          Use the original table name when communicating with participant
        */
        const char* real_rename_db = orig_db;
        const char* real_rename_name = orig_name;

        /*
          Don't log the rename query on participant since that would
          cause both an ALTER TABLE RENAME and RENAME TABLE to appear in
          the binlog
        */
        const bool real_rename_log_on_participant = false;
        DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab,from,to,
                                      old_dbname, m_tabname,
                                      new_dbname, new_tabname,
                                      true, // real_rename
                                      real_rename_db,
                                      real_rename_name,
                                      real_rename_log_on_participant,
                                      false, // drop_events
                                      true, // create events
                                      true)); // commit_alter
      }

      DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab,from,to,
                                    old_dbname, m_tabname,
                                    new_dbname, new_tabname,
                                    false, // real_rename
                                    NULL, // real_rename_db
                                    NULL, // real_rename_name
                                    false, // real_rename_log_on_participants
                                    false, // drop_events
                                    true, // create events
                                    true)); // commit_alter
    }
    break;

  case SQLCOM_RENAME_TABLE:
    DBUG_PRINT("info", ("SQLCOM_RENAME_TABLE"));

    DBUG_RETURN(rename_table_impl(thd, ndb, orig_tab, from, to,
                                  old_dbname, m_tabname,
                                  new_dbname, new_tabname,
                                  true, // real_rename
                                  old_dbname, // real_rename_db
                                  m_tabname, // real_rename_name
                                  true, // real_rename_log_on_participants
                                  true, // drop_events
                                  true, // create events
                                  false)); // commit_alter
    break;

  default:
    sql_print_error("Unexpected rename case detected, sql_command: %d",
                    thd_sql_command(thd));
    abort();
    break;
  }

  // Never reached
  DBUG_RETURN(HA_ERR_UNSUPPORTED);
}


/**
  Delete table from NDB Cluster.
*/

static
void
delete_table_drop_share(NDB_SHARE* share, const char * path)
{
  DBUG_ENTER("delete_table_drop_share");
  if (share)
  {
    native_mutex_lock(&ndbcluster_mutex);
do_drop:
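    // Reached directly when a share was passed in, or via the lookup by
    // path below when no share was provided.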
    if (share->state != NSS_DROPPED)
    {
      /*
        The share kept by the server has not been freed, free it
      */
      ndbcluster_mark_share_dropped(share);
      /* ndb_share reference create free */
      DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
                               share->key_string(), share->use_count));
      free_share(&share, TRUE);
    }
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                             share->key_string(), share->use_count));
    free_share(&share, TRUE);
    native_mutex_unlock(&ndbcluster_mutex);
  }
  else if (path)
  {
    native_mutex_lock(&ndbcluster_mutex);
    share= get_share(path, 0, FALSE, TRUE);
    if (share)
    {
      goto do_drop;
    }
    native_mutex_unlock(&ndbcluster_mutex);
  }
  DBUG_VOID_RETURN;
}


// Declare adapter functions for Dummy_table_util function
extern bool ndb_fk_util_build_list(THD*, NdbDictionary::Dictionary*,
                                   const NdbDictionary::Table*, List<char>&);
extern void ndb_fk_util_drop_list(THD*, Ndb* ndb, NdbDictionary::Dictionary*, List<char>&);
extern bool ndb_fk_util_drop_table(THD*, Ndb* ndb, NdbDictionary::Dictionary*,
                                   const NdbDictionary::Table*);
extern bool ndb_fk_util_is_mock_name(const char* table_name);

bool
ha_ndbcluster::drop_table_and_related(THD* thd, Ndb* ndb, NdbDictionary::Dictionary* dict,
                                      const NdbDictionary::Table* table,
                                      int drop_flags, bool skip_related)
{
  DBUG_ENTER("drop_table_and_related");
  DBUG_PRINT("enter", ("cascade_constraints: %d dropdb: %d skip_related: %d",
                       MY_TEST(drop_flags & NDBDICT::DropTableCascadeConstraints),
                       MY_TEST(drop_flags & NDBDICT::DropTableCascadeConstraintsDropDB),
                       skip_related));

  /*
    Build a list of related objects which should be dropped after the
    table, unless the caller asks to skip dropping them.
  */
  List<char> drop_list;
  if (!skip_related &&
      !ndb_fk_util_build_list(thd, dict, table, drop_list))
  {
    DBUG_RETURN(false);
  }

  // Drop the table
  if (dict->dropTableGlobal(*table, drop_flags) != 0)
  {
    const NdbError& ndb_err = dict->getNdbError();
    if (ndb_err.code == 21080 &&
        thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS))
    {
      /*
        Drop was not allowed because table is still referenced by
        foreign key(s). Since foreign_key_checks=0 the problem is
        worked around by creating a mock table, recreating the foreign
        key(s) to point at the mock table and finally dropping
        the requested table.
      */
      if (!ndb_fk_util_drop_table(thd, ndb, dict, table))
      {
        DBUG_RETURN(false);
      }
    }
    else
    {
      DBUG_RETURN(false);
    }
  }

  // Drop objects which should be dropped after table
  ndb_fk_util_drop_list(thd, ndb, dict, drop_list);

  DBUG_RETURN(true);
}


/* static version which does not need a handler */

int
ha_ndbcluster::drop_table_impl(THD *thd, ha_ndbcluster *h, Ndb *ndb,
                               const char *path,
                               const char *db,
                               const char *table_name)
{
  DBUG_ENTER("ha_ndbcluster::drop_table_impl");
  NDBDICT *dict= ndb->getDictionary();
  int ndb_table_id= 0;
  int ndb_table_version= 0;

  if (!ndb_schema_dist_is_ready())
  {
    /* Don't allow drop table unless schema distribution is ready */
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
  /* ndb_share reference temporary */
  NDB_SHARE *share= get_share(path, 0, FALSE);
  if (share)
  {
    DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                             share->key_string(), share->use_count));
  }

  bool skip_related= false;
  int drop_flags = 0;
  /* A copying alter can leave a #sql table which is parent of old FKs */
  if (thd->lex->sql_command == SQLCOM_ALTER_TABLE &&
      strncmp(table_name, "#sql", 4) == 0)
  {
    DBUG_PRINT("info", ("Using cascade constraints for ALTER of temp table"));
    drop_flags |= NDBDICT::DropTableCascadeConstraints;
    // Cascade constraint is used and related will be dropped anyway
    skip_related = true;
  }

  if (thd->lex->sql_command == SQLCOM_DROP_DB)
  {
    DBUG_PRINT("info", ("Using cascade constraints DB for drop database"));
    drop_flags |= NDBDICT::DropTableCascadeConstraintsDropDB;
  }

  if (thd->lex->sql_command == SQLCOM_TRUNCATE)
  {
    DBUG_PRINT("info", ("Deleting table for TRUNCATE, skip dropping related"));
    skip_related= true;
  }

  /* Drop the table from NDB */
  int res= 0;
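  // Either the handler already has the table open (h->m_table) and can drop
  // it directly, or the table must first be looked up by name in the
  // dictionary.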
  if (h && h->m_table)
  {
retry_temporary_error1:
    if (drop_table_and_related(thd, ndb, dict, h->m_table,
                               drop_flags, skip_related))
    {
      ndb_table_id= h->m_table->getObjectId();
      ndb_table_version= h->m_table->getObjectVersion();
      DBUG_PRINT("info", ("success 1"));
    }
    else
    {
      switch (dict->getNdbError().status)
      {
        case NdbError::TemporaryError:
          if (!thd->killed) 
            goto retry_temporary_error1; // retry indefinitely
          break;
        default:
          break;
      }
      res= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("error(1) %u", res));
    }
    h->release_metadata(thd, ndb);
  }
  else
  {
    ndb->setDatabaseName(db);
    while (1)
    {
      Ndb_table_guard ndbtab_g(dict, table_name);
      if (ndbtab_g.get_table())
      {
    retry_temporary_error2:
        if (drop_table_and_related(thd, ndb, dict, ndbtab_g.get_table(),
                                   drop_flags, skip_related))
        {
          ndb_table_id= ndbtab_g.get_table()->getObjectId();
          ndb_table_version= ndbtab_g.get_table()->getObjectVersion();
          DBUG_PRINT("info", ("success 2"));
          break;
        }
        else
        {
          switch (dict->getNdbError().status)
          {
            case NdbError::TemporaryError:
              if (!thd->killed) 
                goto retry_temporary_error2; // retry indefinitely
              break;
            default:
              if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT)
              {
                ndbtab_g.invalidate();
                continue;
              }
              break;
          }
        }
      }
      res= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("error(2) %u", res));
      break;
    }
  }

  if (res)
  {
    /* the drop table failed for some reason, drop the share anyway */
    delete_table_drop_share(share, 0);
    DBUG_RETURN(res);
  }

  /* stop the logging of the dropped table, and cleanup */

  /*
    The drop table is successful even if the table does not exist in NDB.
    In case the table was actually not dropped there is no need to force
    a GCP, and setting the event name to null indicates that there is no
    event to be dropped.
  */
  int table_dropped= dict->getNdbError().code != 709;

  {
    if (table_dropped)
    {
      ndbcluster_handle_drop_table(thd, ndb, share, "delete table", 
                                   db, table_name);
    }
    else
    {
      /**
       * Setting 0,0 will cause ndbcluster_drop_event *not* to be called
       */
      ndbcluster_handle_drop_table(thd, ndb, share, "delete table", 
                                   0, 0);
    }
  }

  if (!IS_TMP_PREFIX(table_name) && share &&
      thd->lex->sql_command != SQLCOM_TRUNCATE)
  {
    ndbcluster_log_schema_op(thd,
                             thd->query().str, thd->query().length,
                             share->db, share->table_name,
                             ndb_table_id, ndb_table_version,
                             SOT_DROP_TABLE, NULL, NULL);
  }

  delete_table_drop_share(share, 0);
  DBUG_RETURN(0);
}

int ha_ndbcluster::delete_table(const char *name)
{
  THD *thd= current_thd;

  DBUG_ENTER("ha_ndbcluster::delete_table");
  DBUG_PRINT("enter", ("name: %s", name));

  if (thd == injector_thd)
  {
    /*
      The table was dropped remotely and is thus already
      dropped inside NDB.
      Just drop the local files.
    */
    DBUG_PRINT("info", ("Table is already dropped in NDB"));
    delete_table_drop_share(0, name);
    DBUG_RETURN(handler::delete_table(name));
  }

  set_dbname(name);
  set_tabname(name);

  if (!ndb_schema_dist_is_ready())
  {
    /* Don't allow drop table unless schema distribution is ready */
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }

  if (check_ndb_connection(thd))
  {
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }

  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::delete_table"))
  {
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }

  /*
    Drop table in ndb.
    If it was already gone it might have been dropped
    remotely, give a warning and then drop .ndb file.
   */
  int error;
  Ndb* ndb= thd_ndb->ndb;
  if (!(error= drop_table_impl(thd, this, ndb, name,
                               m_dbname, m_tabname)) ||
      error == HA_ERR_NO_SUCH_TABLE)
  {
    /* Call ancestor function to delete .ndb file */
    int error1= handler::delete_table(name);
    if (!error)
      error= error1;
  }

  DBUG_RETURN(error);
}


void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment,
                                       ulonglong nb_desired_values,
                                       ulonglong *first_value,
                                       ulonglong *nb_reserved_values)
{
  Uint64 auto_value;
  THD *thd= current_thd;
  DBUG_ENTER("get_auto_increment");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
  Ndb *ndb= get_ndb(table->in_use);
  uint retries= NDB_AUTO_INCREMENT_RETRIES;
  int retry_sleep= 30; /* 30 milliseconds, transaction */
  for (;;)
  {
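    // Read (or fetch and cache) the next autoincrement value, retrying a
    // limited number of times on temporary NDB errors before giving up.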
    Ndb_tuple_id_range_guard g(m_share);
    if ((m_skip_auto_increment &&
         ndb->readAutoIncrementValue(m_table, g.range, auto_value)) ||
        ndb->getAutoIncrementValue(m_table, g.range, auto_value, 
                                   Uint32(m_autoincrement_prefetch), 
                                   increment, offset))
    {
      if (--retries && !thd->killed &&
          ndb->getNdbError().status == NdbError::TemporaryError)
      {
        do_retry_sleep(retry_sleep);
        continue;
      }
      const NdbError err= ndb->getNdbError();
      sql_print_error("Error %lu in ::get_auto_increment(): %s",
                      (ulong) err.code, err.message);
      *first_value= ~(ulonglong) 0;
      DBUG_VOID_RETURN;
    }
    break;
  }
  *first_value= (longlong)auto_value;
  /* From the point of view of MySQL, NDB reserves one row at a time */
  *nb_reserved_values= 1;
  DBUG_VOID_RETURN;
}


/**
  Constructor for the NDB Cluster table handler.
*/

ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg):
  handler(hton, table_arg),
  m_thd_ndb(NULL),
  m_active_cursor(NULL),
  m_table(NULL),
  m_ndb_record(0),
  m_ndb_hidden_key_record(0),
  m_table_info(NULL),
  m_share(0),
  m_key_fields(NULL),
  m_part_info(NULL),
  m_user_defined_partitioning(FALSE),
  m_use_partition_pruning(FALSE),
  m_sorted(FALSE),
  m_use_write(FALSE),
  m_ignore_dup_key(FALSE),
  m_has_unique_index(FALSE),
  m_ignore_no_key(FALSE),
  m_read_before_write_removal_possible(FALSE),
  m_read_before_write_removal_used(FALSE),
  m_rows_updated(0),
  m_rows_deleted(0),
  m_rows_to_insert((ha_rows) 1),
  m_rows_inserted((ha_rows) 0),
  m_rows_changed((ha_rows) 0),
  m_delete_cannot_batch(FALSE),
  m_update_cannot_batch(FALSE),
  m_skip_auto_increment(TRUE),
  m_blobs_pending(0),
  m_is_bulk_delete(false),
  m_blobs_row_total_size(0),
  m_blobs_buffer(0),
  m_blobs_buffer_size(0),
  m_dupkey((uint) -1),
  m_autoincrement_prefetch(DEFAULT_AUTO_PREFETCH),
  m_pushed_join_member(NULL),
  m_pushed_join_operation(-1),
  m_disable_pushed_join(FALSE),
  m_active_query(NULL),
  m_pushed_operation(NULL),
  m_cond(NULL),
  m_multi_cursor(NULL)
{
  uint i;
 
  DBUG_ENTER("ha_ndbcluster");

  m_tabname[0]= '\0';
  m_dbname[0]= '\0';

  stats.records= ~(ha_rows)0; // uninitialized
  stats.block_size= 1024;

  for (i= 0; i < MAX_KEY; i++)
    ndb_init_index(m_index[i]);

  // make sure m_fk_mem_root is initialized
  init_alloc_root(PSI_INSTRUMENT_ME, &m_fk_mem_root, fk_root_block_size, 0);
  m_fk_data= NULL;

  DBUG_VOID_RETURN;
}


/**
  Destructor for NDB Cluster table handler.
*/

ha_ndbcluster::~ha_ndbcluster() 
{
  THD *thd= current_thd;
  Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
  DBUG_ENTER("~ha_ndbcluster");

  if (m_share)
  {
    /* ndb_share reference handler free */
    DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
                             m_share->key_string(), m_share->use_count));
    free_share(&m_share);
  }
  release_metadata(thd, ndb);
  release_blobs_buffer();

  // Check for open cursor/transaction
  assert(m_thd_ndb == NULL);

  // Discard any generated condition
  DBUG_PRINT("info", ("Deleting generated condition"));
  if (m_cond)
  {
    delete m_cond;
    m_cond= NULL;
  }
  DBUG_PRINT("info", ("Deleting pushed joins"));
  assert(m_active_query == NULL);
  assert(m_active_cursor == NULL);
  if (m_pushed_join_operation==PUSHED_ROOT)
  {
    delete m_pushed_join_member;             // Also delete QueryDef
  }
  m_pushed_join_member= NULL;

  // make sure m_fk_mem_root is released
  free_root(&m_fk_mem_root, 0);
  m_fk_data= NULL;
  DBUG_VOID_RETURN;
}


/**
  Open a table for further use
  - fetch metadata for this table from NDB
  - check that table exists

  @retval
    0    ok
  @retval
    < 0  Table has changed
*/

int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
{
  THD *thd= current_thd;
  int res;
  KEY *key;
  KEY_PART_INFO *key_part_info;
  uint key_parts, i, j;
  DBUG_ENTER("ha_ndbcluster::open");
  DBUG_PRINT("enter", ("name: %s  mode: %d  test_if_locked: %d",
                       name, mode, test_if_locked));

  if (table_share->primary_key != MAX_KEY)
  {
    /*
      Setup ref_length to make room for the whole
      primary key to be written in the ref variable
    */
    key= table->key_info+table_share->primary_key;
    ref_length= key->key_length;
  }
  else
  {
    if (m_user_defined_partitioning)
    {
      /* Add space for partid in ref */
      ref_length+= sizeof(m_part_id);
    }
  }
  DBUG_PRINT("info", ("ref_length: %d", ref_length));

  {
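    // A single allocation holds both the NULL-terminated array of MY_BITMAP
    // pointers and the MY_BITMAP structs themselves. An extra hidden-key
    // bitmap is added when the table has no primary key.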
    char* bitmap_array;
    uint extra_hidden_keys= table_share->primary_key != MAX_KEY ? 0 : 1;
    uint n_keys= table_share->keys + extra_hidden_keys;
    uint ptr_size= sizeof(MY_BITMAP*) * (n_keys + 1 /* null termination */);
    uint map_size= sizeof(MY_BITMAP) * n_keys;
    m_key_fields= (MY_BITMAP**)my_malloc(PSI_INSTRUMENT_ME,
                                         ptr_size + map_size,
                                         MYF(MY_WME + MY_ZEROFILL));
    if (!m_key_fields)
    {
      local_close(thd, FALSE);
      DBUG_RETURN(1);
    } 
    bitmap_array= ((char*)m_key_fields) + ptr_size;
    for (i= 0; i < n_keys; i++)
    {
      my_bitmap_map *bitbuf= NULL;
      bool is_hidden_key= (i == table_share->keys);
      m_key_fields[i]= (MY_BITMAP*)bitmap_array;
      if (is_hidden_key || (i == table_share->primary_key))
      {
        m_pk_bitmap_p= m_key_fields[i];
        bitbuf= m_pk_bitmap_buf;
      }
      if (bitmap_init(m_key_fields[i], bitbuf,
                      table_share->fields, FALSE))
      {
        m_key_fields[i]= NULL;
        local_close(thd, FALSE);
        DBUG_RETURN(1);
      }
      if (!is_hidden_key)
      {
        key= table->key_info + i;
        key_part_info= key->key_part;
        key_parts= key->user_defined_key_parts;
        for (j= 0; j < key_parts; j++, key_part_info++)
          bitmap_set_bit(m_key_fields[i], key_part_info->fieldnr-1);
      }
      else
      {
        uint field_no= table_share->fields;
        ((uchar *)m_pk_bitmap_buf)[field_no>>3]|= (1 << (field_no & 7));
      }
      bitmap_array+= sizeof(MY_BITMAP);
    }
    m_key_fields[i]= NULL;
  }

  set_dbname(name);
  set_tabname(name);

  if ((res= check_ndb_connection(thd)) != 0)
  {
    local_close(thd, FALSE);
    DBUG_RETURN(res);
  }

  // Init table lock structure
  /* ndb_share reference handler */
  if ((m_share=get_share(name, table, FALSE)) == 0)
  {
    /**
     * No share present...we must create one
     */
    if (opt_ndb_extra_logging > 19)
    {
      sql_print_information("Calling ndbcluster_create_binlog_setup(%s) in ::open",
                            name);
    }
    Ndb* ndb= check_ndb_in_thd(thd);
    ndbcluster_create_binlog_setup(thd, ndb, name, (uint)strlen(name),
                                   m_dbname, m_tabname, table);
    if ((m_share=get_share(name, table, FALSE)) == 0)
    {
      local_close(thd, FALSE);
      DBUG_RETURN(1);
    }
  }

  DBUG_PRINT("NDB_SHARE", ("%s handler  use_count: %u",
                           m_share->key_string(), m_share->use_count));
  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);

  if ((res= get_metadata(thd, name)))
  {
    local_close(thd, FALSE);
    DBUG_RETURN(res);
  }

  if ((res= update_stats(thd, 1)) ||
      (res= info(HA_STATUS_CONST)))
  {
    local_close(thd, TRUE);
    DBUG_RETURN(res);
  }
  if (ndb_binlog_is_read_only())
  {
    table->db_stat|= HA_READ_ONLY;
    sql_print_information("table '%s' opened read only", name);
  }
  DBUG_RETURN(0);
}

/*
 * Support for OPTIMIZE TABLE:
 * reclaims unused space from deleted rows
 * and updates index statistics.
 */
int ha_ndbcluster::optimize(THD* thd, HA_CHECK_OPT* check_opt)
{
  ulong error, stats_error= 0;
  const uint delay= (uint)THDVAR(thd, optimization_delay);

  error= ndb_optimize_table(thd, delay);
  stats_error= update_stats(thd, 1);
  return (error) ? error : stats_error;
}

int ha_ndbcluster::ndb_optimize_table(THD* thd, uint delay)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NDBDICT *dict= ndb->getDictionary();
  int result=0, error= 0;
  uint i;
  NdbDictionary::OptimizeTableHandle th;
  NdbDictionary::OptimizeIndexHandle ih;

  DBUG_ENTER("ndb_optimize_table");
  if ((error= dict->optimizeTable(*m_table, th)))
  {
    DBUG_PRINT("info",
               ("Optimize table %s returned %d", m_tabname, error));
    ERR_RETURN(ndb->getNdbError());
  }
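  // Poll the optimize handle until the operation completes, sleeping
  // 'delay' milliseconds between polls and aborting if the thread is killed.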
  while((result= th.next()) == 1)
  {
    if (thd->killed)
      DBUG_RETURN(-1);
    my_sleep(1000*delay);
  }
  if (result == -1 || th.close() == -1)
  {
    DBUG_PRINT("info",
               ("Optimize table %s did not complete", m_tabname));
    ERR_RETURN(ndb->getNdbError());
  }
  for (i= 0; i < MAX_KEY; i++)
  {
    if (thd->killed)
      DBUG_RETURN(-1);
    if (m_index[i].status == ACTIVE)
    {
      const NdbDictionary::Index *index= m_index[i].index;
      const NdbDictionary::Index *unique_index= m_index[i].unique_index;
      
      if (index)
      {
        if ((error= dict->optimizeIndex(*index, ih)))
        {
          DBUG_PRINT("info",
                     ("Optimize index %s returned %d",
                      index->getName(), error));
          ERR_RETURN(ndb->getNdbError());
          
        }
        while((result= ih.next()) == 1)
        {
          if (thd->killed)
            DBUG_RETURN(-1);
          my_sleep(1000*delay);        
        }
        if (result == -1 || ih.close() == -1)
        {
          DBUG_PRINT("info",
                     ("Optimize index %s did not complete", index->getName()));
          ERR_RETURN(ndb->getNdbError());
        }          
      }
      if (unique_index)
      {
        if ((error= dict->optimizeIndex(*unique_index, ih)))
        {
          DBUG_PRINT("info",
                     ("Optimize unique index %s returned %d",
                      unique_index->getName(), error));
          ERR_RETURN(ndb->getNdbError());
        } 
        while((result= ih.next()) == 1)
        {
          if (thd->killed)
            DBUG_RETURN(-1);
          my_sleep(1000*delay);
        }
        if (result == -1 || ih.close() == -1)
        {
          DBUG_PRINT("info",
                     ("Optimize unique index %s did not complete", unique_index->getName()));
          ERR_RETURN(ndb->getNdbError());
        }
      }
    }
  }
  DBUG_RETURN(0);
}

int ha_ndbcluster::analyze(THD* thd, HA_CHECK_OPT* check_opt)
{
  int err;
  if ((err= update_stats(thd, 1)) != 0)
    return err;
  const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
                                THDVAR(thd, index_stat_enable);
  if (index_stat_enable)
  {
    if ((err= analyze_index(thd)) != 0)
      return err;
  }
  return 0;
}

int
ha_ndbcluster::analyze_index(THD *thd)
{
  DBUG_ENTER("ha_ndbcluster::analyze_index");

  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;

  uint inx_list[MAX_INDEXES];
  uint inx_count= 0;

  uint inx;
  for (inx= 0; inx < table_share->keys; inx++)
  {
    NDB_INDEX_TYPE idx_type= get_index_type(inx);  

    if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
         idx_type == UNIQUE_ORDERED_INDEX ||
         idx_type == ORDERED_INDEX))
    {
      if (inx_count < MAX_INDEXES)
        inx_list[inx_count++]= inx;
    }
  }

  if (inx_count != 0)
  {
    int err= ndb_index_stat_analyze(ndb, inx_list, inx_count);
    if (err != 0)
      DBUG_RETURN(err);
  }
  DBUG_RETURN(0);
}

/*
  Set partition info

  SYNOPSIS
    set_part_info()
    part_info

  RETURN VALUE
    NONE

  DESCRIPTION
    Set up partition info when handler object created
*/

void ha_ndbcluster::set_part_info(partition_info *part_info, bool early)
{
  DBUG_ENTER("ha_ndbcluster::set_part_info");
  m_part_info= part_info;
  if (!early)
  {
    m_use_partition_pruning= FALSE;
    if (!(m_part_info->part_type == HASH_PARTITION &&
          m_part_info->list_of_part_fields &&
          !m_part_info->is_sub_partitioned()))
    {
      /*
        PARTITION BY HASH, RANGE and LIST plus all subpartitioning variants
        all use MySQL defined partitioning. PARTITION BY KEY uses NDB native
        partitioning scheme.
      */
      m_use_partition_pruning= TRUE;
      m_user_defined_partitioning= TRUE;
    }
    if (m_part_info->part_type == HASH_PARTITION &&
        m_part_info->list_of_part_fields &&
        m_part_info->num_full_part_fields == 0)
    {
      /*
        CREATE TABLE t (....) ENGINE NDB PARTITION BY KEY();
        where no primary key is defined uses a hidden key as partition field
        and this makes it impossible to use any partition pruning. Partition
        pruning requires partitioning based on real fields. Also, the lack of
        a primary key means that all accesses to the table are based on either
        full table scans or index scans, and those scans can never be pruned
        since the hidden key is unknown. In write_row, update_row,
        and delete_row the normal hidden key handling will fix things.
      */
      m_use_partition_pruning= FALSE;
    }
    DBUG_PRINT("info", ("m_use_partition_pruning = %d",
                         m_use_partition_pruning));
  }
  DBUG_VOID_RETURN;
}

/**
  Close the table
  - release resources setup by open()
 */

void ha_ndbcluster::local_close(THD *thd, bool release_metadata_flag)
{
  Ndb *ndb;
  DBUG_ENTER("ha_ndbcluster::local_close");
  if (m_key_fields)
  {
    MY_BITMAP **inx_bitmap;
    for (inx_bitmap= m_key_fields;
         (inx_bitmap != NULL) && ((*inx_bitmap) != NULL);
         inx_bitmap++)
      if ((*inx_bitmap)->bitmap != m_pk_bitmap_buf)
        bitmap_free(*inx_bitmap);
    my_free((char*)m_key_fields, MYF(0));
    m_key_fields= NULL;
  }
  if (m_share)
  {
    /* ndb_share reference handler free */
    DBUG_PRINT("NDB_SHARE", ("%s handler free  use_count: %u",
                             m_share->key_string(), m_share->use_count));
    free_share(&m_share);
  }
  m_share= 0;
  if (release_metadata_flag)
  {
    ndb= thd ? check_ndb_in_thd(thd) : g_ndb;
    release_metadata(thd, ndb);
  }
  DBUG_VOID_RETURN;
}

int ha_ndbcluster::close(void)
{
  DBUG_ENTER("close");
  THD *thd= table->in_use;
  local_close(thd, TRUE);
  DBUG_RETURN(0);
}


int ha_ndbcluster::check_ndb_connection(THD* thd) const
{
  Ndb *ndb;
  DBUG_ENTER("check_ndb_connection");
  
  if (!(ndb= check_ndb_in_thd(thd, true)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  if (ndb->setDatabaseName(m_dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }
  DBUG_RETURN(0);
}


static int ndbcluster_close_connection(handlerton *hton, THD *thd)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  DBUG_ENTER("ndbcluster_close_connection");
  if (thd_ndb)
  {
    Thd_ndb::release(thd_ndb);
    thd_set_thd_ndb(thd, NULL);
  }
  DBUG_RETURN(0);
}


/**
  Try to discover one table from NDB.
*/
static
int ndbcluster_discover(handlerton *hton, THD* thd, const char *db, 
                        const char *name,
                        uchar **frmblob, 
                        size_t *frmlen)
{
  int error= 0;
  NdbError ndb_error;
  size_t len;
  uchar* data= NULL;
  Ndb* ndb;
  char key[FN_REFLEN + 1];
  DBUG_ENTER("ndbcluster_discover");
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); 

  // Check if the database directory for the table to discover exists
  // as otherwise there is no place to put the discovered .frm file.
  build_table_filename(key, sizeof(key) - 1, db, "", "", 0);
  const int database_exists= !my_access(key, F_OK);
  if (!database_exists)
  {
    sql_print_information("NDB: Could not find database directory '%s' "
                          "while trying to discover table '%s'", db, name);
    // Can't discover table when database directory does not exist
    DBUG_RETURN(1);
  }

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  if (ndb->setDatabaseName(db))
  {
    ERR_RETURN(ndb->getNdbError());
  }

  build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
  /* ndb_share reference temporary */
  NDB_SHARE* share= get_share(key, 0, FALSE);
  if (share)
  {
    DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                             share->key_string(), share->use_count));
  }
  if (share && get_ndb_share_state(share) == NSS_ALTERED)
  {
    // Frm has been altered on disk, but not yet written to ndb
    if (readfrm(key, &data, &len))
    {
      DBUG_PRINT("error", ("Could not read frm"));
      error= 1;
      goto err;
    }
  }
  else
  {
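    // No locally altered frm available; fetch the packed frm blob stored in
    // the NDB dictionary and unpack it.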
    NDBDICT* dict= ndb->getDictionary();
    Ndb_table_guard ndbtab_g(dict, name);
    const NDBTAB *tab= ndbtab_g.get_table();
    if (!tab)
    {
      const NdbError err= dict->getNdbError();
      if (err.code == 709 || err.code == 723)
      {
        error= -1;
        DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
      }
      else
      {
        error= -1;
        ndb_error= err;
        DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code));
      }
      goto err;
    }
    DBUG_PRINT("info", ("Found table %s", tab->getName()));
    
    len= tab->getFrmLength();  
    if (len == 0 || tab->getFrmData() == NULL)
    {
      DBUG_PRINT("error", ("No frm data found."));
      error= 1;
      goto err;
    }
    
    if (unpackfrm(&data, &len, (uchar*) tab->getFrmData()))
    {
      DBUG_PRINT("error", ("Could not unpack table"));
      error= 1;
      goto err;
    }
  }
#ifdef HAVE_NDB_BINLOG
  if (ndbcluster_check_if_local_table(db, name) &&
      !Ndb_dist_priv_util::is_distributed_priv_table(db, name))
  {
    DBUG_PRINT("info", ("ndbcluster_discover: Skipping locally defined table '%s.%s'",
                        db, name));
    sql_print_error("ndbcluster_discover: Skipping locally defined table '%s.%s'",
                    db, name);
    error= 1;
    goto err;
  }
#endif
  *frmlen= len;
  *frmblob= data;
  
  if (share)
  {
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                             share->key_string(), share->use_count));
    free_share(&share);
  }

  DBUG_RETURN(0);
err:
  my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
  if (share)
  {
    /* ndb_share reference temporary free */
    DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                             share->key_string(), share->use_count));
    free_share(&share);
  }

  if (ndb_error.code)
  {
    ERR_RETURN(ndb_error);
  }
  DBUG_RETURN(error);
}

/**
  Check if a table exists in NDB.
*/
static
int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd, 
                                      const char *db,
                                      const char *name)
{
  Ndb* ndb;
  DBUG_ENTER("ndbcluster_table_exists_in_engine");
  DBUG_PRINT("enter", ("db: %s  name: %s", db, name));

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  NDBDICT* dict= ndb->getDictionary();
  NdbDictionary::Dictionary::List list;
  if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0)
  {
    ERR_RETURN(dict->getNdbError());
  }
  for (uint i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
    if (my_strcasecmp(table_alias_charset, elmt.database, db))
      continue;
    if (my_strcasecmp(table_alias_charset, elmt.name, name))
      continue;
    DBUG_PRINT("info", ("Found table"));
    DBUG_RETURN(HA_ERR_TABLE_EXIST);
  }
  DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}


extern "C" uchar* tables_get_key(const char *entry, size_t *length,
                                my_bool not_used MY_ATTRIBUTE((unused)))
{
  *length= strlen(entry);
  return (uchar*) entry;
}


/**
  Drop a database in NDB Cluster

  @note
    add a dummy void function, since the handlerton interface returns void instead of int...
*/
int ndbcluster_drop_database_impl(THD *thd, const char *path)
{
  DBUG_ENTER("ndbcluster_drop_database");
  char dbname[FN_HEADLEN];
  Ndb* ndb;
  NdbDictionary::Dictionary::List list;
  uint i;
  char *tabname;
  List<char> drop_list;
  int ret= 0;
  ha_ndbcluster::set_dbname(path, (char *)&dbname);
  DBUG_PRINT("enter", ("db: %s", dbname));
  
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(-1);
  
  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
    DBUG_RETURN(-1);
  for (i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& elmt= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));     
    
    // Add only tables that belong to db
    // Ignore Blob part tables - they are deleted when their table
    // is deleted.
    if (my_strcasecmp(system_charset_info, elmt.database, dbname) ||
        IS_NDB_BLOB_PREFIX(elmt.name) ||
        ndb_fk_util_is_mock_name(elmt.name))
      continue;
    DBUG_PRINT("info", ("%s must be dropped", elmt.name));     
    drop_list.push_back(thd->mem_strdup(elmt.name));
  }
  // Drop any tables belonging to database
  char full_path[FN_REFLEN + 1];
  char *tmp= full_path +
    build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0);
  if (ndb->setDatabaseName(dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }
  List_iterator_fast<char> it(drop_list);
  while ((tabname=it++))
  {
    tablename_to_filename(tabname, tmp, (uint)(FN_REFLEN - (tmp - full_path)-1));
    if (ha_ndbcluster::drop_table_impl(thd, 0, ndb, full_path, dbname, tabname))
    {
      const NdbError err= dict->getNdbError();
      if (err.code != 709 && err.code != 723)
      {
        ret= ndb_to_mysql_error(&err);
      }
    }
  }

  dict->invalidateDbGlobal(dbname);
  DBUG_RETURN(ret);
}


/**
   @brief Check the given directory for any remaining NDB related
          leftovers and try to remove them.

   @param path The path of the directory to check

   @note This function is called only when all tables which mysqld or NDB
         knew about have been removed. Thus anything left behind can be
         safely removed.
*/

static void
ndbcluster_drop_database_leftovers(const char* path)
{
  DBUG_ENTER("ndbcluster_drop_database_leftovers");
  MY_DIR* dirp;
  if (!(dirp= my_dir(path,MYF(MY_DONT_SORT))))
  {
    // The database directory didn't exist, crash in debug since
    // something is obviously wrong
    assert(false);
    DBUG_VOID_RETURN;
  }

  for (uint i= 0; i < dirp->number_off_files; i++)
  {
    FILEINFO* file= dirp->dir_entry + i;
    DBUG_PRINT("info", ("found: '%s'", file->name));

    char* extension= fn_ext(file->name);
    DBUG_PRINT("info", ("extension: '%s'", extension));
    if (strcmp(extension, ha_ndb_ext))
      continue;

    char file_path[FN_REFLEN];
    strxmov(file_path, path, "/", file->name, NullS);
    DBUG_PRINT("info", ("Found leftover .ndb file '%s'! Try to delete it.",
                        file_path));
    if (my_delete_with_symlink(file_path, MYF(0)))
    {
      // Failed to delete the file. Ignore it since the DROP DATABASE
      // will report an error later when it tries to delete the directory
      DBUG_PRINT("error", ("Delete of '%s' failed, my_errno: %d",
                           file_path, my_errno()));
    }
  }

  my_dirend(dirp);
  DBUG_VOID_RETURN;
}


static void ndbcluster_drop_database(handlerton *hton, char *path)
{
  THD *thd= current_thd;
  DBUG_ENTER("ndbcluster_drop_database");

  if (!ndb_schema_dist_is_ready())
  {
    /* Don't allow drop database unless schema distribution is ready */
    DBUG_VOID_RETURN;
  }

  ndbcluster_drop_database_impl(thd, path);

  /*
    At this point the mysqld has looped over all the tables it knew
    about in the database and dropped them one by one. The above call
    to 'ndbcluster_drop_database_impl' has dropped any NDB tables in
    the database which mysqld didn't know about (this could potentially
    happen if there was a "local" table with the same name). This means
    that the database directory should be free of anything NDB related.
    Double check to make sure nothing is left behind and remove any
    leftovers (which according to BUG#44529 could happen after, for
    example, a failed ALTER TABLE).
  */
  ndbcluster_drop_database_leftovers(path);

  char db[FN_REFLEN];
  ha_ndbcluster::set_dbname(path, db);
  uint32 table_id= 0, table_version= 0;
  /*
    Since databases aren't real NDB schema objects
    they don't have any id/version.

    But since that id/version is used to make sure that events on SCHEMA_TABLE
    are correct, we set random numbers.
  */
  table_id = (uint32)rand();
  table_version = (uint32)rand();
  ndbcluster_log_schema_op(thd,
                           thd->query().str, thd->query().length,
                           db, "", table_id, table_version,
                           SOT_DROP_DB, NULL, NULL);
  DBUG_VOID_RETURN;
}

int ndb_create_table_from_engine(THD *thd, const char *db,
                                 const char *table_name)
{
  // Copy db and table_name to stack buffers since functions used by
  // ha_create_table_from_engine may convert to lowercase on some platforms
  char db_buf[FN_REFLEN + 1];
  char table_name_buf[FN_REFLEN + 1];
  my_stpnmov(db_buf, db, sizeof(db_buf));
  my_stpnmov(table_name_buf, table_name, sizeof(table_name_buf));

  LEX *old_lex= thd->lex, newlex;
  thd->lex= &newlex;
  newlex.set_current_select(NULL);
  lex_start(thd);
  int res= ha_create_table_from_engine(thd, db_buf, table_name_buf);
  thd->lex= old_lex;
  return res;
}


static int
ndbcluster_find_files(handlerton *hton, THD *thd,
                      const char *db, const char *path,
                      const char *wild, bool dir, List<LEX_STRING> *files)
{
  DBUG_ENTER("ndbcluster_find_files");
  DBUG_PRINT("enter", ("db: %s", db));
  { // extra bracket to avoid gcc 2.95.3 warning
  uint i;
  Thd_ndb *thd_ndb;
  Ndb* ndb;
  char name[FN_REFLEN + 1];
  HASH ndb_tables, ok_tables;
  NDBDICT::List list;

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  thd_ndb= get_thd_ndb(thd);

  if (dir)
    DBUG_RETURN(0); // Discovery of databases not yet supported

  Ndb_global_schema_lock_guard ndb_global_schema_lock_guard(thd);
  if (ndb_global_schema_lock_guard.lock())
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());

  if (my_hash_init(&ndb_tables, table_alias_charset,list.count,0,0,
                   (my_hash_get_key)tables_get_key,0,0,
                   PSI_INSTRUMENT_ME))
  {
    DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
    DBUG_RETURN(-1);
  }

  if (my_hash_init(&ok_tables, system_charset_info,32,0,0,
                   (my_hash_get_key)tables_get_key,0,0,
                   PSI_INSTRUMENT_ME))
  {
    DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
    my_hash_free(&ndb_tables);
    DBUG_RETURN(-1);
  }  

  for (i= 0 ; i < list.count ; i++)
  {
    NDBDICT::List::Element& elmt= list.elements[i];
    if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name))
    {
      DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name));
      continue;
    }
    DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name));

    // Add only tables that belong to db
    if (my_strcasecmp(system_charset_info, elmt.database, db))
      continue;

    // Apply wildcard to list of tables in NDB
    if (wild)
    {
      if (lower_case_table_names)
      {
        if (wild_case_compare(files_charset_info, elmt.name, wild))
          continue;
      }
      else if (wild_compare(elmt.name,wild,0))
        continue;
    }
    DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name));     
    my_hash_insert(&ndb_tables, (uchar*)thd->mem_strdup(elmt.name));
  }

  LEX_STRING *file_name;
  List_iterator<LEX_STRING> it(*files);
  List<char> delete_list;
  char *file_name_str;
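  // Cross-check each frm file found on disk against the tables known to NDB:
  // trigger discovery where the frm is missing, detect local tables shadowing
  // NDB tables, and collect stale files for later removal.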
  while ((file_name=it++))
  {
    bool file_on_disk= FALSE;
    DBUG_PRINT("info", ("File : %s", file_name->str));
    if (my_hash_search(&ndb_tables,
                       (const uchar*)file_name->str, file_name->length))
    {
      build_table_filename(name, sizeof(name) - 1, db,
                           file_name->str, reg_ext, 0);
      if (my_access(name, F_OK))
      {
        /* No frm for database, table name combination, but
         * Cluster says the table with that combination exists.
         * Assume frm was deleted, re-discover from engine.
         */
        DBUG_PRINT("info", ("Table %s listed and needs discovery",
                            file_name->str));
        if (ndb_create_table_from_engine(thd, db, file_name->str))
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_TABLE_EXISTS_ERROR,
                              "Discover of table %s.%s failed",
                              db, file_name->str);
          continue;
        }
      }
      DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str));
      file_on_disk= TRUE;
    }
    
    // Check for .ndb file with this name
    build_table_filename(name, sizeof(name) - 1, db,
                         file_name->str, ha_ndb_ext, 0);
    DBUG_PRINT("info", ("Check access for %s", name));
    if (my_access(name, F_OK))
    {
      DBUG_PRINT("info", ("%s did not exist on disk", name));     
      // .ndb file did not exist on disk, another table type
      if (file_on_disk)
      {
        // Cluster table and an frm file exist, but no .ndb file
        // Assume this means the frm is for a local table, and is
        // hiding the cluster table in its shadow. 
	// Ignore this ndb table 
 	uchar *record= my_hash_search(&ndb_tables,
                                      (const uchar*) file_name->str,
                                      file_name->length);
	assert(record);
	my_hash_delete(&ndb_tables, record);
        push_warning_printf(thd, Sql_condition::SL_WARNING,
			    ER_TABLE_EXISTS_ERROR,
			    "Local table %s.%s shadows ndb table",
			    db, file_name->str);
      }
      continue;
    }

    /* .ndb file exists */
    if (file_on_disk) 
    {
      // File existed in Cluster and has both frm and .ndb files, 
      // Put in ok_tables list
      my_hash_insert(&ok_tables, (uchar*) file_name->str);
      continue;
    }
    DBUG_PRINT("info", ("%s existed on disk", name));     
    // The .ndb file exists on disk, but it's not in list of tables in cluster
    // Verify that handler agrees table is gone.
    if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) ==
        HA_ERR_NO_SUCH_TABLE)
    {
      DBUG_PRINT("info", ("NDB says %s does not exist", file_name->str));
      it.remove();
      if (thd == injector_thd)
      {
        /*
          Don't delete anything when called from the binlog thread.
          This is a kludge to avoid deleting something when "Ndb schema
          dist" uses find_files() to check for "local tables in db".
        */
      }
      else
	// Put in list of tables to remove from disk
	delete_list.push_back(thd->mem_strdup(file_name->str));
    }
  }

  if (!thd_ndb->skip_binlog_setup_in_find_files())
  {
    /* setup logging to binlog for all discovered tables */
    char *end, *end1= name +
      build_table_filename(name, sizeof(name) - 1, db, "", "", 0);
    for (i= 0; i < ok_tables.records; i++)
    {
      file_name_str= (char*)my_hash_element(&ok_tables, i);
      end= end1 +
        tablename_to_filename(file_name_str, end1, (uint)(sizeof(name) - (end1 - name)));
      ndbcluster_create_binlog_setup(thd, ndb, name, (uint)(end-name),
                                     db, file_name_str, 0);
    }
  }

  // Check for new files to discover
  DBUG_PRINT("info", ("Checking for new files to discover"));       
  List<char> create_list;
  for (i= 0 ; i < ndb_tables.records ; i++)
  {
    file_name_str= (char*) my_hash_element(&ndb_tables, i);
    if (!my_hash_search(&ok_tables,
                        (const uchar*) file_name_str, strlen(file_name_str)))
    {
      /* Table in Cluster did not have frm or .ndb */
      build_table_filename(name, sizeof(name) - 1,
                           db, file_name_str, reg_ext, 0);
      if (my_access(name, F_OK))
      {
        DBUG_PRINT("info", ("%s must be discovered", file_name_str));
        // File is in list of ndb tables and not in ok_tables.
        // It is missing an frm file.
        // This table needs to be created
        create_list.push_back(thd->mem_strdup(file_name_str));
      }
    }
  }

  if (thd == injector_thd)
  {
    /*
      Don't delete anything when called from the binlog thread.
      This is a kludge to avoid deleting something when "Ndb schema dist"
      uses find_files() to check for "local tables in db".
    */
  }
  else
  {
    /*
      Delete old files
      (.frm files with a corresponding .ndb file where the table no
      longer exists in NDB)
    */
    List_iterator_fast<char> it3(delete_list);
    while ((file_name_str= it3++))
    {
      DBUG_PRINT("info", ("Deleting local files for table '%s.%s'",
                          db, file_name_str));

      // Delete the table and its related files from disk
      Ndb_local_schema::Table local_table(thd, db, file_name_str);
      local_table.remove_table();

      // Flush the table out of ndbapi's dictionary cache
      Ndb_table_guard ndbtab_g(ndb->getDictionary(), file_name_str);
      ndbtab_g.invalidate();

      // Flush the table from table def. cache.
      ndb_tdc_close_cached_table(thd, db, file_name_str);

      assert(!thd->is_error());
    }
  }

  // Create new files
  List_iterator_fast<char> it2(create_list);
  while ((file_name_str=it2++))
  {  
    DBUG_PRINT("info", ("Table %s need discovery", file_name_str));
    if (ndb_create_table_from_engine(thd, db, file_name_str) == 0)
    {
      LEX_STRING *tmp_file_name= 0;
      tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str,
                                          (uint)strlen(file_name_str), TRUE);
      files->push_back(tmp_file_name); 
    }
  }

  my_hash_free(&ok_tables);
  my_hash_free(&ndb_tables);

  /* Hide mysql.ndb_schema table */
  if (!strcmp(db, NDB_REP_DB))
  {
    LEX_STRING* file_name;
    List_iterator<LEX_STRING> it(*files);
    while ((file_name= it++))
    {
      if (!strcmp(file_name->str, NDB_SCHEMA_TABLE))
      {
        DBUG_PRINT("info", ("Hiding table '%s.%s'", db, file_name->str));
        it.remove();
      }
    }
  }
  } // extra bracket to avoid gcc 2.95.3 warning
  DBUG_RETURN(0);    
}


/**
  Check if the given table is a system table that is
  supported for storage in NDB
*/
static bool is_supported_system_table(const char *db,
                                      const char *table_name,
                                      bool is_sql_layer_system_table)
{
  if (!is_sql_layer_system_table)
  {
    // No need to check tables which the MySQL Server does not
    // consider to be system tables
    return false;
  }

  if (Ndb_dist_priv_util::is_distributed_priv_table(db, table_name))
  {
    // The table is supported as a distributed system table and should be
    // allowed to be stored in NDB
    return true;
  }

  return false;
}
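/*
  Illustrative example (the concrete table names are assumptions, not taken
  from this file): with the privilege tables stored in NDB, a call like
  is_supported_system_table("mysql", "user", true) would return true if
  Ndb_dist_priv_util considers mysql.user a distributed privilege table,
  while other SQL-layer system tables return false, and any table with
  is_sql_layer_system_table == false is not checked at all.
*/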


/* Callback invoked after cluster connect */
static int connect_callback()
{
  native_mutex_lock(&ndb_util_thread.LOCK);
  update_status_variables(NULL, &g_ndb_status,
                          g_ndb_cluster_connection);
  native_cond_broadcast(&ndb_util_thread.COND);
  native_mutex_unlock(&ndb_util_thread.LOCK);
  return 0;
}

/**
 * Components
 */
Ndb_util_thread ndb_util_thread;
Ndb_index_stat_thread ndb_index_stat_thread;

extern THD * ndb_create_thd(char * stackptr);

#ifndef NDB_NO_WAIT_SETUP
static int ndb_wait_setup_func_impl(ulong max_wait)
{
  DBUG_ENTER("ndb_wait_setup_func_impl");

  native_mutex_lock(&ndbcluster_mutex);

  struct timespec abstime;
  set_timespec(&abstime, 1);

  while (max_wait &&
         (!ndb_setup_complete || !ndb_index_stat_thread.is_setup_complete()))
  {
    int rc= native_cond_timedwait(&COND_ndb_setup_complete,
                                  &ndbcluster_mutex,
                                  &abstime);
    if (rc)
    {
      if (rc == ETIMEDOUT)
      {
        DBUG_PRINT("info", ("1s elapsed waiting"));
        max_wait--;
        set_timespec(&abstime, 1); /* 1 second from now*/
      }
      else
      {
        DBUG_PRINT("info", ("Bad native_cond_timedwait rc : %u",
                            rc));
        assert(false);
        break;
      }
    }
  }

  native_mutex_unlock(&ndbcluster_mutex);

#ifndef NDB_WITHOUT_DIST_PRIV
  do
  {
    /**
     * Check if we (might) need a flush privileges
     */
    THD* thd= current_thd;
    bool own_thd= thd == NULL;
    if (own_thd)
    {
      thd= ndb_create_thd((char*)&thd);
      if (thd == 0)
        break;
    }

    if (Ndb_dist_priv_util::priv_tables_are_in_ndb(thd))
    {
      Ndb_local_connection mysqld(thd);
      mysqld.raw_run_query("FLUSH PRIVILEGES", sizeof("FLUSH PRIVILEGES"), 0);
    }

    if (own_thd)
    {
      // TLS variables should not point to thd anymore.
      thd->restore_globals();
      delete thd;
    }
  } while (0);
#endif

  DBUG_RETURN((ndb_setup_complete == 1)? 0 : 1);
}

int(*ndb_wait_setup_func)(ulong) = 0;
#endif

static int
ndbcluster_make_pushed_join(handlerton *, THD*, const AQP::Join_plan*);

/* Version in composite numerical format */
static Uint32 ndb_version = NDB_VERSION_D;
static MYSQL_SYSVAR_UINT(
  version,                          /* name */
  ndb_version,                      /* var */
  PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
  "Compile version for ndbcluster",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0,                                /* default */
  0,                                /* min */
  0,                                /* max */
  0                                 /* block */
);

/* Version in ndb-Y.Y.Y[-status] format */
static char* ndb_version_string = (char*)NDB_NDB_VERSION_STRING;
static MYSQL_SYSVAR_STR(
  version_string,                  /* name */
  ndb_version_string,              /* var */
  PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
  "Compile version string for ndbcluster",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NULL                              /* default */
);

extern int ndb_dictionary_is_mysqld;

Uint32 recv_thread_num_cpus;
static int ndb_recv_thread_cpu_mask_check_str(const char *str);
static void ndb_recv_thread_cpu_mask_update();
handlerton* ndbcluster_hton;


/*
  Handle failure from ndbcluster_init() by printing error
  message(s) and exit the MySQL Server.

  NOTE! This is done to avoid the current undefined behaviour which occurs
  when an error return code from plugin's init() function just disables
  the plugin.
*/

static
void ndbcluster_init_abort(const char* error)
{
  ndb_log_error("%s", error);
  ndb_log_error("Failed to initialize ndbcluster, aborting!");
  ndb_log_error("Use --skip-ndbcluster to start without ndbcluster.");
  exit(1);
}


/*
  Initialize the ndbcluster storage engine
 */

static
int ndbcluster_init(void* p)
{
  DBUG_ENTER("ndbcluster_init");

  assert(!ndbcluster_inited);

#ifdef HAVE_NDB_BINLOG
  /* Check const alignment */
  assert(DependencyTracker::InvalidTransactionId ==
         Ndb_binlog_extra_row_info::InvalidTransactionId);

  if (global_system_variables.binlog_format == BINLOG_FORMAT_STMT)
  {
    /* Set global to mixed - note that this is not the default,
     * but the current global value
     */
    global_system_variables.binlog_format = BINLOG_FORMAT_MIXED;
    sql_print_information("NDB: Changed global value of binlog_format from STATEMENT to MIXED");

  }
#endif
  if (ndb_util_thread.init() ||
      DBUG_EVALUATE_IF("ndbcluster_init_fail1", true, false))
  {
    ndbcluster_init_abort("Failed to initialize NDB Util");
  }

  if (ndb_index_stat_thread.init())
  {
    ndbcluster_init_abort("Failed to initialize NDB Index Stat");
  }

  native_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
  native_cond_init(&COND_ndb_setup_complete);
  ndb_dictionary_is_mysqld= 1;
  ndb_setup_complete= 0;
  ndbcluster_hton= (handlerton *)p;
  ndbcluster_global_schema_lock_init(ndbcluster_hton);

  {
    handlerton *h= ndbcluster_hton;
    h->state=            SHOW_OPTION_YES;
    h->db_type=          DB_TYPE_NDBCLUSTER;
    h->close_connection= ndbcluster_close_connection;
    h->commit=           ndbcluster_commit;
    h->rollback=         ndbcluster_rollback;
    h->create=           ndbcluster_create_handler; /* Create a new handler */
    h->drop_database=    ndbcluster_drop_database;  /* Drop a database */
    h->panic=            ndbcluster_end;            /* Panic call */
    h->show_status=      ndbcluster_show_status;    /* Show status */
    h->get_tablespace=   ndbcluster_get_tablespace; /* Get ts for old ver */
    h->alter_tablespace= ndbcluster_alter_tablespace;    /* Alter tablespace */
    h->partition_flags=  ndbcluster_partition_flags; /* Partition flags */
#if MYSQL_VERSION_ID >= 50501
    h->fill_is_table=    ndbcluster_fill_is_table;
#else
    h->fill_files_table= ndbcluster_fill_files_table;
#endif
    ndbcluster_binlog_init(h);
    h->flags=            HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED |
      HTON_NO_BINLOG_ROW_OPT;
    h->discover=         ndbcluster_discover;
    h->find_files=       ndbcluster_find_files;
    h->table_exists_in_engine= ndbcluster_table_exists_in_engine;
    h->make_pushed_join= ndbcluster_make_pushed_join;
    h->is_supported_system_table = is_supported_system_table;
  }

  // Initialize NdbApi
  ndb_init_internal();

  /* allocate connection resources and connect to cluster */
  const uint global_opti_node_select= THDVAR(NULL, optimized_node_selection);
  if (ndbcluster_connect(connect_callback, opt_ndb_wait_connected,
                         opt_ndb_cluster_connection_pool,
                         (global_opti_node_select & 1),
                         opt_ndb_connectstring,
                         opt_ndb_nodeid,
                         opt_ndb_recv_thread_activation_threshold))
  {
    ndbcluster_init_abort("Failed to initialize connection(s)");
  }

  /* Translate recv thread cpu mask if set */
  if (ndb_recv_thread_cpu_mask_check_str(opt_ndb_recv_thread_cpu_mask) == 0)
  {
    if (recv_thread_num_cpus)
    {
      ndb_recv_thread_cpu_mask_update();
    }
  }

  (void) my_hash_init(&ndbcluster_open_tables,table_alias_charset,32,0,0,
                      (my_hash_get_key) ndbcluster_get_key,0,0,
                      PSI_INSTRUMENT_ME);
  (void) my_hash_init(&ndbcluster_dropped_tables,table_alias_charset,32,0,0,
                      (my_hash_get_key) ndbcluster_get_key,0,0,
                      PSI_INSTRUMENT_ME);
  /* start the ndb injector thread */
  if (ndbcluster_binlog_start())
  {
    ndbcluster_init_abort("Failed to start NDB Binlog");
  }

  // Create utility thread
  if (ndb_util_thread.start())
  {
    ndbcluster_init_abort("Failed to start NDB Util");
  }

  // Create index statistics thread
  if (ndb_index_stat_thread.start() ||
      DBUG_EVALUATE_IF("ndbcluster_init_fail2", true, false))
  {
    ndbcluster_init_abort("Failed to start NDB Index Stat");
  }

#ifndef NDB_NO_WAIT_SETUP
  ndb_wait_setup_func= ndb_wait_setup_func_impl;
#endif

  memset(&g_slave_api_client_stats, 0, sizeof(g_slave_api_client_stats));

  ndbcluster_inited= 1;

  DBUG_RETURN(0); // OK
}

#ifndef NDEBUG
static
const char*
get_share_state_string(NDB_SHARE_STATE s)
{
  switch(s) {
  case NSS_INITIAL:
    return "NSS_INITIAL";
  case NSS_ALTERED:
    return "NSS_ALTERED";
  case NSS_DROPPED:
    return "NSS_DROPPED";
  }
  assert(false);
  return "<unknown>";
}
#endif

int ndbcluster_binlog_end(THD *thd);

static int ndbcluster_end(handlerton *hton, ha_panic_function type)
{
  DBUG_ENTER("ndbcluster_end");

  if (!ndbcluster_inited)
    DBUG_RETURN(0);
  ndbcluster_inited= 0;

  /* Stop index stat thread */
  ndb_index_stat_thread.stop();

  /* wait for util and binlog thread to finish */
  ndbcluster_binlog_end(NULL);

  {
    native_mutex_lock(&ndbcluster_mutex);
    uint save = ndbcluster_open_tables.records; (void)save;
    while (ndbcluster_open_tables.records)
    {
      NDB_SHARE *share=
        (NDB_SHARE*) my_hash_element(&ndbcluster_open_tables, 0);
#ifndef NDEBUG
      fprintf(stderr,
              "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
              share->key_string(), share->use_count,
              get_share_state_string(share->state),
              (uint)share->state);
#endif
      ndbcluster_real_free_share(&share);
    }
    native_mutex_unlock(&ndbcluster_mutex);
    assert(save == 0);
  }
  my_hash_free(&ndbcluster_open_tables);

  {
    native_mutex_lock(&ndbcluster_mutex);
    uint save = ndbcluster_dropped_tables.records; (void)save;
    while (ndbcluster_dropped_tables.records)
    {
      NDB_SHARE *share=
        (NDB_SHARE*) my_hash_element(&ndbcluster_dropped_tables, 0);
#ifndef NDEBUG
      fprintf(stderr,
              "NDB: table share %s with use_count %d state: %s(%u) not freed\n",
              share->key_string(), share->use_count,
              get_share_state_string(share->state),
              (uint)share->state);
      /**
       * For unknown reasons...the dist-priv tables linger here
       * TODO investigate why
       */
      if (Ndb_dist_priv_util::is_distributed_priv_table(share->db,
                                                        share->table_name))
      {
        save--;
      }
#endif
      ndbcluster_real_free_share(&share);
    }
    native_mutex_unlock(&ndbcluster_mutex);
    assert(save == 0);
  }
  my_hash_free(&ndbcluster_dropped_tables);

  ndb_index_stat_end();
  ndbcluster_disconnect();

  ndbcluster_global_schema_lock_deinit();
  ndb_util_thread.deinit();
  ndb_index_stat_thread.deinit();

  native_mutex_destroy(&ndbcluster_mutex);
  native_cond_destroy(&COND_ndb_setup_complete);

  // Cleanup NdbApi
  ndb_end_internal();

  DBUG_RETURN(0);
}

void ha_ndbcluster::print_error(int error, myf errflag)
{
  DBUG_ENTER("ha_ndbcluster::print_error");
  DBUG_PRINT("enter", ("error: %d", error));

  if (error == HA_ERR_NO_PARTITION_FOUND)
    m_part_info->print_no_partition_found(table);
  else
  {
    if (error == HA_ERR_FOUND_DUPP_KEY &&
        (table == NULL || table->file == NULL))
    {
      /*
        This is a side effect of 'ndbcluster_print_error' (called from
        'ndbcluster_commit' and 'ndbcluster_rollback'), which realises
        that it "knows nothing" and creates a brand new ha_ndbcluster
        in order to be able to call the print_error() function.
        Unfortunately the new ha_ndbcluster hasn't been open()ed,
        and thus the table pointer etc. is not set. Since
        handler::print_error() will use that pointer without checking
        for NULL (it naturally assumes an error can only be returned
        when the handler is open), this would crash mysqld unless it's
        handled here.
      */
      my_error(ER_DUP_KEY, errflag, table_share->table_name.str, error);
      DBUG_VOID_RETURN;
    }
    if (error == ER_CANT_DROP_FIELD_OR_KEY)
    {
      /*
        Called on drop unknown FK by server when algorithm=copy or
        by handler when algorithm=inplace.  In both cases the error
        was already printed in ha_ndb_ddl_fk.cc.
      */
      THD* thd= NULL;
      if (table != NULL &&
          (thd= table->in_use) != NULL &&
          thd->lex != NULL &&
          thd->lex->sql_command == SQLCOM_ALTER_TABLE)
      {
        DBUG_VOID_RETURN;
      }
      assert(false);
    }

    handler::print_error(error, errflag);
  }
  DBUG_VOID_RETURN;
}


/**
  Set a given location from full pathname to database name.
*/

void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
{
  ndb_set_dbname(path_name, dbname);
}

/**
  Set m_dbname from full pathname to table file.
*/

void ha_ndbcluster::set_dbname(const char *path_name)
{
  ndb_set_dbname(path_name, m_dbname);
}

/**
  Set a given location from full pathname to table file.
*/

void
ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
{
  ndb_set_tabname(path_name, tabname);
}

/**
  Set m_tabname from full pathname to table file.
*/

void ha_ndbcluster::set_tabname(const char *path_name)
{
  ndb_set_tabname(path_name, m_tabname);
}


/*
  If there are no stored stats, should we do a tree-dive on all db
  nodes?  The result is fairly good but does mean a round-trip.
 */
static const bool g_ndb_records_in_range_tree_dive= false;

/* Determine roughly how many records are in the range specified */
ha_rows 
ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
                                key_range *max_key)
{
  KEY *key_info= table->key_info + inx;
  uint key_length= key_info->key_length;
  NDB_INDEX_TYPE idx_type= get_index_type(inx);  

  DBUG_ENTER("records_in_range");
  // Prevent partial read of hash indexes by returning HA_POS_ERROR
  if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) &&
      ((min_key && min_key->length < key_length) ||
       (max_key && max_key->length < key_length)))
    DBUG_RETURN(HA_POS_ERROR);
  
  // Read from hash index with full key
  // This is a "const" table which returns only one record!      
  if ((idx_type != ORDERED_INDEX) &&
      ((min_key && min_key->length == key_length) &&
       (max_key && max_key->length == key_length) &&
       (min_key->key==max_key->key ||
        memcmp(min_key->key, max_key->key, key_length)==0)))
    DBUG_RETURN(1);
  
  // XXX why this if
  if ((idx_type == PRIMARY_KEY_ORDERED_INDEX ||
       idx_type == UNIQUE_ORDERED_INDEX ||
       idx_type == ORDERED_INDEX))
  {
    THD *thd= current_thd;
    const bool index_stat_enable= THDVAR(NULL, index_stat_enable) &&
                                  THDVAR(thd, index_stat_enable);

    if (index_stat_enable)
    {
      ha_rows rows= HA_POS_ERROR;
      int err= ndb_index_stat_get_rir(inx, min_key, max_key, &rows);
      if (err == 0)
      {
        /**
         * The optimizer thinks that all values < 2 are exact,
         * but we don't provide exact statistics
         */
        if (rows < 2)
          rows = 2;
        DBUG_RETURN(rows);
      }
      if (err != 0 &&
          /* no stats is not unexpected error */
          err != NdbIndexStat::NoIndexStats &&
          /* warning was printed at first error */
          err != NdbIndexStat::MyHasError &&
          /* stats thread aborted request */
          err != NdbIndexStat::MyAbortReq)
      {
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_CANT_GET_STAT, /* pun? */
                            "index stats (RIR) for key %s:"
                            " unexpected error %d",
                            key_info->name, err);
      }
      /*fall through*/
    }

    if (g_ndb_records_in_range_tree_dive)
    {
      NDB_INDEX_DATA& d=m_index[inx];
      const NDBINDEX* index= d.index;
      Ndb *ndb= get_ndb(thd);
      NdbTransaction* active_trans= m_thd_ndb ? m_thd_ndb->trans : 0;
      NdbTransaction* trans=NULL;
      int res=0;
      Uint64 rows= 0;

      do
      {
        if ((trans=active_trans) == NULL || 
            trans->commitStatus() != NdbTransaction::Started)
        {
          DBUG_PRINT("info", ("no active trans"));
          if (! (trans=ndb->startTransaction()))
            ERR_BREAK(ndb->getNdbError(), res);
        }
        
        /* Create an IndexBound struct for the keys */
        NdbIndexScanOperation::IndexBound ib;
        compute_index_bounds(ib,
                             key_info,
                             min_key, 
                             max_key,
                             0);

        ib.range_no= 0;

        NdbIndexStat is;
        if (is.records_in_range(index, 
                                trans, 
                                d.ndb_record_key,
                                m_ndb_record,
                                &ib, 
                                0, 
                                &rows, 
                                0) == -1)
          ERR_BREAK(is.getNdbError(), res);
      } while (0);

      if (trans != active_trans && rows == 0)
        rows = 1;
      if (trans != active_trans && trans != NULL)
        ndb->closeTransaction(trans);
      if (res == 0)
        DBUG_RETURN(rows);
      /*fall through*/
    }
  }

  /* Use simple heuristics to estimate fraction
     of 'stats.record' returned from range.
  */
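  /*
    Illustrative example (derived from the heuristics below, not from any
    NDB statistics): with table_rows = 1,000,000 and an ordered index on
    (a, b) where key_length = 8:
      - open-ended range (only a lower or upper bound) -> ~100,000 rows (10%)
      - bounded range with no leading equality part    -> ~50,000 rows  (5%)
      - equality on 'a' only (eq_bound_len = 4):
          eq_fraction = (4/8) / 1.20 ~= 0.42
          rows ~= table_rows / pow(table_rows, 0.42) ~= 3,000,
          then capped at table_rows/50 = 20,000 (cap not hit here)
    The final estimate is clamped to at least 2-3 rows and at most table_rows.
  */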
  do
  {
    if (stats.records == ~(ha_rows)0 || stats.records == 0)
    {
      /* Refresh statistics, only read from datanodes if 'use_exact_count' */
      THD *thd= current_thd;
      if (update_stats(thd, THDVAR(thd, use_exact_count)))
        break;
    }

    Uint64 rows;
    Uint64 table_rows= stats.records;
    size_t eq_bound_len= 0;
    size_t min_key_length= (min_key) ? min_key->length : 0;
    size_t max_key_length= (max_key) ? max_key->length : 0; 

    // Might have a closed/open range bound:
    // Low range open
    if (!min_key_length)
    {
      rows= (!max_key_length) 
           ? table_rows             // No range was specified
           : table_rows/10;         // -oo .. <high range> -> 10% selectivity
    }
    // High range open
    else if (!max_key_length)
    {
      rows= table_rows/10;          // <low range>..oo -> 10% selectivity
    }
    else
    {
      size_t bounds_len= MIN(min_key_length,max_key_length);
      // Use the outer 'eq_bound_len' (previously shadowed by a local
      // declaration here), so the "EQ is preferred" clamp below sees
      // the computed value.
      uint eq_bound_offs= 0;

      KEY_PART_INFO* key_part= key_info->key_part;
      KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
      for (; key_part != end; key_part++) 
      {
        uint part_length= key_part->store_length;
        if (eq_bound_offs+part_length > bounds_len ||
            memcmp(&min_key->key[eq_bound_offs],
                   &max_key->key[eq_bound_offs],
                   part_length))
        {
          break;
        }
        eq_bound_len+= key_part->length;
        eq_bound_offs+= part_length;
      }

      if (!eq_bound_len)
      {
        rows= table_rows/20;        // <low range>..<high range> -> 5% 
      }
      else
      {
        // Has an equality range on a leading part of 'key_length':
        // - Assume reduced selectivity for non-unique indexes
        //   by decreasing 'eq_fraction' by 20%
        // - Assume equal selectivity for all eq_parts in key.

        double eq_fraction = (double)(eq_bound_len) / key_length;
        if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity
          eq_fraction/= 1.20;
        if (eq_fraction >= 1.0)        // Exact match -> 1 row
          DBUG_RETURN(1);

        rows = (Uint64)((double)table_rows / pow((double)table_rows, eq_fraction));
        if (rows > (table_rows/50))    // EQ-range: Max 2% of rows
          rows= (table_rows/50);

        if (min_key_length > eq_bound_offs)
          rows/= 2;
        if (max_key_length > eq_bound_offs)
          rows/= 2;
      }
    }

    // Make sure that EQ is preferred even if row-count is low
    if (eq_bound_len && rows < 2)      // At least 2 rows as not exact
      rows= 2;
    else if (rows < 3)
      rows= 3;
    DBUG_RETURN(MIN(rows,table_rows));
  } while (0);

  DBUG_RETURN(10); /* Poor guess when you don't know anything */
}

ulonglong ha_ndbcluster::table_flags(void) const
{
  THD *thd= current_thd;
  ulonglong f=
    HA_REC_NOT_IN_SEQ |
    HA_NULL_IN_KEY |
    HA_AUTO_PART_KEY |
    HA_NO_PREFIX_CHAR_KEYS |
    HA_CAN_GEOMETRY |
    HA_CAN_BIT_FIELD |
    HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
    HA_PARTIAL_COLUMN_READ |
    HA_HAS_OWN_BINLOGGING |
    HA_BINLOG_ROW_CAPABLE |
    HA_HAS_RECORDS |
    HA_READ_BEFORE_WRITE_REMOVAL |
    0;

  /*
    To allow for logging of ndb tables during statement based logging,
    flag the capability, but also turn off the flag for OWN_BINLOGGING
  */
  if (thd->variables.binlog_format == BINLOG_FORMAT_STMT)
    f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;

   /*
     Allow MySQL Server to decide that STATEMENT logging should be used
     for the distributed privilege tables. NOTE! This is a workaround
     for a generic problem with forcing STATEMENT logging, see BUG16482501.
   */
  if (Ndb_dist_priv_util::is_distributed_priv_table(m_dbname,m_tabname))
    f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;

  /**
   * To maximize join pushability we want const-table 
   * optimization blocked if 'ndb_join_pushdown= on'
   */
  if (THDVAR(thd, join_pushdown))
    f= f | HA_BLOCK_CONST_TABLE;

  return f;
}

const char * ha_ndbcluster::table_type() const 
{
  return("NDBCLUSTER");
}
uint ha_ndbcluster::max_supported_record_length() const
{ 
  return NDB_MAX_TUPLE_SIZE;
}
uint ha_ndbcluster::max_supported_keys() const
{
  return MAX_KEY;
}
uint ha_ndbcluster::max_supported_key_parts() const 
{
  return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
}
uint ha_ndbcluster::max_supported_key_length() const
{
  return NDB_MAX_KEY_SIZE;
}
uint ha_ndbcluster::max_supported_key_part_length(HA_CREATE_INFO
                                 *create_info MY_ATTRIBUTE((unused))) const
{
  return NDB_MAX_KEY_SIZE;
}
bool ha_ndbcluster::low_byte_first() const
{ 
#ifdef WORDS_BIGENDIAN
  return FALSE;
#else
  return TRUE;
#endif
}
const char* ha_ndbcluster::index_type(uint key_number)
{
  switch (get_index_type(key_number)) {
  case ORDERED_INDEX:
  case UNIQUE_ORDERED_INDEX:
  case PRIMARY_KEY_ORDERED_INDEX:
    return "BTREE";
  case UNIQUE_INDEX:
  case PRIMARY_KEY_INDEX:
  default:
    return "HASH";
  }
}

uint8 ha_ndbcluster::table_cache_type()
{
  DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
  DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
}

/**
   Retrieve the commit count for the table object.

   @param thd              Thread context.
   @param norm_name        Normalized path to the table.
   @param[out] commit_count Commit count for the table.

   @return 0 on success.
   @return 1 if an error occurred.
*/
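/*
  Flow summary (derived from the implementation below): when
  opt_ndb_cache_check_time > 0 and the share already holds a non-zero
  commit_count (kept fresh by the util thread), that cached value is
  returned directly. Otherwise the table statistics are fetched from NDB
  via ndb_get_table_statistics() and the share is updated, unless
  commit_count_lock changed while the share mutex was released, in which
  case 0 is returned in *commit_count.
*/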

uint ndb_get_commitcount(THD *thd, char *norm_name,
                         Uint64 *commit_count)
{
  char dbname[NAME_LEN + 1];
  NDB_SHARE *share;
  DBUG_ENTER("ndb_get_commitcount");

  DBUG_PRINT("enter", ("name: %s", norm_name));
  native_mutex_lock(&ndbcluster_mutex);
  if (!(share=(NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
                                          (const uchar*) norm_name,
                                          strlen(norm_name))))
  {
    native_mutex_unlock(&ndbcluster_mutex);
    DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
                         norm_name));
    DBUG_RETURN(1);
  }
  /* ndb_share reference temporary, free below */
  share->use_count++;
  DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                           share->key_string(), share->use_count));
  native_mutex_unlock(&ndbcluster_mutex);

  native_mutex_lock(&share->mutex);
  if (opt_ndb_cache_check_time > 0)
  {
    if (share->commit_count != 0)
    {
      DBUG_PRINT("info", ("Getting commit_count: %llu from share",
                          share->commit_count));
      *commit_count= share->commit_count;
      native_mutex_unlock(&share->mutex);
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key_string(), share->use_count));
      free_share(&share);
      DBUG_RETURN(0);
    }
  }
  DBUG_PRINT("info", ("Get commit_count from NDB"));
  Ndb *ndb;
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(1);

  ha_ndbcluster::set_dbname(norm_name, dbname);
  if (ndb->setDatabaseName(dbname))
  {
    ERR_RETURN(ndb->getNdbError());
  }
  uint lock= share->commit_count_lock;
  native_mutex_unlock(&share->mutex);

  struct Ndb_statistics stat;
  {
    char tblname[NAME_LEN + 1];
    ha_ndbcluster::set_tabname(norm_name, tblname);
    Ndb_table_guard ndbtab_g(ndb->getDictionary(), tblname);
    if (ndbtab_g.get_table() == 0
        || ndb_get_table_statistics(thd, NULL, 
                                    FALSE, 
                                    ndb, 
                                    ndbtab_g.get_table()->getDefaultRecord(),
                                    &stat))
    {
      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key_string(), share->use_count));
      free_share(&share);
      DBUG_RETURN(1);
    }
  }

  native_mutex_lock(&share->mutex);
  if (share->commit_count_lock == lock)
  {
    DBUG_PRINT("info", ("Setting commit_count: %llu", stat.commit_count));
    share->commit_count= stat.commit_count;
    *commit_count= stat.commit_count;
  }
  else
  {
    DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed"));
    *commit_count= 0;
  }
  native_mutex_unlock(&share->mutex);
  /* ndb_share reference temporary free */
  DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                           share->key_string(), share->use_count));
  free_share(&share);
  DBUG_RETURN(0);
}


/**
  Check if a cached query can be used.

  This is done by comparing the supplied engine_data to commit_count of
  the table.

  The commit_count is normally retrieved from the share for the table, where
  it has been cached by the util thread. If the util thread is not started,
  NDB has to be contacted to retrieve the commit_count, which introduces
  a small delay while waiting for NDB to answer.


  @param thd            thread handle
  @param full_name      normalized path to the table in the canonical
                        format.
  @param full_name_len  length of the normalized path to the table.
  @param engine_data    parameter retrieved when query was first inserted into
                        the cache. If the value of engine_data is changed,
                        all queries for this table should be invalidated.

  @retval
    TRUE  Yes, use the query from cache
  @retval
    FALSE No, don't use the cached query, and if engine_data
          has changed, all queries for this table should be invalidated

*/
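/*
  Illustrative example (derived from the logic below): a query was cached
  with engine_data = 42 (the table's commit_count at cache time).
    - commit_count is still 42 -> return TRUE, the cached result may be used
    - commit_count is now 43   -> set *engine_data = 43 and return FALSE,
                                  invalidating queries cached with 42
    - commit_count is 0 (a local transaction has modified the table)
                               -> set *engine_data = 0 and return FALSE
*/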

static my_bool
ndbcluster_cache_retrieval_allowed(THD *thd,
                                   char *full_name, uint full_name_len,
                                   ulonglong *engine_data)
{
  Uint64 commit_count;
  char dbname[NAME_LEN + 1];
  char tabname[NAME_LEN + 1];

  ha_ndbcluster::set_dbname(full_name, dbname);
  ha_ndbcluster::set_tabname(full_name, tabname);

  DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
  DBUG_PRINT("enter", ("dbname: %s, tabname: %s",
                       dbname, tabname));

  if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
  {
    /* Don't allow qc to be used if table has been previously
       modified in transaction */
    if (!check_ndb_in_thd(thd))
      DBUG_RETURN(FALSE);
    Thd_ndb *thd_ndb= get_thd_ndb(thd);
    if (!thd_ndb->changed_tables.is_empty())
    {
      NDB_SHARE* share;
      List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
      while ((share= it++))
      {
        if (strcmp(share->table_name, tabname) == 0 &&
            strcmp(share->db, dbname) == 0)
        {
          DBUG_PRINT("exit", ("No, transaction has changed table"));
          DBUG_RETURN(FALSE);
        }
      }
    }
  }

  if (ndb_get_commitcount(thd, full_name, &commit_count))
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
    DBUG_RETURN(FALSE);
  }
  DBUG_PRINT("info", ("engine_data: %llu, commit_count: %llu",
                      *engine_data, commit_count));
  if (commit_count == 0)
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, local commit has been performed"));
    DBUG_RETURN(FALSE);
  }
  else if (*engine_data != commit_count)
  {
    *engine_data= commit_count; /* invalidate */
     DBUG_PRINT("exit", ("No, commit_count has changed"));
     DBUG_RETURN(FALSE);
   }

  DBUG_PRINT("exit", ("OK to use cache, engine_data: %llu",
                      *engine_data));
  DBUG_RETURN(TRUE);
}


/**
  Register a table for use in the query cache.

  Fetch the commit_count for the table and return it in engine_data,
  this will later be used to check if the table has changed, before
  the cached query is reused.

  @param thd            thread handle
  @param full_name      normalized path to the table in the 
                        canonical format.
  @param full_name_len  length of the normalized path to the table.
  @param engine_callback  function to be called before using cache on
                          this table
  @param[out] engine_data    commit_count for this table

  @retval
    TRUE  Yes, it's ok to cache this query
  @retval
    FALSE No, don't cache the query
*/
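/*
  Illustrative example (derived from the code below): registering a query
  against a table whose commit_count is currently 7 stores
  *engine_data = 7 and *engine_callback = ndbcluster_cache_retrieval_allowed
  and returns TRUE; if the commit_count is 0, or cannot be retrieved,
  FALSE is returned and the query is not cached.
*/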

my_bool
ha_ndbcluster::register_query_cache_table(THD *thd,
                                          char *full_name,
                                          size_t full_name_len,
                                          qc_engine_callback *engine_callback,
                                          ulonglong *engine_data)
{
  Uint64 commit_count;

  DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
  DBUG_PRINT("enter",("dbname: %s, tabname: %s",
		      m_dbname, m_tabname));

  if (thd_options(thd) & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
  {
    /* Don't allow qc to be used if table has been previously
       modified in transaction */
    Thd_ndb *thd_ndb= get_thd_ndb(thd);
    if (!thd_ndb->changed_tables.is_empty())
    {
      assert(m_share);
      NDB_SHARE* share;
      List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
      while ((share= it++))
      {
        if (m_share == share)
        {
          DBUG_PRINT("exit", ("No, transaction has changed table"));
          DBUG_RETURN(FALSE);
        }
      }
    }
  }

  if (ndb_get_commitcount(thd, full_name, &commit_count))
  {
    *engine_data= 0;
    DBUG_PRINT("exit", ("Error, could not get commitcount"));
    DBUG_RETURN(FALSE);
  }
  *engine_data= commit_count;
  *engine_callback= ndbcluster_cache_retrieval_allowed;
  DBUG_PRINT("exit", ("commit_count: %llu", commit_count));
  DBUG_RETURN(commit_count > 0);
}


static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length,
                                my_bool)
{
  *length= share->key_length();
  return (uchar*) share->key_string();
}


#ifndef NDEBUG

static void print_ndbcluster_open_tables()
{
  DBUG_LOCK_FILE;
  fprintf(DBUG_FILE, ">ndbcluster_open_tables\n");
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE* share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    share->print("", DBUG_FILE);
  }
  fprintf(DBUG_FILE, "<ndbcluster_open_tables\n");
  DBUG_UNLOCK_FILE;
}

#endif


#define dbug_print_open_tables()                \
  DBUG_EXECUTE("info",                          \
               print_ndbcluster_open_tables(););


/*
  For some reason a share is still around; try to salvage the situation
  by closing all cached tables. If the share still exists, there is an
  error somewhere, but only report this to the error log.  Keep this
  "trailing share" but rename it, since there are still references to it,
  to avoid segmentation faults.  There is a risk that the memory for
  this trailing share leaks.

  Must be called with ndbcluster_mutex locked by the caller.
*/
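/*
  Illustrative outcome (derived from the code below): if the share for
  "db1/t1" still has use_count == 2 after closing cached tables and marking
  it dropped, it is removed from ndbcluster_open_tables, given a synthetic
  key such as "#leak0" (running number), and re-inserted so that the
  dangling references stay valid, at the cost of a possible memory leak.
*/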
int handle_trailing_share(THD *thd, NDB_SHARE *share)
{
  static ulong trailing_share_id= 0;
  DBUG_ENTER("handle_trailing_share");

  /* ndb_share reference temporary, free below */
  ++share->use_count;
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("handle_trailing_share: %s use_count: %u",
                           share->key_string(), share->use_count);
  DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                           share->key_string(), share->use_count));
  native_mutex_unlock(&ndbcluster_mutex);

  ndb_tdc_close_cached_table(thd, share->db, share->table_name);

  native_mutex_lock(&ndbcluster_mutex);
  /* ndb_share reference temporary free */
  DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                           share->key_string(), share->use_count));
  if (!--share->use_count)
  {
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("handle_trailing_share: %s use_count: %u",
                             share->key_string(), share->use_count);
    if (opt_ndb_extra_logging)
      sql_print_information("NDB_SHARE: trailing share %s, "
                            "released by close_cached_tables",
                            share->key_string());
    ndbcluster_real_free_share(&share);
    DBUG_RETURN(0);
  }
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("handle_trailing_share: %s use_count: %u",
                           share->key_string(), share->use_count);

  /*
    The share still exists. If it has not already been dropped by the
    server, mark it dropped and release that reference.
  */
  if (share->state != NSS_DROPPED)
  {
    ndbcluster_mark_share_dropped(share);
    /* ndb_share reference create free */
    DBUG_PRINT("NDB_SHARE", ("%s create free  use_count: %u",
                             share->key_string(), share->use_count));
    --share->use_count;
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("handle_trailing_share: %s use_count: %u",
                             share->key_string(), share->use_count);

    if (share->use_count == 0)
    {
      if (opt_ndb_extra_logging)
        sql_print_information("NDB_SHARE: trailing share %s, "
                              "released after NSS_DROPPED check",
                              share->key_string());
      ndbcluster_real_free_share(&share);
      DBUG_RETURN(0);
    }
  }

  DBUG_PRINT("info", ("NDB_SHARE: %s already exists use_count=%d, op=0x%lx.",
                      share->key_string(), share->use_count, (long) share->op));
  /* 
     Ignore table shares only opened by util thread
   */
  if (!((share->use_count == 1) && share->util_thread))
  {
    sql_print_warning("NDB_SHARE: %s already exists use_count=%d."
                      " Moving away for safety, but possible memleak.",
                      share->key_string(), share->use_count);
  }
  dbug_print_open_tables();

  /*
    The NDB share has not been released as it should have been
  */
#ifdef NOT_YET
  assert(FALSE);
#endif

  /*
    This is probably an error.  We can however save the situation
    at the cost of a possible memory leak, by "renaming" the share:
    - First remove it from the hash
  */
  my_hash_delete(&ndbcluster_open_tables, (uchar*) share);

  {
    /*
      Give the leaked share a new name using a running number
    */
    char leak_name_buf[16]; // strlen("#leak4294967295")
    my_snprintf(leak_name_buf, sizeof(leak_name_buf),
                "#leak%lu", trailing_share_id++);
    share->key = NDB_SHARE::create_key(leak_name_buf);
    // Note that share->db, share->table_name as well
    // as share->shadow_table->s->db etc. point into the memory
    // which share->key pointed to before the memory for the leak key
    // was allocated, so it's not a good time to free the old key
    // here.
  }
  /* Keep it for a possible future trailing free */
  my_hash_insert(&ndbcluster_open_tables, (uchar*) share);

  DBUG_RETURN(0);
}


int
ndbcluster_rename_share(THD *thd, NDB_SHARE *share, NDB_SHARE_KEY* new_key)
{
  DBUG_ENTER("ndbcluster_rename_share");
  native_mutex_lock(&ndbcluster_mutex);
  DBUG_PRINT("enter", ("share->key: '%s'", share->key_string()));
  DBUG_PRINT("enter", ("new_key: '%s'", NDB_SHARE::key_get_key(new_key)));

  // Handle the case where NDB_SHARE with new_key already exists
  {
    NDB_SHARE *tmp =
        (NDB_SHARE*)my_hash_search(&ndbcluster_open_tables,
                                   NDB_SHARE::key_get_key(new_key),
                                   NDB_SHARE::key_get_length(new_key));
    if (tmp)
    {
      handle_trailing_share(thd, tmp);
    }
  }

  /* remove the share from hash */
  my_hash_delete(&ndbcluster_open_tables, (uchar*) share);

  /* save old key if insert should fail */
  NDB_SHARE_KEY *old_key= share->key;

  share->key= new_key;

  if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
  {
    DBUG_PRINT("error", ("Failed to insert %s", share->key_string()));
    // Catch this unlikely error in debug
    assert(false);
    share->key= old_key;
    if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
    {
      sql_print_error("ndbcluster_rename_share: failed to recover %s",
                      share->key_string());
      DBUG_PRINT("error", ("Failed to reinsert share with old name %s",
                           share->key_string()));
    }
    native_mutex_unlock(&ndbcluster_mutex);
    DBUG_RETURN(-1);
  }

  DBUG_PRINT("info", ("setting db and table_name to point at new key"));
  share->db= NDB_SHARE::key_get_db_name(share->key);
  share->table_name= NDB_SHARE::key_get_table_name(share->key);

  Ndb_event_data *event_data= share->get_event_data_ptr();
  if (event_data && event_data->shadow_table)
  {
    if (!IS_TMP_PREFIX(share->table_name))
    {
      DBUG_PRINT("info", ("Renaming shadow table"));
      // Allocate new strings for db and table_name for shadow_table
      // in event_data's MEM_ROOT (where the shadow_table itself is allocated).
      // NOTE! This causes a slight memory leak since the already existing
      // strings are not released until the mem_root is eventually
      // released.
      lex_string_copy(&event_data->mem_root,
                      &event_data->shadow_table->s->db,
                      share->db);
      lex_string_copy(&event_data->mem_root,
                      &event_data->shadow_table->s->table_name,
                      share->table_name);
    }
    else
    {
      DBUG_PRINT("info", ("Name is temporary, skip rename of shadow table"));
      /**
       * We don't rename the table->s here,
       * which is used by the injector,
       * as we don't know if all events have been processed.
       * This will be dropped anyway.
       */
    }
  }
  /* else rename will be handled when the ALTER event comes */

  // Print share after rename
  dbug_print_share("renamed share:", share);

  native_mutex_unlock(&ndbcluster_mutex);
  DBUG_RETURN(0);
}

/*
  Increase refcount on existing share.
  Always returns share and cannot fail.
*/
NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share)
{
  native_mutex_lock(&ndbcluster_mutex);
  share->use_count++;

  dbug_print_open_tables();
  dbug_print_share("ndbcluster_get_share:", share);
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("ndbcluster_get_share: %s use_count: %u",
                           share->key_string(), share->use_count);
  native_mutex_unlock(&ndbcluster_mutex);
  return share;
}



NDB_SHARE*
NDB_SHARE::create(const char* key, TABLE* table)
{
  NDB_SHARE* share;
  if (!(share= (NDB_SHARE*) my_malloc(PSI_INSTRUMENT_ME,
                                      sizeof(*share),
                                      MYF(MY_WME | MY_ZEROFILL))))
    return NULL;

  share->flags= 0;
  share->state= NSS_INITIAL;

  /* Allocates enough space for key, db, and table_name */
  share->key= NDB_SHARE::create_key(key);

  share->db= NDB_SHARE::key_get_db_name(share->key);
  share->table_name= NDB_SHARE::key_get_table_name(share->key);

  thr_lock_init(&share->lock);
  native_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
  share->commit_count= 0;
  share->commit_count_lock= 0;

#ifdef HAVE_NDB_BINLOG
  share->m_cfn_share= NULL;
#endif

  share->op= 0;
  share->new_op= 0;
  share->event_data= 0;

  if (ndbcluster_binlog_init_share(current_thd, share, table))
  {
    DBUG_PRINT("error", ("get_share: %s could not init share", key));
    assert(share->event_data == NULL);
    NDB_SHARE::destroy(share);
    return NULL;
  }

  return share;
}


static inline
NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
                                bool create_if_not_exists)
{
  NDB_SHARE *share;
  DBUG_ENTER("ndbcluster_get_share");
  DBUG_PRINT("enter", ("key: '%s'", key));

  if (!(share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables,
                                           (const uchar*) key,
                                           strlen(key))))
  {
    if (!create_if_not_exists)
    {
      DBUG_PRINT("error", ("get_share: %s does not exist", key));
      DBUG_RETURN(0);
    }

    if (!(share= NDB_SHARE::create(key, table)))
    {
      DBUG_PRINT("error", ("get_share: failed to alloc share"));
      my_error(ER_OUTOFMEMORY, MYF(0), static_cast<int>(sizeof(*share)));
      DBUG_RETURN(0);
    }

    // Insert the new share in list of open shares
    if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share))
    {
      NDB_SHARE::destroy(share);
      DBUG_RETURN(0);
    }
  }
  share->use_count++;
  if (opt_ndb_extra_logging > 9)
    sql_print_information ("ndbcluster_get_share: %s use_count: %u",
                           share->key_string(), share->use_count);

  dbug_print_open_tables();
  dbug_print_share("ndbcluster_get_share:", share);
  DBUG_RETURN(share);
}


/**
  Get NDB_SHARE for key

  Returns share for key, and increases the refcount on the share.

  @param create_if_not_exists, creates share if it does not already exist
  @param have_lock, ndbcluster_mutex already locked
*/

NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table,
                                bool create_if_not_exists,
                                bool have_lock)
{
  NDB_SHARE *share;
  DBUG_ENTER("ndbcluster_get_share");
  DBUG_PRINT("enter", ("key: '%s', create_if_not_exists: %d, have_lock: %d",
                       key, create_if_not_exists, have_lock));

  if (!have_lock)
    native_mutex_lock(&ndbcluster_mutex);

  share= ndbcluster_get_share(key, table, create_if_not_exists);

  if (!have_lock)
    native_mutex_unlock(&ndbcluster_mutex);

  DBUG_RETURN(share);
}

void ndbcluster_real_free_share(NDB_SHARE **share)
{
  DBUG_ENTER("ndbcluster_real_free_share");
  dbug_print_share("ndbcluster_real_free_share:", *share);

  if (opt_ndb_extra_logging > 9)
    sql_print_information ("ndbcluster_real_free_share: %s use_count: %u",
                           (*share)->key_string(), (*share)->use_count);

  ndb_index_stat_free(*share);

  bool found= false;
  if ((* share)->state == NSS_DROPPED)
  {
    found= my_hash_delete(&ndbcluster_dropped_tables, (uchar*) *share) == 0;

    // If this is a 'trailing share', it might still be 'open'
    my_hash_delete(&ndbcluster_open_tables, (uchar*) *share);
  }
  else
  {
    found= my_hash_delete(&ndbcluster_open_tables, (uchar*) *share) == 0;
  }
  assert(found);

  NDB_SHARE::destroy(*share);
  *share= 0;

  dbug_print_open_tables();
  DBUG_VOID_RETURN;
}


void ndbcluster_free_share(NDB_SHARE **share, bool have_lock)
{
  if (!have_lock)
    native_mutex_lock(&ndbcluster_mutex);
  if (!--(*share)->use_count)
  {
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("ndbcluster_free_share: %s use_count: %u",
                             (*share)->key_string(), (*share)->use_count);
    ndbcluster_real_free_share(share);
  }
  else
  {
    if (opt_ndb_extra_logging > 9)
      sql_print_information ("ndbcluster_free_share: %s use_count: %u",
                             (*share)->key_string(), (*share)->use_count);
    dbug_print_open_tables();
    dbug_print_share("ndbcluster_free_share:", *share);
  }
  if (!have_lock)
    native_mutex_unlock(&ndbcluster_mutex);
}

void
ndbcluster_mark_share_dropped(NDB_SHARE* share)
{
  share->state= NSS_DROPPED;
  if (my_hash_delete(&ndbcluster_open_tables, (uchar*) share) == 0)
  {
    my_hash_insert(&ndbcluster_dropped_tables, (uchar*) share);
  }
  else
  {
    assert(false);
  }
  if (opt_ndb_extra_logging > 9)
  {
    sql_print_information ("ndbcluster_mark_share_dropped: %s use_count: %u",
                           share->key_string(), share->use_count);
  }
}

struct ndb_table_statistics_row {
  Uint64 rows;
  Uint64 commits;
  Uint32 size;
  Uint64 fixed_mem;
  Uint64 var_mem;
};

int ha_ndbcluster::update_stats(THD *thd,
                                bool do_read_stat,
                                uint part_id)
{
  struct Ndb_statistics stat;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  DBUG_ENTER("ha_ndbcluster::update_stats");
  do
  {
    if (m_share && !do_read_stat)
    {
      native_mutex_lock(&m_share->mutex);
      stat= m_share->stat;
      native_mutex_unlock(&m_share->mutex);

      assert(stat.row_count != ~(ha_rows)0); // should never be invalid

      /* Accept shared cached statistics if row_count is valid. */
      if (stat.row_count != ~(ha_rows)0)
        break;
    }

    /* Request statistics from datanodes */
    Ndb *ndb= thd_ndb->ndb;
    if (ndb->setDatabaseName(m_dbname))
    {
      set_my_errno(HA_ERR_OUT_OF_MEM);
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    if (int err= ndb_get_table_statistics(thd, this, TRUE, ndb,
                                          m_ndb_record, &stat,
                                          part_id))
    {
      DBUG_RETURN(err);
    }

    /* Update shared statistics with fresh data */
    if (m_share)
    {
      native_mutex_lock(&m_share->mutex);
      m_share->stat= stat;
      native_mutex_unlock(&m_share->mutex);
    }
    break;
  }
  while(0);

  int no_uncommitted_rows_count= 0;
  if (m_table_info && !thd_ndb->m_error)
  {
    m_table_info->records= stat.row_count;
    m_table_info->last_count= thd_ndb->count;
    no_uncommitted_rows_count= m_table_info->no_uncommitted_rows_count;
  }
  stats.mean_rec_length= stat.row_size;
  stats.data_file_length= stat.fragment_memory;
  stats.records= stat.row_count + no_uncommitted_rows_count;
  stats.max_data_file_length= stat.fragment_extent_space;
  stats.delete_length= stat.fragment_extent_free_space;

  DBUG_PRINT("exit", ("stats.records: %d  "
                      "stat->row_count: %d  "
                      "no_uncommitted_rows_count: %d"
                      "stat->fragment_extent_space: %u  "
                      "stat->fragment_extent_free_space: %u",
                      (int)stats.records,
                      (int)stat.row_count,
                      (int)no_uncommitted_rows_count,
                      (uint)stat.fragment_extent_space,
                      (uint)stat.fragment_extent_free_space));
  DBUG_RETURN(0);
}

/**
  Update 'row_count' in the shared table statistics if any rows were
  inserted/deleted by the local transaction related to the specified
  'local_stat'.
  Should be called when the transaction has successfully committed its
  changes.
*/
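/*
  Illustrative example (derived from the code below): if the shared
  row_count is 1000 and the committed transaction inserted 5 rows
  (no_uncommitted_rows_count == +5), the shared row_count becomes 1005.
  If it instead deleted 2000 rows (-2000), the result would be negative,
  so the row_count is clamped to 0.
*/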
static
void modify_shared_stats(NDB_SHARE *share,
                         Ndb_local_table_statistics *local_stat)
{
  if (local_stat->no_uncommitted_rows_count)
  {
    native_mutex_lock(&share->mutex);
    assert(share->stat.row_count != ~(ha_rows)0);// should never be invalid
    if (share->stat.row_count != ~(ha_rows)0)
    {
      DBUG_PRINT("info", ("Update row_count for %s, row_count: %lu, with:%d",
                          share->table_name, (ulong) share->stat.row_count,
                          local_stat->no_uncommitted_rows_count));
      share->stat.row_count=
        ((Int64)share->stat.row_count+local_stat->no_uncommitted_rows_count > 0)
         ? share->stat.row_count+local_stat->no_uncommitted_rows_count
         : 0;
    }
    native_mutex_unlock(&share->mutex);
    local_stat->no_uncommitted_rows_count= 0;
  }
}

/* If part_id contains a legal partition id, ndbstat returns the
   partition-statistics pertaining to that partition only.
   Otherwise, it returns the table-statistics,
   which is an aggregate over all partitions of that table.
 */
static 
int
ndb_get_table_statistics(THD *thd, ha_ndbcluster* file, bool report_error, Ndb* ndb,
                         const NdbRecord *record,
                         struct Ndb_statistics * ndbstat,
                         uint part_id)
{
  Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
  NdbTransaction* pTrans;
  NdbError error;
  int retries= 100;
  int reterr= 0;
  int retry_sleep= 30; /* 30 milliseconds */
  const char *dummyRowPtr;
  NdbOperation::GetValueSpec extraGets[8];
  Uint64 rows, commits, fixed_mem, var_mem, ext_space, free_ext_space;
  Uint32 size, fragid;

  DBUG_ENTER("ndb_get_table_statistics");

  assert(record != 0);
  
  /* We use the passed-in NdbRecord just to get access to the
     table; we mask out any/all columns it may have and add
     our reads as extraGets.  This is necessary as they are
     all pseudo-columns.
  */
  extraGets[0].column= NdbDictionary::Column::ROW_COUNT;
  extraGets[0].appStorage= &rows;
  extraGets[1].column= NdbDictionary::Column::COMMIT_COUNT;
  extraGets[1].appStorage= &commits;
  extraGets[2].column= NdbDictionary::Column::ROW_SIZE;
  extraGets[2].appStorage= &size;
  extraGets[3].column= NdbDictionary::Column::FRAGMENT_FIXED_MEMORY;
  extraGets[3].appStorage= &fixed_mem;
  extraGets[4].column= NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY;
  extraGets[4].appStorage= &var_mem;
  extraGets[5].column= NdbDictionary::Column::FRAGMENT_EXTENT_SPACE;
  extraGets[5].appStorage= &ext_space;
  extraGets[6].column= NdbDictionary::Column::FRAGMENT_FREE_EXTENT_SPACE;
  extraGets[6].appStorage= &free_ext_space;
  extraGets[7].column= NdbDictionary::Column::FRAGMENT;
  extraGets[7].appStorage= &fragid;

  const Uint32 codeWords= 1;
  Uint32 codeSpace[ codeWords ];
  NdbInterpretedCode code(NULL, // Table is irrelevant
                          &codeSpace[0],
                          codeWords);
  if ((code.interpret_exit_last_row() != 0) ||
      (code.finalise() != 0))
  {
    reterr= code.getNdbError().code;
    DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
                        error.code, error.message));
    DBUG_RETURN(reterr);
  }

  do
  {
    Uint32 count= 0;
    Uint64 sum_rows= 0;
    Uint64 sum_commits= 0;
    Uint64 sum_row_size= 0;
    Uint64 sum_mem= 0;
    Uint64 sum_ext_space= 0;
    Uint64 sum_free_ext_space= 0;
    NdbScanOperation*pOp;
    int check;

    if ((pTrans= ndb->startTransaction()) == NULL)
    {
      error= ndb->getNdbError();
      goto retry;
    }

    NdbScanOperation::ScanOptions options;
    options.optionsPresent= NdbScanOperation::ScanOptions::SO_BATCH |
                            NdbScanOperation::ScanOptions::SO_GETVALUE |
                            NdbScanOperation::ScanOptions::SO_INTERPRETED;
    /* Set batch_size=1, as we need only one row per fragment. */
    options.batch= 1;
    options.extraGetValues= &extraGets[0];
    options.numExtraGetValues= sizeof(extraGets)/sizeof(extraGets[0]); 
    options.interpretedCode= &code;

    if ((pOp= pTrans->scanTable(record, NdbOperation::LM_CommittedRead,
                                empty_mask,
                                &options,
                                sizeof(NdbScanOperation::ScanOptions))) == NULL)
    {
      error= pTrans->getNdbError();
      goto retry;
    }
    thd_ndb->m_scan_count++;
    thd_ndb->m_pruned_scan_count += (pOp->getPruned()? 1 : 0);
    
    thd_ndb->m_execute_count++;
    DBUG_PRINT("info", ("execute_count: %u", thd_ndb->m_execute_count));
    if (pTrans->execute(NdbTransaction::NoCommit,
                        NdbOperation::AbortOnError,
                        TRUE) == -1)
    {
      error= pTrans->getNdbError();
      goto retry;
    }
    
    while ((check= pOp->nextResult(&dummyRowPtr, TRUE, TRUE)) == 0)
    {
      DBUG_PRINT("info", ("nextResult rows: %llu, commits: %llu"
                          "fixed_mem_size %llu var_mem_size %llu "
                          "fragmentid %u extent_space %llu free_extent_space %llu",
                          rows, commits, fixed_mem, var_mem, fragid,
                          ext_space, free_ext_space));

      if ((part_id != ~(uint)0) && fragid != part_id)
      {
        continue;
      }

      sum_rows+= rows;
      sum_commits+= commits;
      if (sum_row_size < size)
        sum_row_size= size;
      sum_mem+= fixed_mem + var_mem;
      count++;
      sum_ext_space += ext_space;
      sum_free_ext_space += free_ext_space;

      if ((part_id != ~(uint)0) && fragid == part_id)
      {
        break;
      }
    }
    
    if (check == -1)
    {
      error= pOp->getNdbError();
      goto retry;
    }

    pOp->close(TRUE);

    ndb->closeTransaction(pTrans);

    ndbstat->row_count= sum_rows;
    ndbstat->commit_count= sum_commits;
    ndbstat->row_size= (ulong)sum_row_size;
    ndbstat->fragment_memory= sum_mem;
    ndbstat->fragment_extent_space= sum_ext_space;
    ndbstat->fragment_extent_free_space= sum_free_ext_space;

    DBUG_PRINT("exit", ("records: %llu commits: %llu row_size: %llu "
                        "mem: %llu allocated: %llu free: %llu count: %u",
                        sum_rows, sum_commits, sum_row_size,
                        sum_mem, sum_ext_space, sum_free_ext_space, count));

    DBUG_RETURN(0);
retry:
    if(report_error)
    {
      if (file && pTrans)
      {
        reterr= file->ndb_err(pTrans);
      }
      else
      {
        const NdbError& tmp= error;
        ERR_PRINT(tmp);
        reterr= ndb_to_mysql_error(&tmp);
      }
    }
    else
      reterr= error.code;

    if (pTrans)
    {
      ndb->closeTransaction(pTrans);
      pTrans= NULL;
    }
    if (error.status == NdbError::TemporaryError &&
        retries-- && !thd->killed)
    {
      do_retry_sleep(retry_sleep);
      continue;
    }
    break;
  } while(1);
  DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr,
                      error.code, error.message));
  DBUG_RETURN(reterr);
}

/**
  Create a .ndb file to serve as a placeholder indicating 
  that the table with this name is an NDB table.
*/

int ha_ndbcluster::write_ndb_file(const char *name) const
{
  File file;
  bool error=1;
  char path[FN_REFLEN];
  
  DBUG_ENTER("write_ndb_file");
  DBUG_PRINT("enter", ("name: %s", name));

#ifndef EMBEDDED_LIBRARY
  (void)strxnmov(path, FN_REFLEN-1, 
                 mysql_data_home,"/",name,ha_ndb_ext,NullS);
#else
  (void)strxnmov(path, FN_REFLEN-1, name,ha_ndb_ext, NullS);
#endif

  if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
  {
    // It's an empty file
    error=0;
    my_close(file,MYF(0));
  }
  DBUG_RETURN(error);
}

void ha_ndbcluster::check_read_before_write_removal()
{
  DBUG_ENTER("check_read_before_write_removal");

  /* Must have determined that rbwr is possible */
  assert(m_read_before_write_removal_possible);
  m_read_before_write_removal_used= true;

  /* Can't use on table with hidden primary key */
  assert(table_share->primary_key != MAX_KEY);

  /* Index must be unique */
  DBUG_PRINT("info", ("using index %d", active_index));
  const KEY *key= table->key_info + active_index;
  assert((key->flags & HA_NOSAME)); NDB_IGNORE_VALUE(key);

  DBUG_VOID_RETURN;
}


/****************************************************************************
 * MRR interface implementation
 ***************************************************************************/

/**
   We will not attempt to deal with more than this many ranges in a single
   MRR execute().
*/
#define MRR_MAX_RANGES 128

/*
  Types of ranges during multi_range_read.

  Code assumes that X < enum_ordered_range is a valid check for range converted
  to key operation.
*/
enum multi_range_types
{
  enum_unique_range,            /// Range converted to key operation
  enum_empty_unique_range,      /// No data found (in key operation)
  enum_ordered_range,           /// Normal ordered index scan range
  enum_skip_range               /// Empty range (eg. partition pruning)
};

/**
  Usage of the MRR buffer is as follows:

  First, N char * values, each being the custom value obtained from
  RANGE_SEQ_IF::next() that needs to be returned from multi_range_read_next().
  N is usually == total number of ranges, but never more than MRR_MAX_RANGES
  (the MRR is split across several execute()s if necessary). N may be lower
  than actual number of ranges in a single execute() in case of split for
  other reasons.

  This is followed by N variable-sized entries, each

   - 1 byte of multi_range_types for this range.

   - (Only) for ranges converted to key operations (enum_unique_range and
     enum_empty_unique_range), this is followed by table_share->reclength
     bytes of row data.
*/
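
/*
  For illustration (offsets not to scale), with three ranges where only
  range 1 was converted to a key operation, the buffer would contain:

    custom[0] custom[1] custom[2]       <- fixed part, 3 * sizeof(char *)
    type[0] = enum_ordered_range        <- 1 byte
    type[1] = enum_unique_range, followed by reclength bytes of row data
    type[2] = enum_skip_range           <- 1 byte
*/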

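/* Return the total size in bytes of the supplied HANDLER_BUFFER. */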
static inline
ulong multi_range_buffer_size(const HANDLER_BUFFER* buffer)
{
  const size_t buf_size = buffer->buffer_end - buffer->buffer;
  assert(buf_size < ULONG_MAX);
  return (ulong)buf_size;
}

/* Return the needed size of the fixed array at start of HANDLER_BUFFER. */
static ulong
multi_range_fixed_size(int num_ranges)
{
  if (num_ranges > MRR_MAX_RANGES)
    num_ranges= MRR_MAX_RANGES;
  return num_ranges * sizeof(char *);
}

/* Return max number of ranges so that fixed part will still fit in buffer. */
static int
multi_range_max_ranges(int num_ranges, ulong bufsize)
{
  if (num_ranges > MRR_MAX_RANGES)
    num_ranges= MRR_MAX_RANGES;
  if (num_ranges * sizeof(char *) > bufsize)
    num_ranges= bufsize / sizeof(char *);
  return num_ranges;
}

/* Return the size in HANDLER_BUFFER of a variable-sized entry. */
static ulong
multi_range_entry_size(my_bool use_keyop, ulong reclength)
{
  /* Space for type byte. */
  ulong len= 1;
  if (use_keyop)
    len+= reclength;
  return len;
}

/*
  Return the maximum size of a variable-sized entry in HANDLER_BUFFER.

  Actual size may depend on key values (whether the actual value can be
  converted to a hash key operation or needs to be done as an ordered index
  scan).
*/
static ulong
multi_range_max_entry(NDB_INDEX_TYPE keytype, ulong reclength)
{
  return multi_range_entry_size(keytype != ORDERED_INDEX, reclength);
}

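/* Access (read or write) the type byte (multi_range_types) of an entry in HANDLER_BUFFER. */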
static uchar &
multi_range_entry_type(uchar *p)
{
  return *p;
}

/* Find the start of the next entry in HANDLER_BUFFER. */
static uchar *
multi_range_next_entry(uchar *p, ulong reclength)
{
  my_bool use_keyop= multi_range_entry_type(p) < enum_ordered_range;
  return p + multi_range_entry_size(use_keyop, reclength);
}

/* Get pointer to row data (for range converted to key operation). */
static uchar *
multi_range_row(uchar *p)
{
  assert(multi_range_entry_type(p) == enum_unique_range);
  return p + 1;
}

/* Get and put upper layer custom char *, use memcpy() for unaligned access. */
static char *
multi_range_get_custom(HANDLER_BUFFER *buffer, int range_no)
{
  assert(range_no < MRR_MAX_RANGES);
  char* res;
  memcpy(&res, buffer->buffer + range_no*sizeof(char*), sizeof(char*));
  return res;
}

static void
multi_range_put_custom(HANDLER_BUFFER *buffer, int range_no, char *custom)
{
  assert(range_no < MRR_MAX_RANGES);
  // memcpy() required for unaligned access.
  memcpy(buffer->buffer + range_no*sizeof(char*), &custom, sizeof(char*));
}

/*
  This is used to check if an ordered index scan is needed for a range in
  a multi range read.
  If a scan is not needed, we use a faster primary/unique key operation
  instead.
*/
static my_bool
read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info,
                      const KEY_MULTI_RANGE *r, bool is_pushed)
{
  if (cur_index_type == ORDERED_INDEX || is_pushed)
    return TRUE;
  if (cur_index_type == PRIMARY_KEY_INDEX)
    return FALSE;
  if (cur_index_type == UNIQUE_INDEX) {  // a 'UNIQUE ... USING HASH' index
    // UNIQUE_INDEX is used iff the optimizer set HA_MRR_NO_NULL_ENDPOINTS.
    // Assert that there are no NULL values in the key, as promised.
    assert(!check_null_in_key(key_info, r->start_key.key, r->start_key.length));
    return FALSE;
  }
  assert(cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
         cur_index_type == UNIQUE_ORDERED_INDEX);
  if (r->start_key.length != key_info->key_length ||
      r->start_key.flag != HA_READ_KEY_EXACT)
    return TRUE;                                // Not exact match, need scan
  if (cur_index_type == UNIQUE_ORDERED_INDEX &&
      check_null_in_key(key_info, r->start_key.key,r->start_key.length))
    return TRUE;                                // Can't use for NULL values
  return FALSE;
}

/*
  Get cost and other information about MRR scan over a known list of ranges

  SYNOPSIS
    See handler::multi_range_read_info_const.

  DESCRIPTION
    The implementation is copied from handler::multi_range_read_info_const.
    The only difference is that NDB-MRR cannot handle blob columns or keys
    with NULLs for unique indexes. We disable MRR for those cases.

  NOTES
    See NOTES for handler::multi_range_read_info_const().
*/

ha_rows 
ha_ndbcluster::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                           void *seq_init_param, 
                                           uint n_ranges, uint *bufsz,
                                           uint *flags, Cost_estimate *cost)
{
  ha_rows rows;
  uint def_flags= *flags;
  uint def_bufsz= *bufsz;

  DBUG_ENTER("ha_ndbcluster::multi_range_read_info_const");

  /* Get cost/flags/mem_usage of default MRR implementation */
  rows= handler::multi_range_read_info_const(keyno, seq, seq_init_param,
                                             n_ranges, &def_bufsz, 
                                             &def_flags, cost);
  if (unlikely(rows == HA_POS_ERROR))
  {
    DBUG_RETURN(rows);
  }

  /*
    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is
    an order to use the default MRR implementation.
    Otherwise, make a choice based on requested *flags, handler
    capabilities, cost and mrr* flags of @@optimizer_switch.
  */
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
      choose_mrr_impl(keyno, n_ranges, rows, bufsz, flags, cost))
  {
    DBUG_PRINT("info", ("Default MRR implementation choosen"));
    *flags= def_flags;
    *bufsz= def_bufsz;
    assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
  }
  else
  {
    /* *flags and *bufsz were set by choose_mrr_impl */
    DBUG_PRINT("info", ("NDB-MRR implementation choosen"));
    assert(!(*flags & HA_MRR_USE_DEFAULT_IMPL));
  }
  DBUG_RETURN(rows);
}


/*
  Get cost and other information about MRR scan over some sequence of ranges

  SYNOPSIS
    See handler::multi_range_read_info.
*/

ha_rows 
ha_ndbcluster::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
                                     uint *bufsz, uint *flags,
                                     Cost_estimate *cost)
{
  ha_rows res;
  uint def_flags= *flags;
  uint def_bufsz= *bufsz;

  DBUG_ENTER("ha_ndbcluster::multi_range_read_info");

  /* Get cost/flags/mem_usage of default MRR implementation */
  res= handler::multi_range_read_info(keyno, n_ranges, n_rows,
                                      &def_bufsz, &def_flags,
                                      cost);
  if (unlikely(res == HA_POS_ERROR))
  {
    /* Default implementation can't perform MRR scan => we can't either */
    DBUG_RETURN(res);
  }
  assert(!res);

  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
      choose_mrr_impl(keyno, n_ranges, n_rows, bufsz, flags, cost))
  {
    /* Default implementation is chosen */
    DBUG_PRINT("info", ("Default MRR implementation chosen"));
    *flags= def_flags;
    *bufsz= def_bufsz;
    assert(*flags & HA_MRR_USE_DEFAULT_IMPL);
  }
  else
  {
    /* *flags and *bufsz were set by choose_mrr_impl */
    DBUG_PRINT("info", ("NDB-MRR implementation choosen"));
    assert(!(*flags & HA_MRR_USE_DEFAULT_IMPL));
  }
  DBUG_RETURN(res);
}

/**
  Internals: Choose between Default MRR implementation and 
                    native ha_ndbcluster MRR

  Make the choice between using Default MRR implementation and ha_ndbcluster-MRR.
  This function contains common functionality factored out of multi_range_read_info()
  and multi_range_read_info_const(). The function assumes that the default MRR
  implementation's applicability requirements are satisfied.

  @param keyno       Index number
  @param n_ranges    Number of ranges/keys (i.e. intervals) in the range sequence.
  @param n_rows      E(full rows to be retrieved)
  @param bufsz  OUT  If DS-MRR is chosen, buffer use of DS-MRR implementation
                     else the value is not modified
  @param flags  IN   MRR flags provided by the MRR user
                OUT  If DS-MRR is chosen, flags of DS-MRR implementation
                     else the value is not modified
  @param cost   IN   Cost of default MRR implementation
                OUT  If DS-MRR is chosen, cost of DS-MRR scan
                     else the value is not modified

  @retval TRUE   Default MRR implementation should be used
  @retval FALSE  NDB-MRR implementation should be used
*/

bool ha_ndbcluster::choose_mrr_impl(uint keyno, uint n_ranges, ha_rows n_rows,
                                    uint *bufsz, uint *flags, Cost_estimate *cost)
{
  THD *thd= current_thd;
  NDB_INDEX_TYPE key_type= get_index_type(keyno);

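  /* Refresh table->read_set for this index before the blob check below. */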
  get_read_set(true, keyno);

  /* Disable MRR on blob read and on NULL lookup in unique index. */
  if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) ||
      uses_blob_value(table->read_set) ||
      ( key_type == UNIQUE_INDEX &&
        has_null_in_unique_index(keyno) &&
        !(*flags & HA_MRR_NO_NULL_ENDPOINTS)))
  {
    /* Use the default implementation, don't modify args: See comments  */
    return true;
  }

  /**
   * Calculate *bufsz, fall back to default MRR if we can't allocate
   * sufficient buffer space for NDB-MRR
   */
  {
    uint save_bufsize= *bufsz;
    ulong reclength= table_share->reclength;
    uint entry_size= multi_range_max_entry(key_type, reclength);
    uint min_total_size= entry_size + multi_range_fixed_size(1);
    DBUG_PRINT("info", ("MRR bufsize suggested=%u want=%u limit=%d",
                        save_bufsize, (uint)(n_rows + 1) * entry_size,
                        (*flags & HA_MRR_LIMITS) != 0));
    if (save_bufsize < min_total_size)
    {
      if (*flags & HA_MRR_LIMITS)
      {
        /* Too small buffer limit for native NDB-MRR. */
        return true;
      }
      *bufsz= min_total_size;
    }
    else
    {
      uint max_ranges= (n_ranges > 0) ? n_ranges : MRR_MAX_RANGES;
      *bufsz= min(save_bufsize,
                  (uint)(n_rows * entry_size + multi_range_fixed_size(max_ranges)));
    }
    DBUG_PRINT("info", ("MRR bufsize set to %u", *bufsz));
  }

  /**
   * Cost based MRR optimization is known to be incorrect.
   * Disabled -> always use NDB-MRR whenever possible
   */
  *flags&= ~HA_MRR_USE_DEFAULT_IMPL;
  *flags|= HA_MRR_SUPPORT_SORTED;

  return false;
}


int ha_ndbcluster::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, 
                                         void *seq_init_param,
                                         uint n_ranges, uint mode,
                                         HANDLER_BUFFER *buffer)
{
  int error;
  DBUG_ENTER("ha_ndbcluster::multi_range_read_init");

  /*
    If supplied buffer is smaller than needed for just one range, we cannot do
    multi_range_read.
  */
  const ulong bufsize= multi_range_buffer_size(buffer);

  if (mode & HA_MRR_USE_DEFAULT_IMPL
      || bufsize < multi_range_fixed_size(1) +
                   multi_range_max_entry(get_index_type(active_index),
                                         table_share->reclength)
      || (m_pushed_join_operation==PUSHED_ROOT &&
         !m_disable_pushed_join &&
         !m_pushed_join_member->get_query_def().isScanQuery())
      || m_delete_cannot_batch || m_update_cannot_batch)
  {
    m_disable_multi_read= TRUE;
    DBUG_RETURN(handler::multi_range_read_init(seq_funcs, seq_init_param,
                                               n_ranges, mode, buffer));
  }

  /**
   * There may still be an open m_multi_cursor from the previous mrr access on this handler.
   * Close it now to free up resources for this NdbScanOperation.
   */ 
  if (unlikely((error= close_scan())))
    DBUG_RETURN(error);

  m_disable_multi_read= FALSE;

  mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
  /*
    Copy arguments into member variables
  */
  multi_range_buffer= buffer;
  mrr_funcs= *seq_funcs;
  mrr_iter= mrr_funcs.init(seq_init_param, n_ranges, mode);
  ranges_in_seq= n_ranges;
  m_range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range);
  mrr_need_range_assoc = !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
  if (mrr_need_range_assoc)
  {
    ha_statistic_increment(&SSV::ha_multi_range_read_init_count);
  }

  /*
    We do not start fetching here with execute(), rather we defer this to the
    first call to multi_range_read_next() by setting first_running_range and
    first_unstarted_range like this.

    The reason is that the MRR interface is designed so that in some cases
    multi_range_read_next() may never get called (eg. in case of WHERE
    condition on previous table that is never satisfied). So we may not need
    to fetch anything.

    Also, at the time of writing, returning an error from
    multi_range_read_init() does not correctly set the error status, so we get
    an assert on missing result status in net_end_statement().
  */
  first_running_range= 0;
  first_unstarted_range= 0;

  DBUG_RETURN(0);
}


int ha_ndbcluster::multi_range_start_retrievals(uint starting_range)
{
  KEY* key_info= table->key_info + active_index;
  ulong reclength= table_share->reclength;
  const NdbOperation* op;
  NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
  const NdbOperation *oplist[MRR_MAX_RANGES];
  uint num_keyops= 0;
  NdbTransaction *trans= m_thd_ndb->trans;
  int error;
  const bool is_pushed=
    check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
                      active_index);

  DBUG_ENTER("multi_range_start_retrievals");

  /*
   * read multi range will read ranges as follows (if not ordered)
   *
   * input    read order
   * ======   ==========
   * pk-op 1  pk-op 1
   * pk-op 2  pk-op 2
   * range 3  range (3,5) NOTE result rows will be intermixed
   * pk-op 4  pk-op 4
   * range 5
   * pk-op 6  pk-op 6
   */   

  /*
    We loop over all ranges, converting into primary/unique key operations if
    possible, and adding ranges to an ordered index scan for the rest.

    If the supplied HANDLER_BUFFER is too small, we may also need to do only
    part of the multi read at once.
  */

  assert(cur_index_type != UNDEFINED_INDEX);
  assert(m_multi_cursor==NULL);
  assert(m_active_query==NULL);

  const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type);
  const uchar *end_of_buffer= multi_range_buffer->buffer_end;

  /*
    Normally we should have sufficient buffer for the whole fixed-size part.
    But we need to make sure we do not crash if upper layer gave us a _really_
    small buffer.

    We already checked (in multi_range_read_init()) that we got enough buffer
    for at least one range.
  */
  uint min_entry_size=
    multi_range_entry_size(!read_multi_needs_scan(cur_index_type, key_info,
                                                  &mrr_cur_range, is_pushed),
                                                  reclength);
  const ulong bufsize= multi_range_buffer_size(multi_range_buffer);
  int max_range= multi_range_max_ranges(ranges_in_seq,
                                        bufsize - min_entry_size);
  assert(max_range > 0);
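  /* Variable-sized entries start right after the fixed array of 'custom' pointers. */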
  uchar *row_buf= multi_range_buffer->buffer + multi_range_fixed_size(max_range);
  m_multi_range_result_ptr= row_buf;

  int range_no= 0;
  int mrr_range_no= starting_range;
  bool any_real_read= FALSE;

  if (m_read_before_write_removal_possible)
    check_read_before_write_removal();

  for (;
       !m_range_res;
       range_no++, m_range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range))
  {
    if (range_no >= max_range)
      break;
    my_bool need_scan=
      read_multi_needs_scan(cur_index_type, key_info, &mrr_cur_range, is_pushed);
    if (row_buf + multi_range_entry_size(!need_scan, reclength) > end_of_buffer)
      break;
    if (need_scan)
    {
      if (range_no > NdbIndexScanOperation::MaxRangeNo)
        break;
      /*
        Check how much KEYINFO data we already used for index bounds, and
        split the MRR here if it exceeds a certain limit. This way we avoid
        overloading the TC block in the ndb kernel.

        The limit used is based on the value MAX_KEY_SIZE_IN_WORDS.
      */
      if (m_multi_cursor && m_multi_cursor->getCurrentKeySize() >= 1000)
        break;
    }

    mrr_range_no++;
    multi_range_put_custom(multi_range_buffer, range_no, mrr_cur_range.ptr);

    part_id_range part_spec;
    if (m_use_partition_pruning)
    {
      get_partition_set(table, table->record[0], active_index,
                        &mrr_cur_range.start_key,
                        &part_spec);
      DBUG_PRINT("info", ("part_spec.start_part: %u  part_spec.end_part: %u",
                          part_spec.start_part, part_spec.end_part));
      /*
        If partition pruning has found no partition in set
        we can skip this scan
      */
      if (part_spec.start_part > part_spec.end_part)
      {
        /*
          We can skip this range since the key won't fit into any
          partition
        */
        multi_range_entry_type(row_buf)= enum_skip_range;
        row_buf= multi_range_next_entry(row_buf, reclength);
        continue;
      }
      if (!trans &&
          (part_spec.start_part == part_spec.end_part))
        if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
                                                        error))))
          DBUG_RETURN(error);
    }

    if (need_scan)
    {
      if (!trans)
      {
        // ToDo see if we can use start_transaction_key here instead
        if (!m_use_partition_pruning)
        {
          get_partition_set(table, table->record[0], active_index,
                            &mrr_cur_range.start_key,
                            &part_spec);
          if (part_spec.start_part == part_spec.end_part)
          {
            if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part,
                                                            error))))
              DBUG_RETURN(error);
          }
          else if (unlikely(!(trans= start_transaction(error))))
            DBUG_RETURN(error);
        }
        else if (unlikely(!(trans= start_transaction(error))))
          DBUG_RETURN(error);
      }

      any_real_read= TRUE;
      DBUG_PRINT("info", ("any_real_read= TRUE"));

      /* Create the scan operation for the first scan range. */
      if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, 
                            active_index))
      {
        assert(!m_read_before_write_removal_used);
        if (!m_active_query)
        {
          const int error= create_pushed_join();
          if (unlikely(error))
            DBUG_RETURN(error);

          NdbQuery* const query= m_active_query;
          if (mrr_is_output_sorted &&
              query->getQueryOperation((uint)PUSHED_ROOT)->setOrdering(NdbQueryOptions::ScanOrdering_ascending))
            ERR_RETURN(query->getNdbError());
        }
      } // check_if_pushable()
      else
      if (!m_multi_cursor)
      {
        /* Do a multi-range index scan for ranges not done by primary/unique key. */
        NdbScanOperation::ScanOptions options;
        NdbInterpretedCode code(m_table);

        options.optionsPresent=
          NdbScanOperation::ScanOptions::SO_SCANFLAGS |
          NdbScanOperation::ScanOptions::SO_PARALLEL;

        options.scan_flags= 
          NdbScanOperation::SF_ReadRangeNo |
          NdbScanOperation::SF_MultiRange;

        if (lm == NdbOperation::LM_Read)
          options.scan_flags|= NdbScanOperation::SF_KeyInfo;
        if (mrr_is_output_sorted)
          options.scan_flags|= NdbScanOperation::SF_OrderByFull;

        options.parallel= DEFAULT_PARALLELISM;

        NdbOperation::GetValueSpec gets[2];
        if (table_share->primary_key == MAX_KEY)
          get_hidden_fields_scan(&options, gets);

        if (m_cond && m_cond->generate_scan_filter(&code, &options))
          ERR_RETURN(code.getNdbError());

        /* Define scan */
        NdbIndexScanOperation *scanOp= trans->scanIndex
          (m_index[active_index].ndb_record_key,
           m_ndb_record, 
           lm,
           (uchar *)(table->read_set->bitmap),
           NULL, /* All bounds specified below */
           &options,
           sizeof(NdbScanOperation::ScanOptions));

        if (!scanOp)
          ERR_RETURN(trans->getNdbError());

        m_multi_cursor= scanOp;

        /* Can't have blobs in multi range read */
        assert(!uses_blob_value(table->read_set));

        /* We set m_next_row=0 to mark that no row has been fetched from the scan yet. */
        m_next_row= 0;
      }

      Ndb::PartitionSpec ndbPartitionSpec;
      const Ndb::PartitionSpec* ndbPartSpecPtr= NULL;

      /* If this table uses user-defined partitioning, use MySQLD provided
       * partition info as pruning info
       * Otherwise, scan range pruning is performed automatically by
       * NDBAPI based on distribution key values.
       */
      if (m_use_partition_pruning && 
          m_user_defined_partitioning &&
          (part_spec.start_part == part_spec.end_part))
      {
        DBUG_PRINT("info", ("Range on user-def-partitioned table can be pruned to part %u",
                            part_spec.start_part));
        ndbPartitionSpec.type= Ndb::PartitionSpec::PS_USER_DEFINED;
        ndbPartitionSpec.UserDefined.partitionId= part_spec.start_part;
        ndbPartSpecPtr= &ndbPartitionSpec;
      }

      /* Include this range in the ordered index scan. */
      NdbIndexScanOperation::IndexBound bound;
      compute_index_bounds(bound, key_info,
			   &mrr_cur_range.start_key, &mrr_cur_range.end_key, 0);
      bound.range_no= range_no;

      const NdbRecord *key_rec= m_index[active_index].ndb_record_key;
      if (m_active_query)
      {
        DBUG_PRINT("info", ("setBound:%d, for pushed join", bound.range_no));
        if (m_active_query->setBound(key_rec, &bound))
        {
          ERR_RETURN(trans->getNdbError());
        }
      }
      else
      {
        if (m_multi_cursor->setBound(m_index[active_index].ndb_record_key,
                                     bound,
                                     ndbPartSpecPtr, // Only for user-def tables
                                     sizeof(Ndb::PartitionSpec)))
        {
          ERR_RETURN(trans->getNdbError());
        }
      }

      multi_range_entry_type(row_buf)= enum_ordered_range;
      row_buf= multi_range_next_entry(row_buf, reclength);
    }
    else
    {
      multi_range_entry_type(row_buf)= enum_unique_range;

      if (!trans)
      {
        assert(active_index != MAX_KEY);
        if (unlikely(!(trans= start_transaction_key(active_index,
                                                    mrr_cur_range.start_key.key,
                                                    error))))
          DBUG_RETURN(error);
      }

      if (m_read_before_write_removal_used)
      {
        DBUG_PRINT("info", ("m_read_before_write_removal_used == TRUE"));

        /* Key will later be returned as result record.
         * Save it in 'row_buf' from where it will later be retrieved.
         */
        key_restore(multi_range_row(row_buf),
                    (uchar*)mrr_cur_range.start_key.key,
                    key_info, key_info->key_length);

        op= NULL;  // read_before_write_removal
      }
      else
      {
        any_real_read= TRUE;
        DBUG_PRINT("info", ("any_real_read= TRUE"));

        /* Convert to primary/unique key operation. */
        Uint32 partitionId;
        Uint32* ppartitionId = NULL;

        if (m_user_defined_partitioning &&
            (cur_index_type == PRIMARY_KEY_ORDERED_INDEX ||
             cur_index_type == PRIMARY_KEY_INDEX))
        {
          partitionId=part_spec.start_part;
          ppartitionId=&partitionId;
        }

        /**
         * The 'pushable' codepath is incomplete and not expected
         * to be taken, as make_join_pushed() handles
         * AT_MULTI_UNIQUE_KEY as non-pushable.
         */
        if (m_pushed_join_operation==PUSHED_ROOT &&
            !m_disable_pushed_join &&
            !m_pushed_join_member->get_query_def().isScanQuery())
        {
          op= NULL;            // Avoid compiler warning
          assert(false);  // FIXME: Incomplete code, should not be executed
          assert(lm == NdbOperation::LM_CommittedRead);
          const int error= pk_unique_index_read_key_pushed(active_index,
                                                           mrr_cur_range.start_key.key,
                                                           ppartitionId);
          if (unlikely(error))
            DBUG_RETURN(error);
        }
        else
        {
          if (m_pushed_join_operation == PUSHED_ROOT)
          {
            DBUG_PRINT("info", ("Cannot push join due to incomplete implementation."));
            m_thd_ndb->m_pushed_queries_dropped++;
          }
          if (!(op= pk_unique_index_read_key(active_index,
                                             mrr_cur_range.start_key.key,
                                             multi_range_row(row_buf), lm,
                                             ppartitionId)))
            ERR_RETURN(trans->getNdbError());
        }
      }
      oplist[num_keyops++]= op;
      row_buf= multi_range_next_entry(row_buf, reclength);
    }
  }

  if (m_active_query != NULL &&         
      m_pushed_join_member->get_query_def().isScanQuery())
  {
    m_thd_ndb->m_scan_count++;
    if (mrr_is_output_sorted)
    {        
      m_thd_ndb->m_sorted_scan_count++;
    }

    bool prunable= false;
    if (unlikely(m_active_query->isPrunable(prunable) != 0))
      ERR_RETURN(m_active_query->getNdbError());
    if (prunable)
      m_thd_ndb->m_pruned_scan_count++;

    DBUG_PRINT("info", ("Is MRR scan-query pruned to 1 partition? :%u", prunable));
    assert(!m_multi_cursor);
  }
  if (m_multi_cursor)
  {
    DBUG_PRINT("info", ("Is MRR scan pruned to 1 partition? :%u",
                        m_multi_cursor->getPruned()));
    m_thd_ndb->m_scan_count++;
    m_thd_ndb->m_pruned_scan_count += (m_multi_cursor->getPruned()? 1 : 0);
    if (mrr_is_output_sorted)
    {        
      m_thd_ndb->m_sorted_scan_count++;
    }
  }

  if (any_real_read && execute_no_commit_ie(m_thd_ndb, trans))
    ERR_RETURN(trans->getNdbError());

  if (!m_range_res)
  {
    DBUG_PRINT("info",
               ("Split MRR read, %d-%d of %d bufsize=%lu used=%lu range_no=%d",
                starting_range, mrr_range_no - 1, ranges_in_seq,
                (ulong)(end_of_buffer - multi_range_buffer->buffer),
                (ulong)(row_buf - multi_range_buffer->buffer), range_no));
    /*
      Mark that we are using the entire buffer (even if we might not), since
      we have not read all ranges yet.

      This is so that mysqld does not reuse the buffer while we read the
      remaining ranges.
    */
    multi_range_buffer->end_of_used_area= multi_range_buffer->buffer_end;
  }
  else
    multi_range_buffer->end_of_used_area= row_buf;

  first_running_range= first_range_in_batch= starting_range;
  first_unstarted_range= mrr_range_no;
  m_current_range_no= 0;

  /*
    Now we need to inspect all ranges that were converted to key operations.

    We need to check for any error (in particular NoDataFound), and remember
    the status, since the operation pointer may no longer be valid when we
    actually get to it in multi_range_next_entry() (we may have done further
    execute()'s in a different handler object during joins eg.)
  */
  row_buf= m_multi_range_result_ptr;
  uint op_idx= 0;
  for (uint r= first_range_in_batch; r < first_unstarted_range; r++)
  {
    uchar &type_loc= multi_range_entry_type(row_buf);
    row_buf= multi_range_next_entry(row_buf, reclength);
    if (type_loc >= enum_ordered_range)
      continue;

    assert(op_idx < MRR_MAX_RANGES);
    if ((op= oplist[op_idx++]) == NULL)
      continue;  // read_before_write_removal

    const NdbError &error= op->getNdbError();
    if (error.code != 0)
    {
      if (error.classification == NdbError::NoDataFound)
        type_loc= enum_empty_unique_range;
      else
      {
        /*
          This shouldn't really happen.

          There aren't really any other errors that could happen on the read
          without also aborting the transaction and causing execute() to
          return failure.

          (But we can still safely return an error code in non-debug builds).
        */
        assert(FALSE);
        ERR_RETURN(error);      /* purecov: deadcode */
      }
    }
  }

  DBUG_RETURN(0);
}

int ha_ndbcluster::multi_range_read_next(char **range_info)
{
  int res;
  DBUG_ENTER("ha_ndbcluster::multi_range_read_next");

  if (m_disable_multi_read)
  {
    DBUG_RETURN(handler::multi_range_read_next(range_info));
  }

  for(;;)
  {
    /* for each range (we should have remembered the number) */
    while (first_running_range < first_unstarted_range)
    {
      uchar *row_buf= m_multi_range_result_ptr;
      int expected_range_no= first_running_range - first_range_in_batch;

      switch (multi_range_entry_type(row_buf))
      {
        case enum_skip_range:
        case enum_empty_unique_range:
          /* Nothing in this range; continue with next. */
          break;

        case enum_unique_range:
          /*
            Move to next range; we can have at most one record from a unique
            range.
          */
          first_running_range++;
          m_multi_range_result_ptr=
            multi_range_next_entry(m_multi_range_result_ptr,
                                   table_share->reclength);

          /*
            Clear m_active_cursor; it is used as a flag in update_row() /
            delete_row() to know whether the current tuple is from a scan
            or pk operation.
          */
          m_active_cursor= NULL;

          /* Return the record. */
          *range_info= multi_range_get_custom(multi_range_buffer,
                                              expected_range_no);
          memcpy(table->record[0], multi_range_row(row_buf),
                 table_share->reclength);
          DBUG_RETURN(0);

        case enum_ordered_range:
          /* An index scan range. */
          {
            int res;
            if ((res= read_multi_range_fetch_next()) != 0)
            {
              *range_info= multi_range_get_custom(multi_range_buffer,
                                                  expected_range_no);
              first_running_range++;
              m_multi_range_result_ptr=
                multi_range_next_entry(m_multi_range_result_ptr,
                                       table_share->reclength);
              DBUG_RETURN(res);
            }
          }
          if (!m_next_row)
          {
            /*
              The whole scan is done, and the cursor has been closed.
              So nothing more for this range. Move to next.
            */
            break;
          }
          else
          {
            int current_range_no= m_current_range_no;
            /*
              For a sorted index scan, we will receive rows in increasing
              range_no order, so we can return ranges in order, pausing when
              range_no indicates that the currently processed range
              (first_running_range) is done.

              But for unsorted scan, we may receive a high range_no from one
              fragment followed by a low range_no from another fragment. So we
              need to process all index scan ranges together.
            */
            if (!mrr_is_output_sorted || expected_range_no == current_range_no)
            {
              *range_info= multi_range_get_custom(multi_range_buffer,
                                                  current_range_no);
              /* Copy out data from the new row. */
              unpack_record(table->record[0], m_next_row);
              table->status= 0;
              /*
                Mark that we have used this row, so we need to fetch a new
                one on the next call.
              */
              m_next_row= 0;
              /*
                Set m_active_cursor; it is used as a flag in update_row() /
                delete_row() to know whether the current tuple is from a scan or
                pk operation.
              */
              m_active_cursor= m_multi_cursor;

              DBUG_RETURN(0);
            }
            else if (current_range_no > expected_range_no)
            {
              /* Nothing more in scan for this range. Move to next. */
              break;
            }
            else
            {
              /*
                Should not happen. Ranges should be returned from NDB API in
                the order we requested them.
              */
              assert(0);
              break;                              // Attempt to carry on
            }
          }

        default:
          assert(0);
      }
      /* At this point the current range is done, proceed to next. */
      first_running_range++;
      m_multi_range_result_ptr=
        multi_range_next_entry(m_multi_range_result_ptr, table_share->reclength);
    }

    if (m_range_res)   // mrr_funcs.next() has consumed all ranges.
      DBUG_RETURN(HA_ERR_END_OF_FILE);

    /*
      Read remaining ranges
    */
    if ((res= multi_range_start_retrievals(first_running_range)))
      DBUG_RETURN(res);

  } // for(;;)
}


/*
  Fetch next row from the ordered index cursor in multi range scan.

  We keep the next row in m_next_row, and the range_no of the
  next row in m_current_range_no. This is used in sorted index scan
  to correctly interleave rows from primary/unique key operations with
  rows from the scan.
*/
int
ha_ndbcluster::read_multi_range_fetch_next()
{
  DBUG_ENTER("read_multi_range_fetch_next");

  if (m_active_query)
  {
    DBUG_PRINT("info", ("read_multi_range_fetch_next from pushed join, m_next_row:%p", m_next_row));
    if (!m_next_row)
    {
      int res= fetch_next_pushed();
      if (res == NdbQuery::NextResult_gotRow)
      {
        m_current_range_no= 0;
//      m_current_range_no= cursor->get_range_no();  // FIXME SPJ, need rangeNo from index scan
      }
      else if (res == NdbQuery::NextResult_scanComplete)
      {
        /* We have fetched the last row from the scan. */
        m_active_query->close(FALSE);
        m_active_query= NULL;
        m_next_row= 0;
        DBUG_RETURN(0);
      }
      else
      {
        /* An error. */
        DBUG_RETURN(res);
      }
    }
  }
  else if (m_multi_cursor)
  {
    if (!m_next_row)
    {
      NdbIndexScanOperation *cursor= m_multi_cursor;
      int res= fetch_next(cursor);
      if (res == 0)
      {
        m_current_range_no= cursor->get_range_no();
      }
      else if (res == 1)
      {
        /* We have fetched the last row from the scan. */
        cursor->close(FALSE, TRUE);
        m_active_cursor= 0;
        m_multi_cursor= 0;
        m_next_row= 0;
        DBUG_RETURN(0);
      }
      else
      {
        /* An error. */
        DBUG_RETURN(res);
      }
    }
  }
  DBUG_RETURN(0);
}


/**
 * Try to find pushable subsets of a join plan.
 * @param hton unused (maybe useful for other engines).
 * @param thd Thread.
 * @param plan The join plan to examine.
 * @return Possible error code.
 */

static
int ndbcluster_make_pushed_join(handlerton *hton,
                                THD* thd,
                                const AQP::Join_plan* plan)
{
  DBUG_ENTER("ndbcluster_make_pushed_join");
  (void)ha_ndb_ext; // prevents compiler warning.

  if (THDVAR(thd, join_pushdown) &&
      // Check for online upgrade/downgrade.
      ndb_join_pushdown(g_ndb_cluster_connection->get_min_db_version()))
  {
    bool pushed_something = false;
    ndb_pushed_builder_ctx pushed_builder(*plan);

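    /*
      Try to build a pushed join rooted at each table access except the last;
      a pushed join needs at least one child operation in addition to its root.
    */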
    for (uint i= 0; i < plan->get_access_count()-1; i++)
    {
      const AQP::Table_access* const join_root= plan->get_table_access(i);
      const ndb_pushed_join* pushed_join= NULL;

      // Try to build a ndb_pushed_join starting from 'join_root'
      int error= pushed_builder.make_pushed_join(join_root, pushed_join);
      if (unlikely(error))
      {
        if (error < 0)  // getNdbError() gives us the error code
        {
          ERR_SET(pushed_builder.getNdbError(),error);
        }
        join_root->get_table()->file->print_error(error, MYF(0));
        DBUG_RETURN(error);
      }

      // Assign any produced pushed_join definitions to 
      // the ha_ndbcluster instance representing its root.
      if (pushed_join != NULL)
      {
        ha_ndbcluster* const handler=
          static_cast<ha_ndbcluster*>(join_root->get_table()->file);

        error= handler->assign_pushed_join(pushed_join);
        if (unlikely(error))
        {
          delete pushed_join;
          handler->print_error(error, MYF(0));
          DBUG_RETURN(error);
        }
        // Something was pushed and the QEP needs to be modified
        pushed_something = true;
      }
    }

    if (pushed_something)
    {
      // Modify the QEP_TAB's to use the 'linked' read functions
      // for those parts of the join which have been pushed down.
      for (uint i= 0; i < plan->get_access_count(); i++)
      {
        plan->get_table_access(i)->set_pushed_table_access_method();
      }
    }
  }
  DBUG_RETURN(0);
}


/**
 * When a pushed join having the table of this handler as its root
 * has been produced, ::assign_pushed_join() is responsible for setting
 * up this ha_ndbcluster instance so that the prepared NdbQuery
 * can be instantiated at execution time.
 */
int
ha_ndbcluster::assign_pushed_join(const ndb_pushed_join* pushed_join)
{
  DBUG_ENTER("assign_pushed_join");
  m_thd_ndb->m_pushed_queries_defined++;

  for (uint i = 0; i < pushed_join->get_operation_count(); i++)
  {
    const TABLE* const tab= pushed_join->get_table(i);
    assert(tab->file->ht == ht);
    ha_ndbcluster* child= static_cast<ha_ndbcluster*>(tab->file);
    child->m_pushed_join_member= pushed_join;
    child->m_pushed_join_operation= i;
  }

  DBUG_PRINT("info", ("Assigned pushed join with %d child operations", 
                      pushed_join->get_operation_count()-1));

  DBUG_RETURN(0);
}


/**
 * First level of filtering of tables which *may* be part of
 * a pushed query: returning 'false' eliminates this table
 * from being part of a pushed join.
 * A 'reason' for rejecting the table is required if 'false'
 * is returned.
 */
bool
ha_ndbcluster::maybe_pushable_join(const char*& reason) const
{
  reason= NULL;
  if (uses_blob_value(table->read_set))
  {
    reason= "select list can't contain BLOB columns";
    return false;
  }
  if (m_user_defined_partitioning)
  {
    reason= "has user defined partioning";
    return false;
  }

  // Pushed operations may not set locks.
  const NdbOperation::LockMode lockMode= get_ndb_lock_mode(m_lock.type);
  switch (lockMode)
  {
  case NdbOperation::LM_CommittedRead:
    return true;

  case NdbOperation::LM_Read:
  case NdbOperation::LM_Exclusive:
    reason= "lock modes other than 'read committed' not implemented";
    return false;
        
  default: // Other lock modes not used by handler.
    assert(false);
    return false;
  }

  return true;
}

/**
 * Check if this table access operation (and a number of succeeding operations)
 * can be pushed to the cluster and executed there. This requires that there
 * is an NdbQueryDefinition and that it still corresponds to the
 * type of operation that we intend to execute. (The MySQL server will
 * sometimes change its mind and replace a scan with a lookup or vice versa
 * as it works its way into the nested loop join.)
 *
 * @param type This is the operation type that the server want to execute.
 * @param idx  Index used whenever relevant for operation type
 * @param needSorted True if the root operation is an ordered index scan 
 * with sorted results.
 * @return True if the operation may be pushed.
 */
bool 
ha_ndbcluster::check_if_pushable(int type,  //NdbQueryOperationDef::Type, 
                                 uint idx) const
{
  if (m_disable_pushed_join)
  {
    DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
    return false;
  }
  return   m_pushed_join_operation == PUSHED_ROOT
        && m_pushed_join_member    != NULL
        && m_pushed_join_member->match_definition(
                        type,
                        (idx<MAX_KEY) ? &m_index[idx] : NULL);
}


int
ha_ndbcluster::create_pushed_join(const NdbQueryParamValue* keyFieldParams, uint paramCnt)
{
  DBUG_ENTER("create_pushed_join");
  assert(m_pushed_join_member && m_pushed_join_operation == PUSHED_ROOT);

  NdbQuery* const query= 
    m_pushed_join_member->make_query_instance(m_thd_ndb->trans, keyFieldParams, paramCnt);

  if (unlikely(query==NULL))
    ERR_RETURN(m_thd_ndb->trans->getNdbError());

  // Bind to instantiated NdbQueryOperations.
  for (uint i= 0; i < m_pushed_join_member->get_operation_count(); i++)
  {
    const TABLE* const tab= m_pushed_join_member->get_table(i);
    ha_ndbcluster* handler= static_cast<ha_ndbcluster*>(tab->file);

    assert(handler->m_pushed_join_operation==(int)i);
    NdbQueryOperation* const op= query->getQueryOperation(i);
    handler->m_pushed_operation= op;

    // Bind to result buffers
    const NdbRecord* const resultRec= handler->m_ndb_record;
    int res= op->setResultRowRef(
                        resultRec,
                        handler->_m_next_row,
                        (uchar *)(tab->read_set->bitmap));
    if (unlikely(res))
      ERR_RETURN(query->getNdbError());
    
    // We clear 'm_next_row' to say that no row was fetched from the query yet.
    handler->_m_next_row= 0;
  }

  assert(m_active_query==NULL);
  m_active_query= query;
  m_thd_ndb->m_pushed_queries_executed++;

  DBUG_RETURN(0);
}


/**
 * Check if this table access operation is part of a pushed join operation
 * which is actively executing.
 */
bool 
ha_ndbcluster::check_is_pushed() const
{
  if (m_pushed_join_member == NULL)
    return false;

  handler *root= m_pushed_join_member->get_table(PUSHED_ROOT)->file;
  return (static_cast<ha_ndbcluster*>(root)->m_active_query);
}

uint 
ha_ndbcluster::number_of_pushed_joins() const
{
  if (m_pushed_join_member == NULL)
    return 0;
  else
    return m_pushed_join_member->get_operation_count();
}

const TABLE*
ha_ndbcluster::root_of_pushed_join() const
{
  if (m_pushed_join_member == NULL)
    return NULL;
  else
    return m_pushed_join_member->get_table(PUSHED_ROOT);
}

const TABLE*
ha_ndbcluster::parent_of_pushed_join() const
{
  if (m_pushed_join_operation > PUSHED_ROOT)
  {
    assert(m_pushed_join_member!=NULL);
    uint parent_ix= m_pushed_join_member
                    ->get_query_def().getQueryOperation(m_pushed_join_operation)
                    ->getParentOperation(0)
                    ->getOpNo();
    return m_pushed_join_member->get_table(parent_ix);
  }
  return NULL;
}

/**
  Utility thread main loop.
*/
Ndb_util_thread::Ndb_util_thread()
  : Ndb_component("Util")
{
  native_mutex_init(&LOCK, MY_MUTEX_INIT_FAST);
  native_cond_init(&COND);
}

Ndb_util_thread::~Ndb_util_thread()
{
  native_mutex_destroy(&LOCK);
  native_cond_destroy(&COND);
}

void Ndb_util_thread::do_wakeup()
{
  // Wakeup from potential wait
  log_info("Wakeup");

  native_mutex_lock(&LOCK);
  native_cond_signal(&COND);
  native_mutex_unlock(&LOCK);
}


void ndb_util_thread_stop(void)
{
  ndb_util_thread.stop();
}

#include "ndb_log.h"

void
Ndb_util_thread::do_run()
{
  THD *thd; /* needs to be first for thread_stack */
  struct timespec abstime;
  Thd_ndb *thd_ndb= NULL;
  uint share_list_size= 0;
  NDB_SHARE **share_list= NULL;

  DBUG_ENTER("ndb_util_thread");
  DBUG_PRINT("enter", ("cache_check_time: %lu", opt_ndb_cache_check_time));

  log_info("Starting...");

  native_mutex_lock(&LOCK);

  thd= new THD; /* note that constructor of THD uses DBUG_ */
  if (thd == NULL)
  {
    set_my_errno(HA_ERR_OUT_OF_MEM);
    DBUG_VOID_RETURN;
  }
  THD_CHECK_SENTRY(thd);

  thd->thread_stack= (char*)&thd; /* remember where our stack is */
  if (thd->store_globals())
    goto ndb_util_thread_fail;
  thd_set_command(thd, COM_DAEMON);
#ifndef NDB_THD_HAS_NO_VERSION
  thd->version=refresh_version;
#endif
  thd->get_protocol_classic()->set_client_capabilities(0);
  thd->security_context()->skip_grants();
  thd->get_protocol_classic()->init_net((st_vio *) 0);

  CHARSET_INFO *charset_connection;
  charset_connection= get_charset_by_csname("utf8",
                                            MY_CS_PRIMARY, MYF(MY_WME));
  thd->variables.character_set_client= charset_connection;
  thd->variables.character_set_results= charset_connection;
  thd->variables.collation_connection= charset_connection;
  thd->update_charset();

  native_mutex_unlock(&LOCK);

  log_info("Wait for server start completed");
  /*
    wait for mysql server to start
  */
  mysql_mutex_lock(&LOCK_server_started);
  while (!mysqld_server_started)
  {
    set_timespec(&abstime, 1);
    mysql_cond_timedwait(&COND_server_started, &LOCK_server_started,
                         &abstime);
    if (is_stop_requested())
    {
      mysql_mutex_unlock(&LOCK_server_started);
      native_mutex_lock(&LOCK);
      goto ndb_util_thread_end;
    }
  }
  mysql_mutex_unlock(&LOCK_server_started);

  // Defer call of THD::init_for_queries() until after mysqld_server_started
  // to ensure that the parts of MySQL Server it uses have been created
  thd->init_for_queries();

  log_info("Wait for cluster to start");
  /*
    Wait for cluster to start
  */
  native_mutex_lock(&LOCK);
  while (!g_ndb_status.cluster_node_id && (ndbcluster_hton->slot != ~(uint)0))
  {
    /* ndb not connected yet */
    native_cond_wait(&COND, &LOCK);
    if (is_stop_requested())
      goto ndb_util_thread_end;
  }
  native_mutex_unlock(&LOCK);

  /* Get thd_ndb for this thread */
  if (!(thd_ndb= Thd_ndb::seize(thd)))
  {
    sql_print_error("Could not allocate Thd_ndb object");
    native_mutex_lock(&LOCK);
    goto ndb_util_thread_end;
  }
  thd_set_thd_ndb(thd, thd_ndb);
  thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;

  if (opt_ndb_extra_logging && ndb_binlog_running)
    sql_print_information("NDB Binlog: Ndb tables initially read only.");

  log_info("Started");

  set_timespec(&abstime, 0);
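  /*
    Main loop: wake up when signalled or when the cache_check_time period
    expires, then refresh commit counts for open NDB tables (used to keep
    the query cache consistent).
  */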
  for (;;)
  {
    native_mutex_lock(&LOCK);
    if (!is_stop_requested())
      native_cond_timedwait(&COND,
                             &LOCK,
                             &abstime);
    if (is_stop_requested()) /* Stopping thread */
      goto ndb_util_thread_end;
    native_mutex_unlock(&LOCK);
#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD
    DBUG_PRINT("ndb_util_thread", ("Started, cache_check_time: %lu",
                                   opt_ndb_cache_check_time));
#endif

    /*
      Check if the Ndb object in thd_ndb is still valid (it will be
      invalid if connection to cluster has been lost) and recycle
      it if necessary.
    */
    if (!check_ndb_in_thd(thd, false))
    {
      set_timespec(&abstime, 1);
      continue;
    }

    /*
      Regularly give the ndb_binlog component a chance to set itself up,
      i.e. at first start it needs to create the ndb_* system tables
      and set up event operations on those. In case of lost connection
      to the cluster, the ndb_* system tables are hopefully still there
      but the event operations need to be recreated.
    */
    if (!ndb_binlog_setup(thd))
    {
      /* Failed to setup binlog, try again in 1 second */
      set_timespec(&abstime, 1);
      continue;
    }

    if (opt_ndb_cache_check_time == 0)
    {
      /* Wake up in 1 second to check if value has changed */
      set_timespec(&abstime, 1);
      continue;
    }

    /* Lock mutex and fill list with pointers to all open tables */
    NDB_SHARE *share;
    native_mutex_lock(&ndbcluster_mutex);
    uint i, open_count, record_count= ndbcluster_open_tables.records;
    if (share_list_size < record_count)
    {
      NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count];
      if (!new_share_list)
      {
        sql_print_warning("ndb util thread: malloc failure, "
                          "query cache not maintained properly");
        native_mutex_unlock(&ndbcluster_mutex);
        goto next;                               // At least do not crash
      }
      delete [] share_list;
      share_list_size= record_count;
      share_list= new_share_list;
    }
    for (i= 0, open_count= 0; i < record_count; i++)
    {
      share= (NDB_SHARE *)my_hash_element(&ndbcluster_open_tables, i);
      if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
          <= 0)
        continue; // injector thread is the only user, skip statistics
      /* ndb_share reference temporary, free below */
      share->use_count++; /* Make sure the table can't be closed */
      share->util_thread= true;
      DBUG_PRINT("NDB_SHARE", ("%s temporary  use_count: %u",
                               share->key_string(), share->use_count));
      DBUG_PRINT("ndb_util_thread",
                 ("Found open table[%d]: %s, use_count: %d",
                  i, share->table_name, share->use_count));

      /* Store pointer to table */
      share_list[open_count++]= share;
    }
    native_mutex_unlock(&ndbcluster_mutex);

    /* Iterate through the open files list */
    for (i= 0; i < open_count; i++)
    {
      share= share_list[i];
      if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0))
          <= 1)
      {
        /*
          Util thread and injector thread are the only users, skip statistics
        */
        /* ndb_share reference temporary free */
        DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                                 share->key_string(), share->use_count));
        
        native_mutex_lock(&ndbcluster_mutex);
        share->util_thread= false;
        free_share(&share, true);
        native_mutex_unlock(&ndbcluster_mutex);
        continue;
      }
      DBUG_PRINT("ndb_util_thread",
                 ("Fetching commit count for: %s", share->key_string()));

      struct Ndb_statistics stat;
      uint lock;
      native_mutex_lock(&share->mutex);
      lock= share->commit_count_lock;
      native_mutex_unlock(&share->mutex);
      {
        /* Contact NDB to get commit count for table */
        Ndb* ndb= thd_ndb->ndb;
        if (ndb->setDatabaseName(share->db))
        {
          goto loop_next;
        }
        Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name);
        if (ndbtab_g.get_table() &&
            ndb_get_table_statistics(thd, NULL, FALSE, ndb,
                                     ndbtab_g.get_table()->getDefaultRecord(), 
                                     &stat) == 0)
        {
          DBUG_PRINT("info", ("Table: %s, commit_count: %llu,  rows: %llu",
                              share->key_string(),
                              stat.commit_count, stat.row_count));
        }
        else
        {
          DBUG_PRINT("ndb_util_thread",
                     ("Error: Could not get commit count for table %s",
                      share->key_string()));
          stat.commit_count= 0;
        }
      }
  loop_next:
      native_mutex_lock(&share->mutex);
      if (share->commit_count_lock == lock)
        share->commit_count= stat.commit_count;
      native_mutex_unlock(&share->mutex);

      /* ndb_share reference temporary free */
      DBUG_PRINT("NDB_SHARE", ("%s temporary free  use_count: %u",
                               share->key_string(), share->use_count));
      native_mutex_lock(&ndbcluster_mutex);
      share->util_thread= false;
      free_share(&share, true);
      native_mutex_unlock(&ndbcluster_mutex);
    }
next:
    /* Calculate new time to wake up */
    set_timespec_nsec(&abstime, opt_ndb_cache_check_time * 1000000ULL);
  }

  log_info("Stopping...");

  native_mutex_lock(&LOCK);

ndb_util_thread_end:
  thd->get_protocol_classic()->end_net();
ndb_util_thread_fail:
  if (share_list)
    delete [] share_list;
  if (thd_ndb)
  {
    Thd_ndb::release(thd_ndb);
    thd_set_thd_ndb(thd, NULL);
  }
  delete thd;
  
  native_mutex_unlock(&LOCK);
  DBUG_PRINT("exit", ("ndb_util_thread"));

  log_info("Stopped");

  DBUG_VOID_RETURN;
}

/*
  Condition pushdown
*/
/**
  Push a condition to the ndbcluster storage engine for evaluation
  during table and index scans. The conditions are stored on a stack,
  so several conditions can be pushed. The stack can be popped
  by calling cond_pop; handler::extra(HA_EXTRA_RESET) (handler::reset())
  will clear the stack.
  The current implementation supports arbitrary AND/OR nested conditions
  with comparisons between columns and constants (including constant
  expressions and function calls) and the following comparison operators:
  =, !=, >, >=, <, <=, "is null", and "is not null".
  
  @retval
    NULL The condition was supported and will be evaluated for each 
         row found during the scan
  @retval
    cond The condition was not supported and all rows will be returned from
         the scan for evaluation (and thus not saved on stack)
*/
const 
Item* 
ha_ndbcluster::cond_push(const Item *cond) 
{ 
  DBUG_ENTER("ha_ndbcluster::cond_push");

#if 1
  if (cond->used_tables() & ~table->pos_in_table_list->map())
  {
    /**
     * 'cond' refers to fields from other tables, or other instances
     * of this table -> reject it.
     * (The optimizer needs a better understanding of what is
     *  pushable by each handler.)
     */
    DBUG_EXECUTE("where",print_where((Item *)cond, "Rejected cond_push", QT_ORDINARY););
    DBUG_RETURN(cond);
  }
#else
  /*
    Make sure that 'cond' does not refer to field(s) from other tables
    or other instances of this table.
    (This was a legacy bug in optimizer)
  */
  assert(!(cond->used_tables() & ~table->pos_in_table_list->map()));
#endif
  if (!m_cond) 
    m_cond= new ha_ndbcluster_cond;
  if (!m_cond)
  {
    set_my_errno(HA_ERR_OUT_OF_MEM);
    DBUG_RETURN(cond);
  }
  DBUG_EXECUTE("where",print_where((Item *)cond, m_tabname, QT_ORDINARY););
  DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table));
}

/**
  Pop the top condition from the condition stack of the handler instance.
*/
void 
ha_ndbcluster::cond_pop() 
{ 
  if (m_cond)
    m_cond->cond_pop();
}


/*
  Implements the SHOW ENGINE NDB STATUS command.
*/
bool
ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print,
                       enum ha_stat_type stat_type)
{
  char name[16];
  char buf[IO_SIZE];
  uint buflen;
  DBUG_ENTER("ndbcluster_show_status");
  
  if (stat_type != HA_ENGINE_STATUS)
  {
    DBUG_RETURN(FALSE);
  }

  Ndb* ndb= check_ndb_in_thd(thd);
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  struct st_ndb_status ns;
  if (ndb)
    update_status_variables(thd_ndb, &ns, thd_ndb->connection);
  else
    update_status_variables(NULL, &ns, g_ndb_cluster_connection);

  buflen= (uint)
    my_snprintf(buf, sizeof(buf),
                "cluster_node_id=%ld, "
                "connected_host=%s, "
                "connected_port=%ld, "
                "number_of_data_nodes=%ld, "
                "number_of_ready_data_nodes=%ld, "
                "connect_count=%ld",
                ns.cluster_node_id,
                ns.connected_host,
                ns.connected_port,
                ns.number_of_data_nodes,
                ns.number_of_ready_data_nodes,
                ns.connect_count);
  if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
                 STRING_WITH_LEN("connection"), buf, buflen))
    DBUG_RETURN(TRUE);

  for (int i= 0; i < MAX_NDB_NODES; i++)
  {
    if (ns.transaction_hint_count[i] > 0 ||
        ns.transaction_no_hint_count[i] > 0)
    {
      uint namelen= (uint)my_snprintf(name, sizeof(name), "node[%d]", i);
      buflen= (uint)my_snprintf(buf, sizeof(buf),
                          "transaction_hint=%ld, transaction_no_hint=%ld",
                          ns.transaction_hint_count[i],
                          ns.transaction_no_hint_count[i]);
      if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
                     name, namelen, buf, buflen))
        DBUG_RETURN(TRUE);
    }
  }

  if (ndb)
  {
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
    while (ndb->get_free_list_usage(&tmp))
    {
      buflen= (uint)
        my_snprintf(buf, sizeof(buf),
                  "created=%u, free=%u, sizeof=%u",
                  tmp.m_created, tmp.m_free, tmp.m_sizeof);
      if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
                     tmp.m_name, (uint)strlen(tmp.m_name), buf, buflen))
        DBUG_RETURN(TRUE);
    }
  }

  buflen = (uint)ndbcluster_show_status_binlog(buf, sizeof(buf));
  if (buflen)
  {
    if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length,
                   STRING_WITH_LEN("binlog"), buf, buflen))
      DBUG_RETURN(TRUE);
  }

  DBUG_RETURN(FALSE);
}


int ha_ndbcluster::get_default_num_partitions(HA_CREATE_INFO *create_info)
{
  if (unlikely(g_ndb_cluster_connection->get_no_ready() <= 0))
  {
err:
    my_error(HA_ERR_NO_CONNECTION, MYF(0));
    return -1;
  }

  THD* thd = current_thd;
  if (thd == 0)
    goto err;
  Thd_ndb * thd_ndb = get_thd_ndb(thd);
  if (thd_ndb == 0)
    goto err;

  ha_rows max_rows, min_rows;
  if (create_info)
  {
    max_rows= create_info->max_rows;
    min_rows= create_info->min_rows;
  }
  else
  {
    max_rows= table_share->max_rows;
    min_rows= table_share->min_rows;
  }
  uint no_fragments= get_no_fragments(max_rows >= min_rows ?
                                      max_rows : min_rows);
  uint reported_frags;
  adjusted_frag_count(thd_ndb->ndb,
                      no_fragments,
                      reported_frags);
  return reported_frags;
}

uint32 ha_ndbcluster::calculate_key_hash_value(Field **field_array)
{
  Uint32 hash_value;
  struct Ndb::Key_part_ptr key_data[MAX_REF_PARTS];
  struct Ndb::Key_part_ptr *key_data_ptr= &key_data[0];
  Uint32 i= 0;
  int ret_val;
  Uint64 tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
  void *buf= (void*)&tmp[0];
  DBUG_ENTER("ha_ndbcluster::calculate_key_hash_value");

  do
  {
    Field *field= *field_array;
    uint len= field->data_length();
    assert(!field->is_real_null());
    if (field->real_type() == MYSQL_TYPE_VARCHAR)
      len+= ((Field_varstring*)field)->length_bytes;
    key_data[i].ptr= field->ptr;
    key_data[i++].len= len;
  } while (*(++field_array));
  key_data[i].ptr= 0;
  if ((ret_val= Ndb::computeHash(&hash_value, m_table,
                                 key_data_ptr, buf, sizeof(tmp))))
  {
    DBUG_PRINT("info", ("ret_val = %d", ret_val));
    assert(FALSE);
    abort();
  }
  DBUG_RETURN(hash_value);
}


/*
  Set-up auto-partitioning for NDB Cluster

  SYNOPSIS
    set_auto_partitions()
    part_info                  Partition info struct to set-up
 
  RETURN VALUE
    NONE

  DESCRIPTION
    Set up the auto-partitioning scheme for tables that didn't define any
    partitioning. We use PARTITION BY KEY() in this case, which
    translates into partitioning by primary key if a primary key exists
    and by the hidden key otherwise.
*/
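/*
  Illustrative sketch, assuming a hypothetical table t1 with a primary key:

    CREATE TABLE t1 (pk INT PRIMARY KEY, val INT) ENGINE=NDBCLUSTER;

  is treated as if it had been declared with PARTITION BY KEY(), i.e. rows
  are distributed by a hash of the primary key; a table without an explicit
  primary key is distributed by the hidden key instead.
*/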

enum ndb_distribution_enum {
  NDB_DISTRIBUTION_KEYHASH= 0,
  NDB_DISTRIBUTION_LINHASH= 1
};
static const char* distribution_names[]= { "KEYHASH", "LINHASH", NullS };
static ulong opt_ndb_distribution;
static TYPELIB distribution_typelib= {
  array_elements(distribution_names) - 1,
  "",
  distribution_names,
  NULL
};
static MYSQL_SYSVAR_ENUM(
  distribution,                      /* name */
  opt_ndb_distribution,              /* var */
  PLUGIN_VAR_RQCMDARG,
  "Default distribution for new tables in ndb",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  NDB_DISTRIBUTION_KEYHASH,          /* default */
  &distribution_typelib              /* typelib */
);


void ha_ndbcluster::set_auto_partitions(partition_info *part_info)
{
  DBUG_ENTER("ha_ndbcluster::set_auto_partitions");
  part_info->list_of_part_fields= TRUE;
  part_info->part_type= HASH_PARTITION;
  switch (opt_ndb_distribution)
  {
  case NDB_DISTRIBUTION_KEYHASH:
    part_info->linear_hash_ind= FALSE;
    break;
  case NDB_DISTRIBUTION_LINHASH:
    part_info->linear_hash_ind= TRUE;
    break;
  default:
    assert(false);
    break;
  }
  DBUG_VOID_RETURN;
}


static int
create_table_set_range_data(const partition_info *part_info,
                            NdbDictionary::Table& ndbtab)
{
  const uint num_parts = part_info->num_parts;
  DBUG_ENTER("create_table_set_range_data");

  int32 *range_data= (int32*)my_malloc(PSI_INSTRUMENT_ME, num_parts*sizeof(int32), MYF(0));
  if (!range_data)
  {
    mem_alloc_error(num_parts*sizeof(int32));
    DBUG_RETURN(1);
  }
  for (uint i= 0; i < num_parts; i++)
  {
    longlong range_val= part_info->range_int_array[i];
    const bool unsigned_flag= part_info->part_expr->unsigned_flag;
    if (unsigned_flag)
      range_val-= 0x8000000000000000ULL;
    if (range_val < INT_MIN32 || range_val >= INT_MAX32)
    {
      if ((i != num_parts - 1) ||
          (range_val != LLONG_MAX))
      {
        my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
        my_free((char*)range_data, MYF(0));
        DBUG_RETURN(1);
      }
      range_val= INT_MAX32;
    }
    range_data[i]= (int32)range_val;
  }
  ndbtab.setRangeListData(range_data, num_parts);
  my_free((char*)range_data, MYF(0));
  DBUG_RETURN(0);
}


static int
create_table_set_list_data(const partition_info *part_info,
                           NdbDictionary::Table& ndbtab)
{
  const uint num_list_values = part_info->num_list_values;
  int32 *list_data= (int32*)my_malloc(PSI_INSTRUMENT_ME,
                                      num_list_values*2*sizeof(int32), MYF(0));
  DBUG_ENTER("create_table_set_list_data");

  if (!list_data)
  {
    mem_alloc_error(num_list_values*2*sizeof(int32));
    DBUG_RETURN(1);
  }
  for (uint i= 0; i < num_list_values; i++)
  {
    LIST_PART_ENTRY *list_entry= &part_info->list_array[i];
    longlong list_val= list_entry->list_value;
    const bool unsigned_flag= part_info->part_expr->unsigned_flag;
    if (unsigned_flag)
      list_val-= 0x8000000000000000ULL;
    if (list_val < INT_MIN32 || list_val > INT_MAX32)
    {
      my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB");
      my_free((char*)list_data, MYF(0));
      DBUG_RETURN(1);
    }
    list_data[2*i]= (int32)list_val;
    list_data[2*i+1]= list_entry->partition_id;
  }
  ndbtab.setRangeListData(list_data, 2*num_list_values);
  my_free((char*)list_data, MYF(0));
  DBUG_RETURN(0);
}

/*
  User-defined partitioning set-up. We need to check how many fragments the
  user wants defined and which node groups to put those into.

  All the functionality of the partition function, partition limits and so
  forth is entirely handled by the MySQL Server. There is one exception to
  this rule: for PARTITION BY KEY, NDB handles the hash function, so this
  type can be handled transparently also by NDB API programs.
  For RANGE, HASH and LIST partitioning, and for subpartitioning, NDB API
  programs must implement the function that maps a row to a partition.
*/
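/*
  Illustrative sketch, assuming a hypothetical table t2 and based on the
  code below: a user-defined scheme such as

    CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=NDBCLUSTER
      PARTITION BY RANGE (a) (PARTITION p0 VALUES LESS THAN (100),
                              PARTITION p1 VALUES LESS THAN (MAXVALUE));

  uses the UserDefined fragmentation type, is refused unless the server is
  started with --new, and gets an extra hidden $PART_FUNC_VALUE column that
  stores the partition function value for each row.
*/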

static int
create_table_set_up_partition_info(HA_CREATE_INFO* create_info,
                                   partition_info *part_info,
                                   NdbDictionary::Table& ndbtab)
{
  DBUG_ENTER("create_table_set_up_partition_info");

  if (part_info->part_type == HASH_PARTITION &&
      part_info->list_of_part_fields == TRUE)
  {
    Field **fields= part_info->part_field_array;

    DBUG_PRINT("info", ("Using HashMapPartition fragmentation type"));
    ndbtab.setFragmentType(NDBTAB::HashMapPartition);

    for (uint i= 0; i < part_info->part_field_list.elements; i++)
    {
      NDBCOL *col= ndbtab.getColumn(fields[i]->field_index);
      DBUG_PRINT("info",("setting dist key on %s", col->getName()));
      col->setPartitionKey(TRUE);
    }
  }
  else 
  {
    if (!current_thd->variables.new_mode)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          ER_ILLEGAL_HA_CREATE_OPTION,
                          ER(ER_ILLEGAL_HA_CREATE_OPTION),
                          ndbcluster_hton_name,
                          "LIST, RANGE and HASH partition disabled by default,"
                          " use --new option to enable");
      DBUG_RETURN(HA_ERR_UNSUPPORTED);
    }
    /*
      Create a shadow field for those tables that have user defined
      partitioning. This field stores the value of the partition
      function such that NDB can handle reorganisations of the data
      even when the MySQL Server isn't available to assist with
      calculation of the partition function value.
    */
    NDBCOL col;
    DBUG_PRINT("info", ("Generating partition func value field"));
    col.setName("$PART_FUNC_VALUE");
    col.setType(NdbDictionary::Column::Int);
    col.setLength(1);
    col.setNullable(FALSE);
    col.setPrimaryKey(FALSE);
    col.setAutoIncrement(FALSE);
    ndbtab.addColumn(col);
    if (part_info->part_type == RANGE_PARTITION)
    {
      const int error = create_table_set_range_data(part_info, ndbtab);
      if (error)
      {
        DBUG_RETURN(error);
      }
    }
    else if (part_info->part_type == LIST_PARTITION)
    {
      const int error = create_table_set_list_data(part_info, ndbtab);
      if (error)
      {
        DBUG_RETURN(error);
      }
    }

    DBUG_PRINT("info", ("Using UserDefined fragmentation type"));
    ndbtab.setFragmentType(NDBTAB::UserDefined);
  }

  const bool use_default_num_parts = part_info->use_default_num_partitions;
  ndbtab.setDefaultNoPartitionsFlag(use_default_num_parts);
  ndbtab.setLinearFlag(part_info->linear_hash_ind);
  {
    ha_rows max_rows= create_info->max_rows;
    ha_rows min_rows= create_info->min_rows;
    if (max_rows < min_rows)
      max_rows= min_rows;
    if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */
    {
      ndbtab.setMaxRows(max_rows);
      ndbtab.setMinRows(min_rows);
    }
  }

  {
    // Count the number of fragments to use for the table and
    // build an array describing which nodegroup should store each
    // partition (each partition is mapped to one fragment in the table).
    uint32 frag_data[MAX_PARTITIONS];
    ulong fd_index= 0;

    partition_element *part_elem;
    List_iterator<partition_element> part_it(part_info->partitions);
    while((part_elem = part_it++))
    {
      if (!part_info->is_sub_partitioned())
      {
        const Uint32 ng= part_elem->nodegroup_id;
        assert(fd_index < NDB_ARRAY_SIZE(frag_data));
        frag_data[fd_index++]= ng;
      }
      else
      {
        partition_element *subpart_elem;
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        while((subpart_elem = sub_it++))
        {
          const Uint32 ng= subpart_elem->nodegroup_id;
          assert(fd_index < NDB_ARRAY_SIZE(frag_data));
          frag_data[fd_index++]= ng;
        }
      }
    }

    // Double check number of partitions vs. fragments
    assert(part_info->get_tot_partitions() == fd_index);

    ndbtab.setFragmentCount(fd_index);
    ndbtab.setFragmentData(frag_data, fd_index);
  }
  DBUG_RETURN(0);
}

class NDB_ALTER_DATA : public inplace_alter_handler_ctx 
{
public:
  NDB_ALTER_DATA(NdbDictionary::Dictionary *dict,
		 const NdbDictionary::Table *table) :
    dictionary(dict),
    old_table(table),
    new_table(new NdbDictionary::Table(*table)),
    table_id(table->getObjectId()),
    old_table_version(table->getObjectVersion())
  {}
  ~NDB_ALTER_DATA()
  { delete new_table; }
  NdbDictionary::Dictionary *dictionary;
  const  NdbDictionary::Table *old_table;
  NdbDictionary::Table *new_table;
  Uint32 table_id;
  Uint32 old_table_version;
};
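
/*
  Illustrative sketch (hypothetical statement), based on the 'supported'
  flag mask in check_if_supported_inplace_alter() below: an operation such
  as

    ALTER TABLE t1 ADD COLUMN c INT COLUMN_FORMAT DYNAMIC, ALGORITHM=INPLACE;

  can be accepted for in-place (on-line) execution, while e.g. changing an
  existing column's type or renaming a column falls outside the supported
  mask and is answered with HA_ALTER_INPLACE_NOT_SUPPORTED, forcing a
  copying ALTER TABLE.
*/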

enum_alter_inplace_result
  ha_ndbcluster::check_if_supported_inplace_alter(TABLE *altered_table,
                                                  Alter_inplace_info *ha_alter_info)
{
  THD *thd= current_thd;
  HA_CREATE_INFO *create_info= ha_alter_info->create_info;
  Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
      ha_alter_info->handler_flags;
  const Alter_inplace_info::HA_ALTER_FLAGS supported=
    Alter_inplace_info::ADD_INDEX |
    Alter_inplace_info::DROP_INDEX |
    Alter_inplace_info::ADD_UNIQUE_INDEX |
    Alter_inplace_info::DROP_UNIQUE_INDEX |
    Alter_inplace_info::ADD_COLUMN |
    Alter_inplace_info::ALTER_COLUMN_DEFAULT |
    Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE |
    Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT |
    Alter_inplace_info::ADD_PARTITION |
    Alter_inplace_info::ALTER_TABLE_REORG |
    Alter_inplace_info::CHANGE_CREATE_OPTION |
    Alter_inplace_info::ADD_FOREIGN_KEY |
    Alter_inplace_info::DROP_FOREIGN_KEY |
    Alter_inplace_info::ALTER_INDEX_COMMENT;

  const Alter_inplace_info::HA_ALTER_FLAGS not_supported= ~supported;

  Alter_inplace_info::HA_ALTER_FLAGS add_column=
    Alter_inplace_info::ADD_COLUMN;

  const Alter_inplace_info::HA_ALTER_FLAGS adding=
    Alter_inplace_info::ADD_INDEX |
    Alter_inplace_info::ADD_UNIQUE_INDEX;

  const Alter_inplace_info::HA_ALTER_FLAGS dropping=
    Alter_inplace_info::DROP_INDEX |
    Alter_inplace_info::DROP_UNIQUE_INDEX;

  enum_alter_inplace_result result= HA_ALTER_INPLACE_SHARED_LOCK;

  DBUG_ENTER("ha_ndbcluster::check_if_supported_inplace_alter");
  partition_info *part_info= altered_table->part_info;
  const NDBTAB *old_tab= m_table;

  if (THDVAR(thd, use_copying_alter_table))
  {
    DBUG_PRINT("info", ("On-line alter table disabled"));
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
  }

  DBUG_PRINT("info", ("Passed alter flags 0x%llx", alter_flags));
  DBUG_PRINT("info", ("Supported 0x%llx", supported));
  DBUG_PRINT("info", ("Not supported 0x%llx", not_supported));
  DBUG_PRINT("info", ("alter_flags & not_supported 0x%llx",
                        alter_flags & not_supported));

  bool auto_increment_value_changed= false;
  bool max_rows_changed= false;
  if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
  {
    if (create_info->auto_increment_value !=
      table->file->stats.auto_increment_value)
      auto_increment_value_changed= true;
    if (create_info->used_fields & HA_CREATE_USED_MAX_ROWS)
      max_rows_changed= true;
  }

  if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
  {
    /*
      sql_partition.cc tries to compute what is going on
      and sets flags... which we clear here
    */
    if (part_info->use_default_num_partitions)
    {
      alter_flags= alter_flags & ~Alter_inplace_info::COALESCE_PARTITION;
      alter_flags= alter_flags & ~Alter_inplace_info::ADD_PARTITION;
    }
  }

  if (alter_flags & Alter_inplace_info::ALTER_COLUMN_DEFAULT &&
      !(alter_flags & Alter_inplace_info::ADD_COLUMN))
  {
    DBUG_PRINT("info", ("Altering default value is not supported"));
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
  }

  if (alter_flags & not_supported)
  {
    DBUG_PRINT("info", ("Detected unsupported change: 0x%llx",
                        alter_flags & not_supported));
    DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
  }

  if (alter_flags & Alter_inplace_info::ADD_COLUMN ||
      alter_flags & Alter_inplace_info::ADD_PARTITION ||
      alter_flags & Alter_inplace_info::ALTER_TABLE_REORG ||
      max_rows_changed)
  {
     Ndb *ndb= get_ndb(thd);
     NDBDICT *dict= ndb->getDictionary();
     ndb->setDatabaseName(m_dbname);
     NdbDictionary::Table new_tab= *old_tab;

     result= HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
     if (alter_flags & Alter_inplace_info::ADD_COLUMN)
     {
       NDBCOL col;

       /*
         Check that we are only adding columns
       */
       /*
         HA_COLUMN_DEFAULT_VALUE & HA_COLUMN_STORAGE & HA_COLUMN_FORMAT
         are set if they are specified in a later command,
         even if there is no actual change. This is probably a bug.
         Conclusion: add them to the add_column mask, so that we silently "accept" them.
         If someone tries to change a column, HA_CHANGE_COLUMN would be set,
         which we don't support, so we still return HA_ALTER_NOT_SUPPORTED in those cases.
       */
       add_column|= Alter_inplace_info::ALTER_COLUMN_DEFAULT;
       add_column|= Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE;
       add_column|= Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT;
       if (alter_flags & ~add_column)
       {
         DBUG_PRINT("info", ("Only add column exclusively can be performed on-line"));
         DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
       }
       /*
         Check for extra fields for hidden primary key
         or user defined partitioning
       */
       if (table_share->primary_key == MAX_KEY ||
           part_info->part_type != HASH_PARTITION ||
           !part_info->list_of_part_fields)
         DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);

       /* Find the new fields */
       for (uint i= table->s->fields; i < altered_table->s->fields; i++)
       {
         Field *field= altered_table->field[i];
         DBUG_PRINT("info", ("Found new field %s", field->field_name));
         DBUG_PRINT("info", ("storage_type %i, column_format %i",
                             (uint) field->field_storage_type(),
                             (uint) field->column_format()));
         if (!(field->flags & NO_DEFAULT_VALUE_FLAG))
         {
           my_ptrdiff_t src_offset= field->table->s->default_values 
             - field->table->record[0];
           if ((! field->is_real_null(src_offset)) ||
               ((field->flags & NOT_NULL_FLAG)))
           {
             DBUG_PRINT("info",("Adding column with non-null default value is not supported on-line"));
             DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
           }
         }
         /* Create new field to check if it can be added */
         set_my_errno(create_ndb_column(thd, col, field, create_info,
                                        COLUMN_FORMAT_TYPE_DYNAMIC));
         if (my_errno())
         {
           DBUG_PRINT("info", ("create_ndb_column returned %u", my_errno()));
           DBUG_RETURN(HA_ALTER_ERROR);
         }
         if (new_tab.addColumn(col))
         {
           set_my_errno(errno);
           DBUG_PRINT("info", ("NdbDictionary::Table::addColumn returned %u", my_errno()));
           DBUG_RETURN(HA_ALTER_ERROR);
         }
       }
     }

     if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
     {
       /* 
          Refuse if Max_rows has been used before...
          Workaround is to use ALTER ONLINE TABLE <t> MAX_ROWS=<bigger>;
       */
       if (old_tab->getMaxRows() != 0)
       {
         push_warning(current_thd,
                      Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
                      "Cannot online REORGANIZE a table with Max_Rows set.  "
                      "Use ALTER TABLE ... MAX_ROWS=<new_val> or offline REORGANIZE "
                      "to redistribute this table.");
         DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
       }
       new_tab.setFragmentCount(0);
       new_tab.setFragmentData(0, 0);
     }
     else if (alter_flags & Alter_inplace_info::ADD_PARTITION)
     {
       DBUG_PRINT("info", ("Adding partition (%u)", part_info->num_parts));
       new_tab.setFragmentCount(part_info->num_parts);
     }
     if (max_rows_changed)
     {
       ulonglong rows= create_info->max_rows;
       uint no_fragments= get_no_fragments(rows);
       uint reported_frags= no_fragments;
       if (adjusted_frag_count(ndb, no_fragments, reported_frags))
       {
         push_warning(current_thd,
                      Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
                      "Ndb might have problems storing the max amount "
                      "of rows specified");
       }
       if (reported_frags < old_tab->getFragmentCount())
       {
         DBUG_PRINT("info", ("Online reduction in number of fragments not supported"));
         DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
       }
       new_tab.setFragmentCount(reported_frags);
       new_tab.setDefaultNoPartitionsFlag(false);
       new_tab.setFragmentData(0, 0);
     }

     NDB_Modifiers table_modifiers(ndb_table_modifiers);
     table_modifiers.parse(thd, "NDB_TABLE=", create_info->comment.str,
                           create_info->comment.length);
     const NDB_Modifier* mod_nologging = table_modifiers.get("NOLOGGING");

     if (mod_nologging->m_found)
     {
       new_tab.setLogging(!mod_nologging->m_val_bool);
     }
     
     if (dict->supportedAlterTable(*old_tab, new_tab))
     {
       DBUG_PRINT("info", ("Adding column(s) supported on-line"));
     }
     else
     {
       DBUG_PRINT("info",("Adding column not supported on-line"));
       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
     }
  }

  /*
    Check that we are not adding multiple indexes
  */
  if (alter_flags & adding)
  {
    if (((altered_table->s->keys - table->s->keys) != 1) ||
        (alter_flags & dropping))
    {
       DBUG_PRINT("info",("Only one index can be added on-line"));
       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }
  }

  /*
    Check that we are not dropping multiple indexes
  */
  if (alter_flags & dropping)
  {
    if (((table->s->keys - altered_table->s->keys) != 1) ||
        (alter_flags & adding))
    {
       DBUG_PRINT("info",("Only one index can be dropped on-line"));
       DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }
  }

  for (uint i= 0; i < table->s->fields; i++)
  {
    Field *field= table->field[i];
    const NDBCOL *col= m_table->getColumn(i);

    NDBCOL new_col;
    create_ndb_column(0, new_col, field, create_info);

    bool index_on_column = false;
    /**
     * Check all indexes to determine whether the column is indexed, instead
     *   of checking field->flags (PRI_KEY_FLAG | UNIQUE_KEY_FLAG |
     *   MULTIPLE_KEY_FLAG), since those flags appear to be set only on the
     *   first column of a multi-part index.
     */
    for (uint j= 0; j<table->s->keys; j++)
    {
      KEY* key_info= table->key_info + j;
      KEY_PART_INFO* key_part= key_info->key_part;
      KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
      for (; key_part != end; key_part++)
      {
        if (key_part->field->field_index == i)
        {
          index_on_column= true;
          j= table->s->keys; // break outer loop
          break;
        }
      }
    }

    if (index_on_column == false && (alter_flags & adding))
    {
      for (uint j= table->s->keys; j<altered_table->s->keys; j++)
      {
        KEY* key_info= altered_table->key_info + j;
        KEY_PART_INFO* key_part= key_info->key_part;
        KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts;
        for (; key_part != end; key_part++)
        {
          if (key_part->field->field_index == i)
          {
            index_on_column= true;
            j= altered_table->s->keys; // break outer loop
            break;
          }
        }
      }
    }

    /**
     * This is a "copy" of code in ::create()
     *   that "auto-converts" indexed columns to memory storage
     *   (unless disk storage is explicitly requested).
     * This is needed so that the getStorageType() comparison between the
     *   old and new column further down behaves as in ::create().
     */
    if (index_on_column)
    {
      if (field->field_storage_type() == HA_SM_DISK)
      {
        DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
      }
      new_col.setStorageType(NdbDictionary::Column::StorageTypeMemory);
    }
    else if (field->field_storage_type() == HA_SM_DEFAULT)
    {
      /**
       * If the user didn't specify any column format, keep the old one
       *   to allow as many ALTERs as possible to be done online.
       */
      new_col.setStorageType(col->getStorageType());
    }

    if (col->getStorageType() != new_col.getStorageType())
    {
      DBUG_PRINT("info", ("Column storage media is changed"));
      DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }

    if (field->flags & FIELD_IS_RENAMED)
    {
      DBUG_PRINT("info", ("Field has been renamed, copy table"));
      DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }

    if ((field->flags & FIELD_IN_ADD_INDEX) &&
        (col->getStorageType() == NdbDictionary::Column::StorageTypeDisk))
    {
      DBUG_PRINT("info", ("add/drop index not supported for disk stored column"));
      DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }
  }

  /* Check that only auto_increment value was changed */
  if (auto_increment_value_changed)
  {
    if (create_info->used_fields ^ ~HA_CREATE_USED_AUTO)
    {
      DBUG_PRINT("info", ("Not only auto_increment value changed"));
      DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }
  }
  else
  {
    /* Check that row format didn't change */
    if (create_info->used_fields & HA_CREATE_USED_AUTO &&
        get_row_type() != create_info->row_type)
    {
      DBUG_PRINT("info", ("Row format changed"));
      DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
    }
  }
  DBUG_PRINT("info", ("Ndb supports ALTER on-line"));
  DBUG_RETURN(result);
}

bool
ha_ndbcluster::prepare_inplace_alter_table(TABLE *altered_table,
                                              Alter_inplace_info *ha_alter_info)
{
  int error= 0;
  uint i;
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= get_ndb(thd);
  NDBDICT *dict= ndb->getDictionary();
  ndb->setDatabaseName(m_dbname);

  HA_CREATE_INFO *create_info= ha_alter_info->create_info;

  const Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
    ha_alter_info->handler_flags;

  const Alter_inplace_info::HA_ALTER_FLAGS adding=
    Alter_inplace_info::ADD_INDEX |
    Alter_inplace_info::ADD_UNIQUE_INDEX;

  const Alter_inplace_info::HA_ALTER_FLAGS dropping=
    Alter_inplace_info::DROP_INDEX |
    Alter_inplace_info::DROP_UNIQUE_INDEX;

  DBUG_ENTER("ha_ndbcluster::prepare_inplace_alter_table");

  ha_alter_info->handler_ctx= 0;
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::prepare_inplace_alter_table"))
    DBUG_RETURN(true);

  NDB_ALTER_DATA *alter_data;
  if (!(alter_data= new NDB_ALTER_DATA(dict, m_table)))
    DBUG_RETURN(true);

  const NDBTAB* const old_tab = alter_data->old_table;
  NdbDictionary::Table * const new_tab = alter_data->new_table;
  ha_alter_info->handler_ctx= alter_data;

  DBUG_PRINT("info", ("altered_table: '%s', alter_flags: 0x%llx",
                      altered_table->s->table_name.str,
                      alter_flags));

  bool auto_increment_value_changed= false;
  bool max_rows_changed= false;
  if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
  {
    if (create_info->auto_increment_value !=
      table->file->stats.auto_increment_value)
      auto_increment_value_changed= true;
    if (create_info->used_fields & HA_CREATE_USED_MAX_ROWS)
      max_rows_changed= true;
  }

  prepare_for_alter();

  if (dict->beginSchemaTrans() == -1)
  {
    DBUG_PRINT("info", ("Failed to start schema transaction"));
    ERR_PRINT(dict->getNdbError());
    error= ndb_to_mysql_error(&dict->getNdbError());
    table->file->print_error(error, MYF(0));
    goto err;
  }

  if (alter_flags & adding)
  {
    KEY           *key_info;
    KEY           *key;
    uint          *idx_p;
    uint          *idx_end_p;
    KEY_PART_INFO *key_part;
    KEY_PART_INFO *part_end;
    DBUG_PRINT("info", ("Adding indexes"));
    key_info= (KEY*) thd->alloc(sizeof(KEY) * ha_alter_info->index_add_count);
    key= key_info;
    for (idx_p=  ha_alter_info->index_add_buffer,
	 idx_end_p= idx_p + ha_alter_info->index_add_count;
	 idx_p < idx_end_p;
	 idx_p++, key++)
    {
      /* Copy the KEY struct. */
      *key= ha_alter_info->key_info_buffer[*idx_p];
      /* Fix the key parts. */
      part_end= key->key_part + key->user_defined_key_parts;
      for (key_part= key->key_part; key_part < part_end; key_part++)
	key_part->field= table->field[key_part->fieldnr];
    }
    if ((error= add_index_impl(thd, altered_table, key_info,
                               ha_alter_info->index_add_count)))
    {
      /*
	Exchange the key_info for the error message. If we exchange
	key number by key name in the message later, we need correct info.
      */
      KEY *save_key_info= table->key_info;
      table->key_info= key_info;
      table->file->print_error(error, MYF(0));
      table->key_info= save_key_info;
      goto abort;
    }
  }

  if (alter_flags & dropping)
  {
    uint          *key_numbers;
    uint          *keyno_p;
    KEY           **idx_p;
    KEY           **idx_end_p;
    DBUG_PRINT("info", ("Renumbering indexes"));
    /* The prepare_drop_index() method takes an array of key numbers. */
    key_numbers= (uint*) thd->alloc(sizeof(uint) * ha_alter_info->index_drop_count);
    keyno_p= key_numbers;
    /* Get the number of each key. */
    for (idx_p= ha_alter_info->index_drop_buffer,
	 idx_end_p= idx_p + ha_alter_info->index_drop_count;
	 idx_p < idx_end_p;
	 idx_p++, keyno_p++)
    {
      // Find the key number matching the key to be dropped
      KEY *keyp= *idx_p;
      uint i;
      for(i=0; i < table->s->keys; i++)
      {
	if (keyp == table->key_info + i)
	  break;
      }
      DBUG_PRINT("info", ("Dropping index %u", i)); 
      *keyno_p= i;
    }
    /*
      Tell the handler to prepare for drop indexes.
      This re-numbers the indexes to get rid of gaps.
    */
    if ((error= prepare_drop_index(table, key_numbers,
				   ha_alter_info->index_drop_count)))
    {
      table->file->print_error(error, MYF(0));
      goto abort;
    }
  }

  if (alter_flags &  Alter_inplace_info::ADD_COLUMN)
  {
     NDBCOL col;

     /* Find the new fields */
     for (i= table->s->fields; i < altered_table->s->fields; i++)
     {
       Field *field= altered_table->field[i];
       DBUG_PRINT("info", ("Found new field %s", field->field_name));
       set_my_errno(create_ndb_column(thd, col, field, create_info,
                                      COLUMN_FORMAT_TYPE_DYNAMIC));
       if (my_errno())
       {
         error= my_errno();
         goto abort;
       }
       /*
         If the user has not specified the field format
         make it dynamic to enable on-line add attribute
       */
       if (field->column_format() == COLUMN_FORMAT_TYPE_DEFAULT &&
           create_info->row_type == ROW_TYPE_DEFAULT &&
           col.getDynamic())
       {
         push_warning_printf(thd, Sql_condition::SL_WARNING,
                             ER_ILLEGAL_HA_CREATE_OPTION,
                             "Converted FIXED field '%s' to DYNAMIC "
                             "to enable on-line ADD COLUMN",
                             field->field_name);
       }
       new_tab->addColumn(col);
     }
  }

  if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG ||
      alter_flags & Alter_inplace_info::ADD_PARTITION ||
      max_rows_changed)
  {
    if (alter_flags & Alter_inplace_info::ALTER_TABLE_REORG)
    {
      new_tab->setFragmentCount(0);
      new_tab->setFragmentData(0, 0);
    }
    else if (alter_flags & Alter_inplace_info::ADD_PARTITION)
    {
      partition_info *part_info= altered_table->part_info;
      new_tab->setFragmentCount(part_info->num_parts);
    }
    else if (max_rows_changed)
    {
      ulonglong rows= create_info->max_rows;
      uint no_fragments= get_no_fragments(rows);
      uint reported_frags= no_fragments;
      if (adjusted_frag_count(ndb, no_fragments, reported_frags))
      {
        assert(false); /* Checked above */
      }
      if (reported_frags < old_tab->getFragmentCount())
      {
        assert(false);
        DBUG_RETURN(false);
      }
      /* Note we don't set the ndb table's max_rows param, as that 
       * is considered a 'real' change
       */
      //new_tab->setMaxRows(create_info->max_rows);
      new_tab->setFragmentCount(reported_frags);
      new_tab->setDefaultNoPartitionsFlag(false);
      new_tab->setFragmentData(0, 0);
    }
    
    int res= dict->prepareHashMap(*old_tab, *new_tab);
    if (res == -1)
    {
      const NdbError err= dict->getNdbError();
      set_my_errno(ndb_to_mysql_error(&err));
      goto abort;
    }
  }

  if (alter_flags & Alter_inplace_info::ADD_FOREIGN_KEY)
  {
    int res= create_fks(thd, ndb);
    if (res != 0)
    {
      /*
        Unlike CREATE, ALTER for some reason does not translate
        the HA_ error code, so translate it here to be InnoDB compatible.
      */
      if (res == HA_ERR_CANNOT_ADD_FOREIGN)
      {
        DBUG_PRINT("info", ("change error %d to %d",
                            HA_ERR_CANNOT_ADD_FOREIGN, ER_CANNOT_ADD_FOREIGN));
        res= ER_CANNOT_ADD_FOREIGN;
      }
      error= res;
      set_my_errno(error);
      my_error(error, MYF(0), 0);
      goto abort;
    }
  }

  DBUG_RETURN(false);
abort:
  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
        == -1)
  {
    DBUG_PRINT("info", ("Failed to abort schema transaction"));
    ERR_PRINT(dict->getNdbError());
    error= ndb_to_mysql_error(&dict->getNdbError());
  }

err:
  DBUG_RETURN(true);
}

int ha_ndbcluster::alter_frm(const char *file,
                             NDB_ALTER_DATA *alter_data)
{
  uchar *data= NULL, *pack_data= NULL;
  size_t length, pack_length;
  int error= 0;

  DBUG_ENTER("alter_frm");

  DBUG_PRINT("enter", ("file: %s", file));

  NDBDICT *dict= alter_data->dictionary;

  // TODO handle this
  assert(m_table != 0);

  assert(get_ndb_share_state(m_share) == NSS_ALTERED);
  if (readfrm(file, &data, &length) ||
      packfrm(data, length, &pack_data, &pack_length))
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    DBUG_PRINT("info", ("Missing frm for %s", m_tabname));
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
    error= 1;
    my_error(ER_FILE_NOT_FOUND, MYF(0), file,
             my_errno(), my_strerror(errbuf, sizeof(errbuf), my_errno()));
  }
  else
  {
    DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb",
                        m_tabname));
    const NDBTAB *old_tab= alter_data->old_table;
    NdbDictionary::Table *new_tab= alter_data->new_table;

    new_tab->setFrm(pack_data, (Uint32)pack_length);
    if (dict->alterTableGlobal(*old_tab, *new_tab))
    {
      DBUG_PRINT("info", ("On-line alter of table %s failed", m_tabname));
      error= ndb_to_mysql_error(&dict->getNdbError());
      my_error(error, MYF(0), m_tabname);
    }
    my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
    my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
  }

  /* ndb_share reference schema(?) free */
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) free  use_count: %u",
                           m_share->key_string(), m_share->use_count));

  DBUG_RETURN(error);
}

bool
ha_ndbcluster::inplace_alter_table(TABLE *altered_table,
                                   Alter_inplace_info *ha_alter_info)
{
  DBUG_ENTER("ha_ndbcluster::inplace_alter_table");
  int error= 0;
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  HA_CREATE_INFO *create_info= ha_alter_info->create_info;
  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
  NDBDICT *dict= alter_data->dictionary;
  const Alter_inplace_info::HA_ALTER_FLAGS alter_flags=
    ha_alter_info->handler_flags;
  const Alter_inplace_info::HA_ALTER_FLAGS dropping=
    Alter_inplace_info::DROP_INDEX |
    Alter_inplace_info::DROP_UNIQUE_INDEX;

  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::inplace_alter_table"))
  {
    DBUG_RETURN(true);
  }

  bool auto_increment_value_changed= false;
  if (alter_flags & Alter_inplace_info::CHANGE_CREATE_OPTION)
  {
    if (create_info->auto_increment_value !=
      table->file->stats.auto_increment_value)
      auto_increment_value_changed= true;
  }

  if (alter_flags & dropping)
  {
    /* Tell the handler to finally drop the indexes. */
    if ((error= final_drop_index(table)))
    {
      print_error(error, MYF(0));
      goto abort;
    }
  }

  if (alter_flags & Alter_inplace_info::DROP_FOREIGN_KEY)
  {
    const NDBTAB* tab= alter_data->old_table;
    if ((error= drop_fk_for_online_alter(thd, thd_ndb->ndb, dict, tab)) != 0)
    {
      print_error(error, MYF(0));
      goto abort;
    }
  }

  DBUG_PRINT("info", ("getting frm file %s", altered_table->s->path.str));
  error= alter_frm(altered_table->s->path.str, alter_data);
  if (!error)
  {
    /*
     * Alter successful, commit schema transaction
     */
    if (dict->endSchemaTrans() == -1)
    {
      error= ndb_to_mysql_error(&dict->getNdbError());
      DBUG_PRINT("info", ("Failed to commit schema transaction, error %u",
                          error));
      table->file->print_error(error, MYF(0));
      goto err;
    }
    if (auto_increment_value_changed)
      error= set_auto_inc_val(thd, create_info->auto_increment_value);
    if (error)
    {
      DBUG_PRINT("info", ("Failed to set auto_increment value"));
      goto err;
    }
  }
  else // if (error)
  {
abort:
    if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort)
        == -1)
    {
      DBUG_PRINT("info", ("Failed to abort schema transaction"));
      ERR_PRINT(dict->getNdbError());
    }
  }

err:
  DBUG_RETURN(error ? true : false);
}

bool
ha_ndbcluster::commit_inplace_alter_table(TABLE *altered_table,
                                          Alter_inplace_info *ha_alter_info,
                                          bool commit)
{
  DBUG_ENTER("ha_ndbcluster::commit_inplace_alter_table");

  if (!commit)
    DBUG_RETURN(abort_inplace_alter_table(altered_table,
                                          ha_alter_info));
  THD *thd= current_thd;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
  if (!thd_ndb->has_required_global_schema_lock("ha_ndbcluster::commit_inplace_alter_table"))
  {
    DBUG_RETURN(true); // Error
  }

  const char *db= table->s->db.str;
  const char *name= table->s->table_name.str;
  uint32 table_id= 0, table_version= 0;
  assert(alter_data != 0);
  if (alter_data)
  {
    table_id= alter_data->table_id;
    table_version= alter_data->old_table_version;
  }
  ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
                           db, name,
                           table_id, table_version,
                           SOT_ONLINE_ALTER_TABLE_PREPARE,
                           NULL, NULL);
  delete alter_data;
  ha_alter_info->handler_ctx= 0;
  set_ndb_share_state(m_share, NSS_INITIAL);
  free_share(&m_share); // Decrease ref_count
  DBUG_RETURN(false); // OK
}

bool
ha_ndbcluster::abort_inplace_alter_table(TABLE *altered_table,
                                         Alter_inplace_info *ha_alter_info)
{
  DBUG_ENTER("ha_ndbcluster::abort_inplace_alter_table");

  NDB_ALTER_DATA *alter_data= (NDB_ALTER_DATA *) ha_alter_info->handler_ctx;
  if (!alter_data)
  {
    // Could not find any alter_data, nothing to abort or already aborted
    DBUG_RETURN(false);
  }

  NDBDICT *dict= alter_data->dictionary;
  if (dict->endSchemaTrans(NdbDictionary::Dictionary::SchemaTransAbort) == -1)
  {
    DBUG_PRINT("info", ("Failed to abort schema transaction"));
    ERR_PRINT(dict->getNdbError());
  }
  /* ndb_share reference schema free */
  DBUG_PRINT("NDB_SHARE", ("%s binlog schema free  use_count: %u",
                           m_share->key_string(), m_share->use_count));
  delete alter_data;
  ha_alter_info->handler_ctx= 0;
  set_ndb_share_state(m_share, NSS_INITIAL);
  free_share(&m_share); // Decrease ref_count
  DBUG_RETURN(false);
}

void ha_ndbcluster::notify_table_changed()
{
  DBUG_ENTER("ha_ndbcluster::notify_table_changed ");

  /*
    All mysqld's will read the frm from disk and set up a new
    event operation for the table (new_op).
  */
  THD *thd= current_thd;
  const char *db= table->s->db.str;
  const char *name= table->s->table_name.str;
  uint32 table_id= 0, table_version= 0;

  /*
    Get table id/version for new table
  */
  {
    Ndb* ndb= get_ndb(thd);
    assert(ndb != 0);
    if (ndb)
    {
      ndb->setDatabaseName(db);
      Ndb_table_guard ndbtab(ndb->getDictionary(), name);
      const NDBTAB *new_tab= ndbtab.get_table();
      assert(new_tab != 0);
      if (new_tab)
      {
        table_id= new_tab->getObjectId();
        table_version= new_tab->getObjectVersion();
      }
    }
  }

  /*
    All mysqld's will switch to using the new_op and delete the old
    event operation.
  */
  ndbcluster_log_schema_op(thd, thd->query().str, thd->query().length,
                           db, name,
                           table_id, table_version,
                           SOT_ONLINE_ALTER_TABLE_COMMIT,
                           NULL, NULL);

  DBUG_VOID_RETURN;
}

static
bool set_up_tablespace(st_alter_tablespace *alter_info,
                       NdbDictionary::Tablespace *ndb_ts)
{
  if (alter_info->extent_size >= (Uint64(1) << 32))
  {
    // TODO set correct error
    return TRUE;
  }
  ndb_ts->setName(alter_info->tablespace_name);
  ndb_ts->setExtentSize(Uint32(alter_info->extent_size));
  ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name);
  return FALSE;
}

static
bool set_up_datafile(st_alter_tablespace *alter_info,
                     NdbDictionary::Datafile *ndb_df)
{
  if (alter_info->max_size > 0)
  {
    my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0));
    return TRUE;
  }
  ndb_df->setPath(alter_info->data_file_name);
  ndb_df->setSize(alter_info->initial_size);
  ndb_df->setTablespace(alter_info->tablespace_name);
  return FALSE;
}

static
bool set_up_logfile_group(st_alter_tablespace *alter_info,
                          NdbDictionary::LogfileGroup *ndb_lg)
{
  if (alter_info->undo_buffer_size >= (Uint64(1) << 32))
  {
    // TODO set correct error
    return TRUE;
  }

  ndb_lg->setName(alter_info->logfile_group_name);
  ndb_lg->setUndoBufferSize(Uint32(alter_info->undo_buffer_size));
  return FALSE;
}

static
bool set_up_undofile(st_alter_tablespace *alter_info,
                     NdbDictionary::Undofile *ndb_uf)
{
  ndb_uf->setPath(alter_info->undo_file_name);
  ndb_uf->setSize(alter_info->initial_size);
  ndb_uf->setLogfileGroup(alter_info->logfile_group_name);
  return FALSE;
}


/**
  Get the tablespace name from the NDB dictionary for the given table in the
  given schema.

  @note For NDB tables with version before 50120, the server must ask the
        SE for the tablespace name, because for these tables, the tablespace
        name is not stored in the .FRM file, but only within the SE itself.

  @note The function is essentially doing the same as the corresponding code
        block in the function 'get_metadata()', except for the handling of
        empty strings, which are in this case returned as "" rather than NULL.

  @param       thd              Thread context.
  @param       db_name          Name of the relevant schema.
  @param       table_name       Name of the relevant table.
  @param [out] tablespace_name  Name of the tablespace containing the table.

  @return Operation status.
    @retval == 0  Success.
    @retval != 0  Error (handler error code returned).
 */

static
int ndbcluster_get_tablespace(THD* thd,
                              LEX_CSTRING db_name,
                              LEX_CSTRING table_name,
                              LEX_CSTRING *tablespace_name)
{
  DBUG_ENTER("ndbcluster_get_tablespace");
  DBUG_PRINT("enter", ("db_name: %s, table_name: %s", db_name.str,
             table_name.str));
  assert(tablespace_name != NULL);

  Ndb* ndb= check_ndb_in_thd(thd);
  if (ndb == NULL)
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  NDBDICT *dict= ndb->getDictionary();
  const NDBTAB *tab= NULL;

  ndb->setDatabaseName(db_name.str);
  Ndb_table_guard ndbtab_g(dict, table_name.str);
  if (!(tab= ndbtab_g.get_table()))
    ERR_RETURN(dict->getNdbError());

  Uint32 id;
  if (tab->getTablespace(&id))
  {
    NdbDictionary::Tablespace ts= dict->getTablespace(id);
    NdbError ndberr= dict->getNdbError();
    if (ndberr.classification == NdbError::NoError)
    {
      const char *tablespace= ts.getName();
      assert(tablespace);
      const size_t tablespace_len= strlen(tablespace);
      DBUG_PRINT("info", ("Found tablespace '%s'", tablespace));
      thd->make_lex_string(tablespace_name, tablespace, tablespace_len, false);
    }
  }

  DBUG_RETURN(0);
}
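
/*
  Implements CREATE/ALTER/DROP TABLESPACE and LOGFILE GROUP for NDB.

  Illustrative sketch of the DDL handled here (hypothetical names):

    CREATE LOGFILE GROUP lg1
      ADD UNDOFILE 'undo1.log' INITIAL_SIZE 16M UNDO_BUFFER_SIZE 2M
      ENGINE=NDBCLUSTER;

    CREATE TABLESPACE ts1
      ADD DATAFILE 'data1.dat' USE LOGFILE GROUP lg1 INITIAL_SIZE 32M
      ENGINE=NDBCLUSTER;

    ALTER TABLESPACE ts1
      ADD DATAFILE 'data2.dat' INITIAL_SIZE 32M ENGINE=NDBCLUSTER;

  Each successful change is distributed to the other mysqld's via
  ndbcluster_log_schema_op() at the end of the function.
*/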

static
int ndbcluster_alter_tablespace(handlerton *hton,
                                THD* thd, st_alter_tablespace *alter_info)
{
  int is_tablespace= 0;
  NdbError err;
  NDBDICT *dict;
  int error;
  const char *errmsg= NULL;
  Ndb *ndb;
  DBUG_ENTER("ndbcluster_alter_tablespace");

  ndb= check_ndb_in_thd(thd);
  if (ndb == NULL)
  {
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  }
  dict= ndb->getDictionary();

  uint32 table_id= 0, table_version= 0;
  switch (alter_info->ts_cmd_type){
  case (CREATE_TABLESPACE):
  {
    error= ER_CREATE_FILEGROUP_FAILED;
    
    NdbDictionary::Tablespace ndb_ts;
    NdbDictionary::Datafile ndb_df;
    NdbDictionary::ObjectId objid;
    if (set_up_tablespace(alter_info, &ndb_ts))
    {
      DBUG_RETURN(1);
    }
    if (set_up_datafile(alter_info, &ndb_df))
    {
      DBUG_RETURN(1);
    }
    errmsg= "TABLESPACE";
    if (dict->createTablespace(ndb_ts, &objid))
    {
      DBUG_PRINT("error", ("createTablespace returned %d", error));
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnExtentRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Extent size rounded up to kernel page size");
    }
    DBUG_PRINT("alter_info", ("Successfully created Tablespace"));
    errmsg= "DATAFILE";
    if (dict->createDatafile(ndb_df))
    {
      err= dict->getNdbError();
      NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName());
      if (dict->getNdbError().code == 0 &&
	  tmp.getObjectId() == objid.getObjectId() &&
	  tmp.getObjectVersion() == objid.getObjectVersion())
      {
	dict->dropTablespace(tmp);
      }
      
      DBUG_PRINT("error", ("createDatafile returned %d", error));
      goto ndberror2;
    }
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnDatafileRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Datafile size rounded up to extent size");
    }
    else /* produce only 1 message */
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnDatafileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Datafile size rounded down to extent size");
    }
    is_tablespace= 1;
    break;
  }
  case (ALTER_TABLESPACE):
  {
    error= ER_ALTER_FILEGROUP_FAILED;
    if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE)
    {
      NdbDictionary::Datafile ndb_df;
      if (set_up_datafile(alter_info, &ndb_df))
      {
	DBUG_RETURN(1);
      }
      errmsg= " CREATE DATAFILE";
      NdbDictionary::ObjectId objid;
      if (dict->createDatafile(ndb_df, false, &objid))
      {
	goto ndberror;
      }
      table_id= objid.getObjectId();
      table_version= objid.getObjectVersion();
      if (dict->getWarningFlags() &
          NdbDictionary::Dictionary::WarnDatafileRoundUp)
      {
        push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                            dict->getWarningFlags(),
                            "Datafile size rounded up to extent size");
      }
      else /* produce only 1 message */
      if (dict->getWarningFlags() &
          NdbDictionary::Dictionary::WarnDatafileRoundDown)
      {
        push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                            dict->getWarningFlags(),
                            "Datafile size rounded down to extent size");
      }
    }
    else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE)
    {
      NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name);
      NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name);
      NdbDictionary::ObjectId objid;
      df.getTablespaceId(&objid);
      table_id = df.getObjectId();
      table_version = df.getObjectVersion();
      if (ts.getObjectId() == objid.getObjectId() && 
	  strcmp(df.getPath(), alter_info->data_file_name) == 0)
      {
	errmsg= " DROP DATAFILE";
	if (dict->dropDatafile(df))
	{
	  goto ndberror;
	}
      }
      else
      {
	DBUG_PRINT("error", ("No such datafile"));
	my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE");
	DBUG_RETURN(1);
      }
    }
    else
    {
      DBUG_PRINT("error", ("Unsupported alter tablespace: %d", 
			   alter_info->ts_alter_tablespace_type));
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    is_tablespace= 1;
    break;
  }
  case (CREATE_LOGFILE_GROUP):
  {
    error= ER_CREATE_FILEGROUP_FAILED;
    NdbDictionary::LogfileGroup ndb_lg;
    NdbDictionary::Undofile ndb_uf;
    NdbDictionary::ObjectId objid;
    if (alter_info->undo_file_name == NULL)
    {
      /*
	REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    if (set_up_logfile_group(alter_info, &ndb_lg))
    {
      DBUG_RETURN(1);
    }
    errmsg= "LOGFILE GROUP";
    if (dict->createLogfileGroup(ndb_lg, &objid))
    {
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndobufferRoundUp)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Undo buffer size rounded up to kernel page size");
    }
    DBUG_PRINT("alter_info", ("Successfully created Logfile Group"));
    if (set_up_undofile(alter_info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
    errmsg= "UNDOFILE";
    if (dict->createUndofile(ndb_uf))
    {
      err= dict->getNdbError();
      NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName());
      if (dict->getNdbError().code == 0 &&
	  tmp.getObjectId() == objid.getObjectId() &&
	  tmp.getObjectVersion() == objid.getObjectVersion())
      {
	dict->dropLogfileGroup(tmp);
      }
      goto ndberror2;
    }
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndofileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Undofile size rounded down to kernel page size");
    }
    break;
  }
  case (ALTER_LOGFILE_GROUP):
  {
    error= ER_ALTER_FILEGROUP_FAILED;
    if (alter_info->undo_file_name == NULL)
    {
      /*
	REDO files in LOGFILE GROUP not supported yet
      */
      DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
    }
    NdbDictionary::Undofile ndb_uf;
    if (set_up_undofile(alter_info, &ndb_uf))
    {
      DBUG_RETURN(1);
    }
    errmsg= "CREATE UNDOFILE";
    NdbDictionary::ObjectId objid;
    if (dict->createUndofile(ndb_uf, false, &objid))
    {
      goto ndberror;
    }
    table_id = objid.getObjectId();
    table_version = objid.getObjectVersion();
    if (dict->getWarningFlags() &
        NdbDictionary::Dictionary::WarnUndofileRoundDown)
    {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          dict->getWarningFlags(),
                          "Undofile size rounded down to kernel page size");
    }
    break;
  }
  case (DROP_TABLESPACE):
  {
    error= ER_DROP_FILEGROUP_FAILED;
    errmsg= "TABLESPACE";
    NdbDictionary::Tablespace ts=
      dict->getTablespace(alter_info->tablespace_name);
    table_id= ts.getObjectId();
    table_version= ts.getObjectVersion();
    if (dict->dropTablespace(ts))
    {
      goto ndberror;
    }
    is_tablespace= 1;
    break;
  }
  case (DROP_LOGFILE_GROUP):
  {
    error= ER_DROP_FILEGROUP_FAILED;
    errmsg= "LOGFILE GROUP";
    NdbDictionary::LogfileGroup lg=
      dict->getLogfileGroup(alter_info->logfile_group_name);
    table_id= lg.getObjectId();
    table_version= lg.getObjectVersion();
    if (dict->dropLogfileGroup(lg))
    {
      goto ndberror;
    }
    break;
  }
  case (CHANGE_FILE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  case (ALTER_ACCESS_MODE_TABLESPACE):
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  default:
  {
    DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED);
  }
  }
  if (is_tablespace)
    ndbcluster_log_schema_op(thd,
                             thd->query().str, thd->query().length,
                             "", alter_info->tablespace_name,
                             table_id, table_version,
                             SOT_TABLESPACE, NULL, NULL);
  else
    ndbcluster_log_schema_op(thd,
                             thd->query().str, thd->query().length,
                             "", alter_info->logfile_group_name,
                             table_id, table_version,
                             SOT_LOGFILE_GROUP, NULL, NULL);
  DBUG_RETURN(FALSE);

ndberror:
  err= dict->getNdbError();
ndberror2:
  ndb_to_mysql_error(&err);
  
  my_error(error, MYF(0), errmsg);
  DBUG_RETURN(1);
}


bool ha_ndbcluster::get_num_parts(const char *name, uint *num_parts)
{
  THD *thd= current_thd;
  Ndb *ndb;
  NDBDICT *dict;
  int err= 0;
  DBUG_ENTER("ha_ndbcluster::get_num_parts");

  set_dbname(name);
  set_tabname(name);
  for (;;)
  {
    if (check_ndb_connection(thd))
    {
      err= HA_ERR_NO_CONNECTION;
      break;
    }
    ndb= get_ndb(thd);
    ndb->setDatabaseName(m_dbname);
    Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname);
    if (!ndbtab_g.get_table())
      ERR_BREAK(dict->getNdbError(), err);
    *num_parts= ndbtab_g.get_table()->getFragmentCount();
    DBUG_RETURN(FALSE);
  }

  print_error(err, MYF(0));
  DBUG_RETURN(TRUE);
}

static int ndbcluster_fill_files_table(handlerton *hton, 
                                       THD *thd, 
                                       TABLE_LIST *tables,
                                       Item *cond)
{
  TABLE* table= tables->table;
  Ndb *ndb= check_ndb_in_thd(thd);
  NdbDictionary::Dictionary* dict= ndb->getDictionary();
  NdbDictionary::Dictionary::List dflist;
  NdbError ndberr;
  uint i;
  DBUG_ENTER("ndbcluster_fill_files_table");

  dict->listObjects(dflist, NdbDictionary::Object::Datafile);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < dflist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
    uint id;
    
    g_ndb_cluster_connection->init_get_next_node(iter);

    while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
    {
      init_fill_schema_files_row(table);
      NdbDictionary::Datafile df= dict->getDatafile(id, elt.name);
      ndberr= dict->getNdbError();
      if(ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;

        if (ndberr.classification == NdbError::UnknownResultError)
          continue;

        ERR_RETURN(ndberr);
      }
      NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace());
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }

      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, (uint)strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8,
                                              system_charset_info);
      table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
      table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(),
                                                    (uint)strlen(df.getTablespace()),
                                                    system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->
        store(ts.getDefaultLogfileGroup(),
              (uint)strlen(ts.getDefaultLogfileGroup()),
              system_charset_info);
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
      table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree()
                                                 / ts.getExtentSize(), true);
      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize()
                                                  / ts.getExtentSize(), true);
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);
      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize(), true);
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize(), true);
      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(df.getObjectVersion(), true);

      table->field[IS_FILES_ROW_FORMAT]->set_notnull();
      table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info);

      char extra[30];
      int len= (int)my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id);
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
      schema_table_store_record(thd, table);
    }
  }

  NdbDictionary::Dictionary::List tslist;
  dict->listObjects(tslist, NdbDictionary::Object::Tablespace);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < tslist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= tslist.elements[i];

    NdbDictionary::Tablespace ts= dict->getTablespace(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

    init_fill_schema_files_row(table);
    table->field[IS_FILES_FILE_TYPE]->set_notnull();
    table->field[IS_FILES_FILE_TYPE]->store("TABLESPACE", 10,
                                            system_charset_info);

    table->field[IS_FILES_TABLESPACE_NAME]->set_notnull();
    table->field[IS_FILES_TABLESPACE_NAME]->store(elt.name,
                                                     (uint)strlen(elt.name),
                                                     system_charset_info);
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->
      store(ts.getDefaultLogfileGroup(),
           (uint)strlen(ts.getDefaultLogfileGroup()),
           system_charset_info);

    table->field[IS_FILES_ENGINE]->set_notnull();
    table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                         ndbcluster_hton_name_length,
                                         system_charset_info);

    table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
    table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize(), true);

    table->field[IS_FILES_VERSION]->set_notnull();
    table->field[IS_FILES_VERSION]->store(ts.getObjectVersion(), true);

    schema_table_store_record(thd, table);
  }

  NdbDictionary::Dictionary::List uflist;
  dict->listObjects(uflist, NdbDictionary::Object::Undofile);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < uflist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i];
    Ndb_cluster_connection_node_iter iter;
    unsigned id;

    g_ndb_cluster_connection->init_get_next_node(iter);

    while ((id= g_ndb_cluster_connection->get_next_alive_node(iter)))
    {
      NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name);
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        if (ndberr.classification == NdbError::UnknownResultError)
          continue;
        ERR_RETURN(ndberr);
      }
      NdbDictionary::LogfileGroup lfg=
        dict->getLogfileGroup(uf.getLogfileGroup());
      ndberr= dict->getNdbError();
      if (ndberr.classification != NdbError::NoError)
      {
        if (ndberr.classification == NdbError::SchemaError)
          continue;
        ERR_RETURN(ndberr);
      }

      init_fill_schema_files_row(table);
      table->field[IS_FILES_FILE_NAME]->set_notnull();
      table->field[IS_FILES_FILE_NAME]->store(elt.name, (uint)strlen(elt.name),
                                              system_charset_info);
      table->field[IS_FILES_FILE_TYPE]->set_notnull();
      table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                              system_charset_info);
      NdbDictionary::ObjectId objid;
      uf.getLogfileGroupId(&objid);
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NAME]->
        store(uf.getLogfileGroup(),
              (uint)strlen(uf.getLogfileGroup()),
              system_charset_info);
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
      table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId(), true);
      table->field[IS_FILES_ENGINE]->set_notnull();
      table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                           ndbcluster_hton_name_length,
                                           system_charset_info);

      table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull();
      table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4, true);
      table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
      table->field[IS_FILES_EXTENT_SIZE]->store(4, true);

      table->field[IS_FILES_INITIAL_SIZE]->set_notnull();
      table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize(), true);
      table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull();
      table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize(), true);

      table->field[IS_FILES_VERSION]->set_notnull();
      table->field[IS_FILES_VERSION]->store(uf.getObjectVersion(), true);

      char extra[100];
      int len= (int)my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu",
                           id, (ulong) lfg.getUndoBufferSize());
      table->field[IS_FILES_EXTRA]->set_notnull();
      table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
      schema_table_store_record(thd, table);
    }
  }

  // now for LFGs
  NdbDictionary::Dictionary::List lfglist;
  dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup);
  ndberr= dict->getNdbError();
  if (ndberr.classification != NdbError::NoError)
    ERR_RETURN(ndberr);

  for (i= 0; i < lfglist.count; i++)
  {
    NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i];

    NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name);
    ndberr= dict->getNdbError();
    if (ndberr.classification != NdbError::NoError)
    {
      if (ndberr.classification == NdbError::SchemaError)
        continue;
      ERR_RETURN(ndberr);
    }

    init_fill_schema_files_row(table);
    table->field[IS_FILES_FILE_TYPE]->set_notnull();
    table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8,
                                            system_charset_info);

    table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name,
                                                     (uint)strlen(elt.name),
                                                     system_charset_info);
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull();
    table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId(), true);
    table->field[IS_FILES_ENGINE]->set_notnull();
    table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name,
                                         ndbcluster_hton_name_length,
                                         system_charset_info);

    table->field[IS_FILES_FREE_EXTENTS]->set_notnull();
    table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords(), true);
    table->field[IS_FILES_EXTENT_SIZE]->set_notnull();
    table->field[IS_FILES_EXTENT_SIZE]->store(4, true);

    table->field[IS_FILES_VERSION]->set_notnull();
    table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion(), true);

    char extra[100];
    int len= (int)my_snprintf(extra,sizeof(extra),
                         "UNDO_BUFFER_SIZE=%lu",
                         (ulong) lfg.getUndoBufferSize());
    table->field[IS_FILES_EXTRA]->set_notnull();
    table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info);
    schema_table_store_record(thd, table);
  }
  DBUG_RETURN(0);
}

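/**
  Produce the dynamic "Ndb" status variables (registered as a SHOW_FUNC
  entry in ndb_status_variables_export below).

  Sketch of the technique used: ndb_status_variables_dynamic is a static
  template whose value pointers refer to members of the global
  g_ndb_status struct. One memory chunk is allocated holding a fresh
  st_ndb_status followed by a copy of the template array, and each copied
  value pointer is rebased by its member offset so that it points into
  the per-call st_ndb_status copy instead of into the global struct.
*/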
static int show_ndb_vars(THD *thd, SHOW_VAR *var, char *buff)
{
  if (!check_ndb_in_thd(thd))
    return -1;
  struct st_ndb_status *st;
  SHOW_VAR *st_var;
  {
    char *mem= (char*)sql_alloc(sizeof(struct st_ndb_status) +
                                sizeof(ndb_status_variables_dynamic));
    st= new (mem) st_ndb_status;
    st_var= (SHOW_VAR*)(mem + sizeof(struct st_ndb_status));
    memcpy(st_var, &ndb_status_variables_dynamic, sizeof(ndb_status_variables_dynamic));
    int i= 0;
    SHOW_VAR *tmp= &(ndb_status_variables_dynamic[0]);
    for (; tmp->value; tmp++, i++)
      st_var[i].value= mem + (tmp->value - (char*)&g_ndb_status);
  }
  {
    Thd_ndb *thd_ndb= get_thd_ndb(thd);
    Ndb_cluster_connection *c= thd_ndb->connection;
    update_status_variables(thd_ndb, st, c);
  }
  var->type= SHOW_ARRAY;
  var->value= (char *) st_var;
  return 0;
}

SHOW_VAR ndb_status_variables_export[]= {
  {"Ndb",          (char*) &show_ndb_vars,                 SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
  {"Ndb_conflict", (char*) &show_ndb_conflict_status_vars, SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
  {"Ndb",          (char*) &ndb_status_injector_variables, SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
  {"Ndb",          (char*) &ndb_status_slave_variables,    SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
  {"Ndb",          (char*) &show_ndb_server_api_stats,     SHOW_FUNC,  SHOW_SCOPE_GLOBAL},
  {"Ndb_index_stat", (char*) &ndb_status_index_stat_variables, SHOW_ARRAY, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};


static void cache_check_time_update(MYSQL_THD thd,
                                    struct st_mysql_sys_var *var,
                                    void *var_ptr,
                                    const void *save)
{
  push_warning_printf(thd, Sql_condition::SL_WARNING,
                      ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT,
                      ER_THD(thd, ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT),
                      "@@ndb_cache_check_time");

  opt_ndb_cache_check_time= *static_cast<const ulong*>(save);
}


static MYSQL_SYSVAR_ULONG(
  cache_check_time,                  /* name */
  opt_ndb_cache_check_time,              /* var */
  PLUGIN_VAR_RQCMDARG,
  "A dedicated thread is created to, at the given "
  "millisecond interval, invalidate the query cache "
  "if another MySQL server in the cluster has changed "
  "the data in the database. "
  "This variable is deprecated and will be removed in a future release.",
  NULL,                              /* check func. */
  &cache_check_time_update,          /* update func. */
  0,                                 /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_SYSVAR_ULONG(
  extra_logging,                     /* name */
  opt_ndb_extra_logging,                 /* var */
  PLUGIN_VAR_OPCMDARG,
  "Turn on more logging in the error log.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  0,                                 /* min */
  0,                                 /* max */
  0                                  /* block */
);


static MYSQL_SYSVAR_ULONG(
  wait_connected,                    /* name */
  opt_ndb_wait_connected,            /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Time (in seconds) for mysqld to wait for connection "
  "to cluster management and data nodes.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  30,                                /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);


static MYSQL_SYSVAR_ULONG(
  wait_setup,                        /* name */
  opt_ndb_wait_setup,                /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Time (in seconds) for mysqld to wait for setup to "
  "complete (0 = no wait)",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  30,                                /* default */
  0,                                 /* min */
  ONE_YEAR_IN_SECONDS,               /* max */
  0                                  /* block */
);

static const int MAX_CLUSTER_CONNECTIONS = 63;

static MYSQL_SYSVAR_UINT(
  cluster_connection_pool,           /* name */
  opt_ndb_cluster_connection_pool,   /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Pool of cluster connections to be used by mysql server.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1,                                 /* default */
  1,                                 /* min */
  MAX_CLUSTER_CONNECTIONS,           /* max */
  0                                  /* block */
);

static const int MIN_ACTIVATION_THRESHOLD = 0;
static const int MAX_ACTIVATION_THRESHOLD = 16;

static
int
ndb_recv_thread_activation_threshold_check(MYSQL_THD thd,
                                           struct st_mysql_sys_var *var,
                                           void *save,
                                           struct st_mysql_value *value)
{
  long long int_buf;
  int val = (int)value->val_int(value, &int_buf);
  int new_val = (int)int_buf;

  if (val != 0 || 
      new_val < MIN_ACTIVATION_THRESHOLD ||
      new_val > MAX_ACTIVATION_THRESHOLD)
  {
    return 1;
  }
  opt_ndb_recv_thread_activation_threshold = new_val;
  return 0;
}

static
void
ndb_recv_thread_activation_threshold_update(MYSQL_THD,
                                            struct st_mysql_sys_var *var,
                                            void *var_ptr,
                                            const void *save)
{
  ndb_set_recv_thread_activation_threshold(
    opt_ndb_recv_thread_activation_threshold);
}

static MYSQL_SYSVAR_UINT(
  recv_thread_activation_threshold,         /* name */
  opt_ndb_recv_thread_activation_threshold, /* var */
  PLUGIN_VAR_RQCMDARG,
  "Activation threshold when receive thread takes over the polling "
  "of the cluster connection (measured in concurrently active "
  "threads)",
  ndb_recv_thread_activation_threshold_check,  /* check func. */
  ndb_recv_thread_activation_threshold_update, /* update func. */
  8,                                           /* default */
  MIN_ACTIVATION_THRESHOLD,                    /* min */
  MAX_ACTIVATION_THRESHOLD,                    /* max */
  0                                            /* block */
);
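
/*
  Example (illustration only): the threshold can be changed at runtime,
  which invokes the check and update functions above, e.g.

    SET GLOBAL ndb_recv_thread_activation_threshold = 8;
*/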


/* Definitions needed for receive thread cpu mask config variable */
static const int ndb_recv_thread_cpu_mask_option_buf_size = 512;
char ndb_recv_thread_cpu_mask_option_buf[ndb_recv_thread_cpu_mask_option_buf_size];
Uint16 recv_thread_cpuid_array[1 * MAX_CLUSTER_CONNECTIONS];

static
int
ndb_recv_thread_cpu_mask_check(MYSQL_THD thd,
                               struct st_mysql_sys_var *var,
                               void *save,
                               struct st_mysql_value *value)
{
  char buf[ndb_recv_thread_cpu_mask_option_buf_size];
  int len = sizeof(buf);
  const char *str = value->val_str(value, buf, &len);

  return ndb_recv_thread_cpu_mask_check_str(str);
}

static int
ndb_recv_thread_cpu_mask_check_str(const char *str)
{
  unsigned i;
  SparseBitmask bitmask;

  recv_thread_num_cpus = 0;
  if (str == 0)
  {
    /* Setting an empty value is interpreted as removing the CPU locking */
    return 0;
  }

  if (parse_mask(str, bitmask) < 0)
  {
    sql_print_information("Trying to set ndb_recv_thread_cpu_mask to"
                          " illegal value = %s, ignored",
                          str);
    goto error;
  }
  for (i = bitmask.find(0);
       i != SparseBitmask::NotFound;
       i = bitmask.find(i + 1))
  {
    if (recv_thread_num_cpus ==
        1 * MAX_CLUSTER_CONNECTIONS)
    {
      sql_print_information("Trying to set too many CPU's in "
                            "ndb_recv_thread_cpu_mask, ignored"
                            " this variable, erroneus value = %s",
                            str);
      goto error;
    }
    recv_thread_cpuid_array[recv_thread_num_cpus++] = i;
  }
  return 0;
error:
  return 1;
}
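
/*
  Worked example (illustration only): with str = "0x33", parse_mask()
  sets bits 0, 1, 4 and 5 (0x33 == 0b00110011), so recv_thread_num_cpus
  becomes 4 and the receive threads may be locked to CPUs 0, 1, 4 and 5.
*/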

static
void
ndb_recv_thread_cpu_mask_update()
{
  ndb_set_recv_thread_cpu(recv_thread_cpuid_array,
                          recv_thread_num_cpus);
}

static
void
ndb_recv_thread_cpu_mask_update_func(MYSQL_THD,
                                     struct st_mysql_sys_var *var,
                                     void *var_ptr,
                                     const void *save)
{
  ndb_recv_thread_cpu_mask_update();
}

static MYSQL_SYSVAR_STR(
  recv_thread_cpu_mask,             /* name */
  opt_ndb_recv_thread_cpu_mask,     /* var */
  PLUGIN_VAR_RQCMDARG,
  "CPU mask for locking receiver threads to specific CPU, specified "
  " as hexadecimal as e.g. 0x33, one CPU is used per receiver thread.",
  ndb_recv_thread_cpu_mask_check,      /* check func. */
  ndb_recv_thread_cpu_mask_update_func,/* update func. */
  ndb_recv_thread_cpu_mask_option_buf
);
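
/*
  Example usage (illustration only), assuming a mysqld with the
  ndbcluster plugin loaded:

    SET GLOBAL ndb_recv_thread_cpu_mask = '0x33';
*/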

/* should be in index_stat.h */

extern int
ndb_index_stat_option_check(MYSQL_THD,
                            struct st_mysql_sys_var *var,
                            void *save,
                            struct st_mysql_value *value);
extern void
ndb_index_stat_option_update(MYSQL_THD,
                             struct st_mysql_sys_var *var,
                             void *var_ptr,
                             const void *save);

extern char ndb_index_stat_option_buf[];

static MYSQL_SYSVAR_STR(
  index_stat_option,                /* name */
  opt_ndb_index_stat_option,        /* var */
  PLUGIN_VAR_RQCMDARG,
  "Comma-separated tunable options for ndb index statistics",
  ndb_index_stat_option_check,      /* check func. */
  ndb_index_stat_option_update,     /* update func. */
  ndb_index_stat_option_buf
);


ulong opt_ndb_report_thresh_binlog_epoch_slip;
static MYSQL_SYSVAR_ULONG(
  report_thresh_binlog_epoch_slip,   /* name */
  opt_ndb_report_thresh_binlog_epoch_slip,/* var */
  PLUGIN_VAR_RQCMDARG,
  "Threshold on number of epochs to be behind before reporting binlog "
  "status. E.g. 3 means that if the difference between what epoch has "
  "been received from the storage nodes and what has been applied to "
  "the binlog is 3 or more, a status message will be sent to the cluster "
  "log.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  3,                                 /* default */
  0,                                 /* min */
  256,                               /* max */
  0                                  /* block */
);


ulong opt_ndb_report_thresh_binlog_mem_usage;
static MYSQL_SYSVAR_ULONG(
  report_thresh_binlog_mem_usage,    /* name */
  opt_ndb_report_thresh_binlog_mem_usage,/* var */
  PLUGIN_VAR_RQCMDARG,
  "Threshold on percentage of free memory before reporting binlog "
  "status. E.g. 10 means that if amount of available memory for "
  "receiving binlog data from the storage nodes goes below 10%, "
  "a status message will be sent to the cluster log.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  10,                                /* default */
  0,                                 /* min */
  100,                               /* max */
  0                                  /* block */
);


ulong opt_ndb_eventbuffer_max_alloc;
static MYSQL_SYSVAR_ULONG(
  eventbuffer_max_alloc,             /* name */
  opt_ndb_eventbuffer_max_alloc,     /* var */
  PLUGIN_VAR_RQCMDARG,
  "Maximum memory that can be allocated for buffering "
  "events by the ndb api.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0,                                 /* default */
  0,                                 /* min */
  UINT_MAX32,                        /* max */
  0                                  /* block */
);


uint opt_ndb_eventbuffer_free_percent;
static MYSQL_SYSVAR_UINT(
  eventbuffer_free_percent, /* name */
  opt_ndb_eventbuffer_free_percent,/* var */
  PLUGIN_VAR_RQCMDARG,
  "Percentage of free memory that should be available "
  "in event buffer before resuming buffering "
  "after the max_alloc limit is hit.",
  NULL, /* check func. */
  NULL, /* update func. */
  20, /* default */
  1, /* min */
  99, /* max */
  0 /* block */
);


my_bool opt_ndb_log_update_as_write;
static MYSQL_SYSVAR_BOOL(
  log_update_as_write,               /* name */
  opt_ndb_log_update_as_write,       /* var */
  PLUGIN_VAR_OPCMDARG,
  "For efficiency log only after image as a write event. "
  "Ignore before image. This may cause compatibility problems if "
  "replicating to other storage engines than ndbcluster.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);

my_bool opt_ndb_log_update_minimal;
static MYSQL_SYSVAR_BOOL(
  log_update_minimal,                  /* name */
  opt_ndb_log_update_minimal,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "For efficiency, log updates in a minimal format"
  "Log only the primary key value(s) in the before "
  "image. Log only the changed columns in the after "
  "image. This may cause compatibility problems if "
  "replicating to other storage engines than ndbcluster.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);

my_bool opt_ndb_log_updated_only;
static MYSQL_SYSVAR_BOOL(
  log_updated_only,                  /* name */
  opt_ndb_log_updated_only,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "For efficiency log only updated columns. Columns are considered "
  "as \"updated\" even if they are updated with the same value. "
  "This may cause compatibility problems if "
  "replicating to other storage engines than ndbcluster.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);

my_bool opt_ndb_log_empty_update;
static MYSQL_SYSVAR_BOOL(
  log_empty_update,                  /* name */
  opt_ndb_log_empty_update,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "Normally empty updates are filtered away "
  "before they are logged. However, for read tracking "
  "in conflict resolution a hidden pesudo attribute is "
  "set which will result in an empty update along with "
  "special flags set. For this to work empty updates "
  "have to be allowed.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);

my_bool opt_ndb_log_orig;
static MYSQL_SYSVAR_BOOL(
  log_orig,                          /* name */
  opt_ndb_log_orig,                  /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log originating server id and epoch in ndb_binlog_index. Each epoch "
  "may in this case have multiple rows in ndb_binlog_index, one for "
  "each originating epoch.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);


my_bool opt_ndb_log_bin;
static MYSQL_SYSVAR_BOOL(
  log_bin,                           /* name */
  opt_ndb_log_bin,                   /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log ndb tables in the binary log. Option only has meaning if "
  "the binary log has been turned on for the server.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


my_bool opt_ndb_log_binlog_index;
static MYSQL_SYSVAR_BOOL(
  log_binlog_index,                  /* name */
  opt_ndb_log_binlog_index,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "Insert mapping between epochs and binlog positions into the "
  "ndb_binlog_index table.",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  1                                  /* default */
);


static my_bool opt_ndb_log_empty_epochs;
static MYSQL_SYSVAR_BOOL(
  log_empty_epochs,                  /* name */
  opt_ndb_log_empty_epochs,          /* var */
  PLUGIN_VAR_OPCMDARG,
  "",
  NULL,                              /* check func. */
  NULL,                              /* update func. */
  0                                  /* default */
);

bool ndb_log_empty_epochs(void)
{
  return opt_ndb_log_empty_epochs;
}

my_bool opt_ndb_log_apply_status;
static MYSQL_SYSVAR_BOOL(
  log_apply_status,                 /* name */
  opt_ndb_log_apply_status,         /* var */
  PLUGIN_VAR_OPCMDARG,
  "Log ndb_apply_status updates from Master in the Binlog",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0                                 /* default */
);


my_bool opt_ndb_log_transaction_id;
static MYSQL_SYSVAR_BOOL(
  log_transaction_id,               /* name */
  opt_ndb_log_transaction_id,       /* var  */
  PLUGIN_VAR_OPCMDARG,
  "Log Ndb transaction identities per row in the Binlog",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0                                 /* default */
);


static MYSQL_SYSVAR_STR(
  connectstring,                    /* name */
  opt_ndb_connectstring,            /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Connect string for ndbcluster.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NULL                              /* default */
);


static MYSQL_SYSVAR_STR(
  mgmd_host,                        /* name */
  opt_ndb_connectstring,                /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Same as --ndb-connectstring",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  NULL                              /* default */
);


static MYSQL_SYSVAR_UINT(
  nodeid,                           /* name */
  opt_ndb_nodeid,                   /* var */
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Set nodeid for this node. Overrides node id specified "
  "in --ndb-connectstring.",
  NULL,                             /* check func. */
  NULL,                             /* update func. */
  0,                                /* default */
  0,                                /* min */
  MAX_NODES_ID,                     /* max */
  0                                 /* block */
);

static const char* slave_conflict_role_names[] =
{
  "NONE",
  "SECONDARY",
  "PRIMARY",
  "PASS",
  NullS
};

static TYPELIB slave_conflict_role_typelib = 
{
  array_elements(slave_conflict_role_names) - 1,
  "",
  slave_conflict_role_names,
  NULL
};


/**
 * slave_conflict_role_check_func.
 * 
 * Perform most validation of a role change request.
 * Inspired by sql_plugin.cc::check_func_enum()
 */
static int slave_conflict_role_check_func(THD *thd, struct st_mysql_sys_var *var,
                                          void *save, st_mysql_value *value)
{
  char buff[STRING_BUFFER_USUAL_SIZE];
  const char *str;
  long long tmp;
  long result;
  int length;

  do
  {
    if (value->value_type(value) == MYSQL_VALUE_TYPE_STRING)
    {
      length= sizeof(buff);
      if (!(str= value->val_str(value, buff, &length)))
        break;
      if ((result= (long)find_type(str, &slave_conflict_role_typelib, 0) - 1) < 0)
        break;
    }
    else
    {
      if (value->val_int(value, &tmp))
        break;
      if (tmp < 0 || tmp >= slave_conflict_role_typelib.count)
        break;
      result= (long) tmp;
    }
    
    const char* failure_cause_str = NULL;
    if (!st_ndb_slave_state::checkSlaveConflictRoleChange(
               (enum_slave_conflict_role) opt_ndb_slave_conflict_role,
               (enum_slave_conflict_role) result,
               &failure_cause_str))
    {
      char msgbuf[256];
      my_snprintf(msgbuf, 
                  sizeof(msgbuf), 
                  "Role change from %s to %s failed : %s",
                  get_type(&slave_conflict_role_typelib, opt_ndb_slave_conflict_role),
                  get_type(&slave_conflict_role_typelib, result),
                  failure_cause_str);
      
      thd->raise_error_printf(ER_ERROR_WHEN_EXECUTING_COMMAND,
                              "SET GLOBAL ndb_slave_conflict_role",
                              msgbuf);
      
      break;
    }
    
    /* Ok */
    *(long*)save= result;
    return 0;
  } while (0);
  /* Error */
  return 1;
}

/**
 * slave_conflict_role_update_func
 *
 * Perform actual change of role, using the saved 'long' enum value
 * prepared by the check func above.
 *
 * Inspired by sql_plugin.cc::update_func_long()
 */
static void slave_conflict_role_update_func(THD *thd, struct st_mysql_sys_var *var,
                                            void *tgt, const void *save)
{
  *(long *)tgt= *(long *) save;
}

static MYSQL_SYSVAR_ENUM(
  slave_conflict_role,               /* Name */
  opt_ndb_slave_conflict_role,       /* Var */
  PLUGIN_VAR_RQCMDARG,
  "Role for Slave to play in asymmetric conflict algorithms.",
  slave_conflict_role_check_func,    /* Check func */
  slave_conflict_role_update_func,   /* Update func */
  SCR_NONE,                          /* Default value */
  &slave_conflict_role_typelib       /* typelib */
);
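
/*
  Example role change (illustration only):

    SET GLOBAL ndb_slave_conflict_role = 'SECONDARY';

  An invalid transition is rejected by
  st_ndb_slave_state::checkSlaveConflictRoleChange() in the check
  function above and reported via ER_ERROR_WHEN_EXECUTING_COMMAND.
*/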

#ifndef NDEBUG

static
void
dbg_check_shares_update(THD*, st_mysql_sys_var*, void*, const void*)
{
  sql_print_information("dbug_check_shares open:");
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    sql_print_information("  %s.%s: state: %s(%u) use_count: %u",
                          share->db, share->table_name,
                          get_share_state_string(share->state),
                          (unsigned)share->state,
                          share->use_count);
    assert(share->state != NSS_DROPPED);
  }

  sql_print_information("dbug_check_shares dropped:");
  for (uint i= 0; i < ndbcluster_dropped_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_dropped_tables,i);
    sql_print_information("  %s.%s: state: %s(%u) use_count: %u",
                          share->db, share->table_name,
                          get_share_state_string(share->state),
                          (unsigned)share->state,
                          share->use_count);
    assert(share->state == NSS_DROPPED);
  }

  /**
   * Only shares in mysql database may be open...
   */
  for (uint i= 0; i < ndbcluster_open_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i);
    assert(strcmp(share->db, "mysql") == 0);
  }

  /**
   * Only shares in mysql database may be open...
   */
  for (uint i= 0; i < ndbcluster_dropped_tables.records; i++)
  {
    NDB_SHARE *share= (NDB_SHARE*)my_hash_element(&ndbcluster_dropped_tables,i);
    assert(strcmp(share->db, "mysql") == 0);
  }
}

static MYSQL_THDVAR_UINT(
  dbg_check_shares,                  /* name */
  PLUGIN_VAR_RQCMDARG,
  "Debug, only...check that no shares are lingering...",
  NULL,                              /* check func */
  dbg_check_shares_update,           /* update func */
  0,                                 /* default */
  0,                                 /* min */
  1,                                 /* max */
  0                                  /* block */
);
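
/*
  Example (illustration only, debug builds): setting the variable triggers
  dbg_check_shares_update(), which dumps all open and dropped NDB_SHAREs
  to the error log, e.g.

    SET SESSION ndb_dbg_check_shares = 1;
*/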

#endif

static struct st_mysql_sys_var* system_variables[]= {
  MYSQL_SYSVAR(cache_check_time),
  MYSQL_SYSVAR(extra_logging),
  MYSQL_SYSVAR(wait_connected),
  MYSQL_SYSVAR(wait_setup),
  MYSQL_SYSVAR(cluster_connection_pool),
  MYSQL_SYSVAR(recv_thread_activation_threshold),
  MYSQL_SYSVAR(recv_thread_cpu_mask),
  MYSQL_SYSVAR(report_thresh_binlog_mem_usage),
  MYSQL_SYSVAR(report_thresh_binlog_epoch_slip),
  MYSQL_SYSVAR(eventbuffer_max_alloc),
  MYSQL_SYSVAR(eventbuffer_free_percent),
  MYSQL_SYSVAR(log_update_as_write),
  MYSQL_SYSVAR(log_updated_only),
  MYSQL_SYSVAR(log_update_minimal),
  MYSQL_SYSVAR(log_empty_update),
  MYSQL_SYSVAR(log_orig),
  MYSQL_SYSVAR(distribution),
  MYSQL_SYSVAR(autoincrement_prefetch_sz),
  MYSQL_SYSVAR(force_send),
  MYSQL_SYSVAR(use_exact_count),
  MYSQL_SYSVAR(use_transactions),
  MYSQL_SYSVAR(use_copying_alter_table),
  MYSQL_SYSVAR(optimized_node_selection),
  MYSQL_SYSVAR(batch_size),
  MYSQL_SYSVAR(optimization_delay),
  MYSQL_SYSVAR(index_stat_enable),
  MYSQL_SYSVAR(index_stat_option),
  MYSQL_SYSVAR(table_no_logging),
  MYSQL_SYSVAR(table_temporary),
  MYSQL_SYSVAR(log_bin),
  MYSQL_SYSVAR(log_binlog_index),
  MYSQL_SYSVAR(log_empty_epochs),
  MYSQL_SYSVAR(log_apply_status),
  MYSQL_SYSVAR(log_transaction_id),
  MYSQL_SYSVAR(connectstring),
  MYSQL_SYSVAR(mgmd_host),
  MYSQL_SYSVAR(nodeid),
  MYSQL_SYSVAR(blob_read_batch_bytes),
  MYSQL_SYSVAR(blob_write_batch_bytes),
  MYSQL_SYSVAR(deferred_constraints),
  MYSQL_SYSVAR(join_pushdown),
  MYSQL_SYSVAR(log_exclusive_reads),
#ifndef NDEBUG
  MYSQL_SYSVAR(dbg_check_shares),
#endif
  MYSQL_SYSVAR(version),
  MYSQL_SYSVAR(version_string),
  MYSQL_SYSVAR(show_foreign_key_mock_tables),
  MYSQL_SYSVAR(slave_conflict_role),
  NULL
};

struct st_mysql_storage_engine ndbcluster_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };


extern struct st_mysql_plugin i_s_ndb_transid_mysql_connection_map_plugin;
extern struct st_mysql_plugin ndbinfo_plugin;

mysql_declare_plugin(ndbcluster)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &ndbcluster_storage_engine,
  ndbcluster_hton_name,
  "MySQL AB",
  "Clustered, fault-tolerant tables",
  PLUGIN_LICENSE_GPL,
  ndbcluster_init,            /* plugin init */
  NULL,                       /* plugin deinit */
  0x0100,                     /* plugin version */
  ndb_status_variables_export,/* status variables                */
  system_variables,           /* system variables */
  NULL,                       /* config options */
  0                           /* flags */
},
ndbinfo_plugin, /* ndbinfo plugin */
/* IS plugin table which maps between mysql connection id and ndb trans-id */
i_s_ndb_transid_mysql_connection_map_plugin
mysql_declare_plugin_end;

