403Webshell
Server IP : 104.21.38.3  /  Your IP : 172.70.147.166
Web Server : Apache
System : Linux krdc-ubuntu-s-2vcpu-4gb-amd-blr1-01.localdomain 5.15.0-142-generic #152-Ubuntu SMP Mon May 19 10:54:31 UTC 2025 x86_64
User : www ( 1000)
PHP Version : 7.4.33
Disable Function : passthru,exec,system,putenv,chroot,chgrp,chown,shell_exec,popen,proc_open,pcntl_exec,ini_alter,ini_restore,dl,openlog,syslog,readlink,symlink,popepassthru,pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,imap_open,apache_setenv
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : OFF  |  Sudo : ON  |  Pkexec : ON
Directory :  /www/server/mysql/src/sql/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /www/server/mysql/src/sql/tc_log.cc
/* Copyright (c) 2015, 2023, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */


#include "tc_log.h"

#include "log.h"           // sql_print_error
#include "sql_class.h"     // THD

#include "pfs_file_provider.h"
#include "mysql/psi/mysql_file.h"


TC_LOG::enum_result TC_LOG_DUMMY::commit(THD *thd, bool all)
{
  return ha_commit_low(thd, all) ? RESULT_ABORTED : RESULT_SUCCESS;
}


int TC_LOG_DUMMY::rollback(THD *thd, bool all)
{
  return ha_rollback_low(thd, all);
}


int TC_LOG_DUMMY::prepare(THD *thd, bool all)
{
  return ha_prepare_low(thd, all);
}


/********* transaction coordinator log for 2pc - mmap() based solution *******/

/*
  the log consists of a file, mmapped to a memory.
  file is divided on pages of tc_log_page_size size.
  (usable size of the first page is smaller because of log header)
  there's PAGE control structure for each page
  each page (or rather PAGE control structure) can be in one of three
  states - active, syncing, pool.
  there could be only one page in active or syncing states,
  but many in pool - pool is fifo queue.
  usual lifecycle of a page is pool->active->syncing->pool
  "active" page - is a page where new xid's are logged.
  the page stays active as long as syncing slot is taken.
  "syncing" page is being synced to disk. no new xid can be added to it.
  when the sync is done the page is moved to a pool and an active page
  becomes "syncing".

  the result of such an architecture is a natural "commit grouping" -
  If commits are coming faster than the system can sync, they do not
  stall. Instead, all commit that came since the last sync are
  logged to the same page, and they all are synced with the next -
  one - sync. Thus, thought individual commits are delayed, throughput
  is not decreasing.

  when a xid is added to an active page, the thread of this xid waits
  for a page's condition until the page is synced. when syncing slot
  becomes vacant one of these waiters is awaken to take care of syncing.
  it syncs the page and signals all waiters that the page is synced.
  PAGE::waiters is used to count these waiters, and a page may never
  become active again until waiters==0 (that is all waiters from the
  previous sync have noticed the sync was completed)

  note, that the page becomes "dirty" and has to be synced only when a
  new xid is added into it. Removing a xid from a page does not make it
  dirty - we don't sync removals to disk.
*/

ulong tc_log_page_waits= 0;

#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};

ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;

int TC_LOG_MMAP::open(const char *opt_name)
{
  uint i;
  bool crashed=FALSE;
  PAGE *pg;

  assert(total_ha_2pc > 1);
  assert(opt_name && opt_name[0]);

  tc_log_page_size= my_getpagesize();

  fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
  if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR, MYF(0))) < 0)
  {
    if (my_errno() != ENOENT)
      goto err;
    if (using_heuristic_recover())
      return 1;
    if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
                               O_RDWR, MYF(MY_WME))) < 0)
      goto err;
    inited=1;
    file_length= opt_tc_log_size;
    if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
      goto err;
  }
  else
  {
    inited= 1;
    crashed= TRUE;
    sql_print_information("Recovering after a crash using %s", opt_name);
    if (tc_heuristic_recover != TC_HEURISTIC_NOT_USED)
    {
      sql_print_error("Cannot perform automatic crash recovery when "
                      "--tc-heuristic-recover is used");
      goto err;
    }
    file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
    if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
      goto err;
  }

  data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
                        MAP_NOSYNC|MAP_SHARED, fd, 0);
  if (data == MAP_FAILED)
  {
    set_my_errno(errno);
    goto err;
  }
  inited=2;

  npages=(uint)file_length/tc_log_page_size;
  assert(npages >= 3);             // to guarantee non-empty pool
  if (!(pages=(PAGE *)my_malloc(key_memory_TC_LOG_MMAP_pages,
                                npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
    goto err;
  inited=3;
  for (pg=pages, i=0; i < npages; i++, pg++)
  {
    pg->next=pg+1;
    pg->waiters=0;
    pg->state=PS_POOL;
    mysql_cond_init(key_PAGE_cond, &pg->cond);
    pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
    pg->start= (my_xid *)(data + i*tc_log_page_size);
    pg->end= pg->start + pg->size;
    pg->ptr= pg->start;
  }
  pages[0].size=pages[0].free=
                (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
  pages[0].start=pages[0].end-pages[0].size;
  pages[npages-1].next=0;
  inited=4;

  if (crashed && recover())
      goto err;

  memcpy(data, tc_log_magic, sizeof(tc_log_magic));
  data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
  my_msync(fd, data, tc_log_page_size, MS_SYNC);
  inited=5;

  mysql_mutex_init(key_LOCK_tc, &LOCK_tc, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_COND_active, &COND_active);
  mysql_cond_init(key_COND_pool, &COND_pool);

  inited=6;

  syncing= 0;
  active=pages;
  pool=pages+1;
  pool_last_ptr= &pages[npages-1].next;

  return 0;

err:
  close();
  return 1;
}


/**
  Get the total amount of potentially usable slots for XIDs in TC log.
*/

uint TC_LOG_MMAP::size() const
{
  return (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid) +
         (npages - 1) * (tc_log_page_size/sizeof(my_xid));
}


/**
  there is no active page, let's got one from the pool.

  Two strategies here:
    -# take the first from the pool
    -# if there're waiters - take the one with the most free space.

  @todo
    TODO page merging. try to allocate adjacent page first,
    so that they can be flushed both in one sync

  @returns Pointer to qualifying page or NULL if no page in the
           pool can be made active.
*/

TC_LOG_MMAP::PAGE* TC_LOG_MMAP::get_active_from_pool()
{
  PAGE **best_p= &pool;

  if ((*best_p)->waiters != 0 || (*best_p)->free == 0)
  {
    /* if the first page can't be used try second strategy */
    int best_free=0;
    PAGE **p= &pool;
    for (p=&(*p)->next; *p; p=&(*p)->next)
    {
      if ((*p)->waiters == 0 && (*p)->free > best_free)
      {
        best_free=(*p)->free;
        best_p=p;
      }
    }
    if (*best_p == NULL || best_free == 0)
      return NULL;
  }

  PAGE *new_active= *best_p;
  if (new_active->free == new_active->size) // we've chosen an empty page
  {
    tc_log_cur_pages_used++;
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
  }

  *best_p= (*best_p)->next;
  if (! *best_p)
    pool_last_ptr= best_p;

  return new_active;
}

/**
  @todo
  perhaps, increase log size ?
*/
void TC_LOG_MMAP::overflow()
{
  /*
    simple overflow handling - just wait
    TODO perhaps, increase log size ?
    let's check the behaviour of tc_log_page_waits first
  */
  ulong old_log_page_waits= tc_log_page_waits;

  mysql_cond_wait(&COND_pool, &LOCK_tc);

  if (old_log_page_waits == tc_log_page_waits)
  {
    /*
      When several threads are waiting in overflow() simultaneously
      we want to increase counter only once and not for each thread.
    */
    tc_log_page_waits++;
  }
}

/**
  Commit the transaction.

  @note When the TC_LOG inteface was changed, this function was added
  and uses the functions that were there with the old interface to
  implement the logic.
 */
TC_LOG::enum_result TC_LOG_MMAP::commit(THD *thd, bool all)
{
  DBUG_ENTER("TC_LOG_MMAP::commit");
  ulong cookie= 0;
  my_xid xid= thd->get_transaction()->xid_state()->get_xid()->get_my_xid();

  if (all && xid)
    if (!(cookie= log_xid(xid)))
      DBUG_RETURN(RESULT_ABORTED);    // Failed to log the transaction

  if (ha_commit_low(thd, all))
    DBUG_RETURN(RESULT_INCONSISTENT); // Transaction logged, but not committed

  /* If cookie is non-zero, something was logged */
  if (cookie)
    unlog(cookie, xid);

  DBUG_RETURN(RESULT_SUCCESS);
}


int TC_LOG_MMAP::rollback(THD *thd, bool all)
{
  return ha_rollback_low(thd, all);
}


int TC_LOG_MMAP::prepare(THD *thd, bool all)
{
  return ha_prepare_low(thd, all);
}


/**
  Record that transaction XID is committed on the persistent storage.

    This function is called in the middle of two-phase commit:
    First all resources prepare the transaction, then tc_log->log() is called,
    then all resources commit the transaction, then tc_log->unlog() is called.

    All access to active page is serialized but it's not a problem, as
    we're assuming that fsync() will be a main bottleneck.
    That is, parallelizing writes to log pages we'll decrease number of
    threads waiting for a page, but then all these threads will be waiting
    for a fsync() anyway

   If tc_log == MYSQL_BIN_LOG then tc_log writes transaction to binlog and
   records XID in a special Xid_log_event.
   If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
   log.

  @retval
    0  - error
  @retval
    \# - otherwise, "cookie", a number that will be passed as an argument
    to unlog() call. tc_log can define it any way it wants,
    and use for whatever purposes. TC_LOG_MMAP sets it
    to the position in memory where xid was logged to.
*/

ulong TC_LOG_MMAP::log_xid(my_xid xid)
{
  mysql_mutex_lock(&LOCK_tc);

  while (true)
  {
    /* If active page is full - just wait... */
    while (unlikely(active && active->free == 0))
      mysql_cond_wait(&COND_active, &LOCK_tc);

    /* no active page ? take one from the pool. */
    if (active == NULL)
    {
      active= get_active_from_pool();

      /* There are no pages with free slots? Wait and retry. */
      if (active == NULL)
      {
        overflow();
        continue;
      }
    }

    break;
  }

  PAGE *p= active;
  ulong cookie= store_xid_in_empty_slot(xid, p, data);
  bool err;

  if (syncing)
  {                                          // somebody's syncing. let's wait
    err= wait_sync_completion(p);
    if (p->state != PS_DIRTY)                   // page was synced
    {
      if (p->waiters == 0)
        mysql_cond_broadcast(&COND_pool);    // in case somebody's waiting
      mysql_mutex_unlock(&LOCK_tc);
      goto done;                             // we're done
    }
  }                                          // page was not synced! do it now
  assert(active == p && syncing == NULL);
  syncing= p;                                 // place is vacant - take it
  active= NULL;                                  // page is not active anymore
  mysql_cond_broadcast(&COND_active);        // in case somebody's waiting
  mysql_mutex_unlock(&LOCK_tc);
  err= sync();

done:
  return err ? 0 : cookie;
}


/**
  Write the page data being synchronized to the disk.

  @return
    @retval false   Success
    @retval true    Failure
*/
bool TC_LOG_MMAP::sync()
{
  assert(syncing != active);

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */

  int err= do_msync_and_fsync(fd, syncing->start,
                              syncing->size*sizeof(my_xid), MS_SYNC);

  mysql_mutex_lock(&LOCK_tc);
  /* Page is synced. Let's move it to the pool. */
  *pool_last_ptr= syncing;
  pool_last_ptr= &(syncing->next);
  syncing->next= NULL;
  syncing->state= err ? PS_ERROR : PS_POOL;
  mysql_cond_broadcast(&COND_pool);          // in case somebody's waiting

  /* Wake-up all threads which are waiting for syncing of the same page. */
  mysql_cond_broadcast(&syncing->cond);

  /* Mark syncing slot as free and wake-up new syncer. */
  syncing= NULL;
  if (active)
    mysql_cond_signal(&active->cond);

  mysql_mutex_unlock(&LOCK_tc);
  return err != 0;
}

/**
  erase xid from the page, update page free space counters/pointers.
  cookie points directly to the memory where xid was logged.
*/

void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
{
  PAGE *p= pages + (cookie / tc_log_page_size);
  my_xid *x= (my_xid *)(data + cookie);

  assert(*x == xid);
  assert(x >= p->start && x < p->end);
  *x= 0;

  mysql_mutex_lock(&LOCK_tc);
  p->free++;
  assert(p->free <= p->size);
  set_if_smaller(p->ptr, x);
  if (p->free == p->size)               // the page is completely empty
    tc_log_cur_pages_used--;
  if (p->waiters == 0)                 // the page is in pool and ready to rock
    mysql_cond_broadcast(&COND_pool);  // ping ... for overflow()
  mysql_mutex_unlock(&LOCK_tc);
}

void TC_LOG_MMAP::close()
{
  uint i;
  switch (inited) {
  case 6:
    mysql_mutex_destroy(&LOCK_tc);
    mysql_cond_destroy(&COND_pool);
    // Fall through.
  case 5:
    data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
    // Fall through.
  case 4:
    for (i=0; i < npages; i++)
    {
      if (pages[i].ptr == 0)
        break;
      mysql_cond_destroy(&pages[i].cond);
    }
    // Fall through.
  case 3:
    my_free(pages);
    // Fall through.
  case 2:
    my_munmap((char*)data, (size_t)file_length);
    // Fall through.
  case 1:
    mysql_file_close(fd, MYF(0));
  }
  if (inited>=5) // cannot do in the switch because of Windows
    mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
  inited=0;
}

int TC_LOG_MMAP::recover()
{
  HASH xids;
  PAGE *p=pages, *end_p=pages+npages;

  if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
  {
    sql_print_error("Bad magic header in tc log");
    goto err1;
  }

  /*
    the first byte after magic signature is set to current
    number of storage engines on startup
  */
  if (data[sizeof(tc_log_magic)] != total_ha_2pc)
  {
    sql_print_error("Recovery failed! You must enable "
                    "exactly %d storage engines that support "
                    "two-phase commit protocol",
                    data[sizeof(tc_log_magic)]);
    goto err1;
  }

  if (my_hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
                   sizeof(my_xid), 0, 0, MYF(0),
                   PSI_INSTRUMENT_ME))
    goto err1;

  for ( ; p < end_p ; p++)
  {
    for (my_xid *x=p->start; x < p->end; x++)
      if (*x && my_hash_insert(&xids, (uchar *)x))
        goto err2; // OOM
  }

  if (ha_recover(&xids))
    goto err2;

  my_hash_free(&xids);
  memset(data, 0, (size_t)file_length);
  return 0;

err2:
  my_hash_free(&xids);
err1:
  sql_print_error("Crash recovery failed. Either correct the problem "
                  "(if it's, for example, out of memory error) and restart, "
                  "or delete tc log and start mysqld with "
                  "--tc-heuristic-recover={commit|rollback}");
  return 1;
}

TC_LOG *tc_log;
TC_LOG_DUMMY tc_log_dummy;
TC_LOG_MMAP  tc_log_mmap;

bool TC_LOG::using_heuristic_recover()
{
  if (tc_heuristic_recover == TC_HEURISTIC_NOT_USED)
    return false;

  sql_print_information("Heuristic crash recovery mode");
  if (ha_recover(0))
    sql_print_error("Heuristic crash recovery failed");
  sql_print_information("Please restart mysqld without --tc-heuristic-recover");
  return true;
}


Youez - 2016 - github.com/yon3zu
LinuXploit