// SPDX-License-Identifier: GPL-2.0-only
/*
 * CryptoServer Se/CSe/Se2-Series Driver
 *
 * Copyright 2024 Utimaco IS GmbH
 * All Rights Reserved.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/version.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/aer.h>
#include <linux/sched.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,11,0))
#include <linux/sched/signal.h>
#endif

#include "cryptoserver.h"
#include "version.h"
#include "cs2_drv.h"

/******************************************************************************
 *
 * Definitions
 *
 *****************************************************************************/

// PCIe buffer
// (offsets added to the BAR0 TX buffer mapping in cs3_io_map)
#define REG_TX_BUFF0            0x0000          // TX buffer 0 address
#define REG_TX_BUFF1            0x1000          // TX buffer 1 address

// PCIe register
// Offset of the register window inside BAR0; cs3_io_map maps this window
// separately and every REG_* offset below is added to that mapping
// (dp->reg_base).
#define REG_BASE_OFS            0x10000

#define REG_CS2_IRQ             0x0000          // CS IRQ status register
#define REG_CS2_IRQM            0x0004          // CS IRQ mask register
#define REG_HOST_IRQ            0x0008          // host IRQ status register
#define REG_HOST_IRQM           0x000C          // host IRQ mask register

#define REG_DMA0_ADDR           0x0020          // DMA0 buffer address
#define REG_DMA1_ADDR           0x0024          // DMA1 buffer address

#define REG_TX_LEN0             0x0028          // TX Length register 0
#define REG_TX_LEN1             0x002c          // TX Length register 1
#define REG_RX_LEN0             0x0030          // RX Length register 0
#define REG_RX_LEN1             0x0034          // RX Length register 1

#define REG_RESET               0x0038          // Reset register
#define REG_VERSION             0x003C          // Version register

#define REG_HOST_2_CS_MBX0      0x0040          // Mailbox register PC -> DSP
#define REG_HOST_2_CS_MBX1      0x0044          // Mailbox register PC -> DSP
#define REG_CS_2_HOST_MBX0      0x0048          // Mailbox register DSP -> PC
#define REG_CS_2_HOST_MBX1      0x004C          // Mailbox register DSP -> PC

#define REG_SYSSTAT             0x0050          // System status register
#define REG_SYSSTAT_CHG         0x0054          // System status register changes

// host IRQ status bits (REG_HOST_IRQ / REG_HOST_IRQM)
// The ISRs in this file invert the value read from REG_HOST_IRQ before
// testing these bits and acknowledge a source by writing its bit back.
#define BIT_IRQ_RX_BF0_FULL     1u              // RX buffer 0 full IRQ
#define BIT_IRQ_RX_BF1_FULL     2u              // RX buffer 1 full IRQ
#define BIT_IRQ_TX_BF0_ACK      4u              // TX buffer 0 ack IRQ
#define BIT_IRQ_TX_BF1_ACK      8u              // TX buffer 1 ack IRQ

// CS2 IRQ status bits (REG_CS2_IRQ / REG_CS2_IRQM)
// Note: these share the numeric values of the host bits above, so the two
// sets must not be mixed up when reading code that tests a status word.
#define BIT_IRQ_TX_BF0_FULL     1u              // TX buffer 0 full IRQ
#define BIT_IRQ_TX_BF1_FULL     2u              // TX buffer 1 full IRQ
#define BIT_IRQ_RX_BF0_ACK      4u              // RX buffer 0 ack IRQ
#define BIT_IRQ_RX_BF1_ACK      8u              // RX buffer 1 ack IRQ

// all four IRQ source bits of either status register
#define BIT_IRQ_MASK            0xF

// Bit-field view of the (inverted, masked) host IRQ status word.
// cs3_info stores '~REG_HOST_IRQ & 0xf' in 'val' and prints the single flags.
// NOTE(review): the mapping of the bit-fields onto 'val' assumes the compiler
// allocates bit-fields starting at the least significant bit - confirm for
// all supported targets.
union host_irq_status_t
{
  struct
  {
    unsigned int rx_bf0_full : 1;   // corresponds to BIT_IRQ_RX_BF0_FULL
    unsigned int rx_bf1_full : 1;   // corresponds to BIT_IRQ_RX_BF1_FULL
    unsigned int tx_bf0_ack : 1;    // corresponds to BIT_IRQ_TX_BF0_ACK
    unsigned int tx_bf1_ack : 1;    // corresponds to BIT_IRQ_TX_BF1_ACK
    unsigned int unused : 28;
  };

  unsigned int val;                 // whole status word
};

// Bit-field view of the (inverted, masked) CS2 IRQ status word.
// cs3_info stores '~REG_CS2_IRQ & 0xf' in 'val' and prints the single flags.
// NOTE(review): the mapping of the bit-fields onto 'val' assumes the compiler
// allocates bit-fields starting at the least significant bit - confirm for
// all supported targets.
union cs2_irq_status_t
{
  struct
  {
    unsigned int tx_bf0_full : 1;   // corresponds to BIT_IRQ_TX_BF0_FULL
    unsigned int tx_bf1_full : 1;   // corresponds to BIT_IRQ_TX_BF1_FULL
    unsigned int rx_bf0_ack : 1;    // corresponds to BIT_IRQ_RX_BF0_ACK
    unsigned int rx_bf1_ack : 1;    // corresponds to BIT_IRQ_RX_BF1_ACK
    unsigned int unused : 28;
  };

  unsigned int val;                 // whole status word
};

// transmission buffer
#define PCIE_BLK_SIZE           2048            // sizeof block
#define PCIE_HDR_SIZE           8               // sizeof block header
#define PCIE_DATA_SIZE          (PCIE_BLK_SIZE-PCIE_HDR_SIZE)

// timeouts [milliseconds]
// The products are parenthesized so that the macros expand to a single
// operand; without the parentheses an expression such as
// 'x / DEF_TIMEOUT_TX_RX' would be evaluated as '(x / 10) * 1000'.
#define DEF_TIMEOUT_TX_RX       (10*1000)             // 10 sec
#define MAX_TIMEOUT_TX_RX       (60*1000)             // 60 sec

#define MAX_TIMEOUT_RESET       (60*1000)             // 1 min

#define DEF_TIMEOUT_REQUEST     (10*60*1000)          // 10 min
#define MAX_TIMEOUT_REQUEST     (60*60*1000)          // 1 hour

// mode parameter for cs3_halt
#define SHUTDOWN_MODE_NONE      0   //
#define SHUTDOWN_MODE_SUSPEND   1   //
#define SHUTDOWN_MODE_HALT      2   // end CMDS

//--------------------------------------------------------------------------------
// Device Structure
//
// Per-device state of one CryptoServer card. The generic part (csdev) comes
// first; everything after it is specific to this driver variant.
//--------------------------------------------------------------------------------
struct cs3_device_t
{
  struct cs_device_t      csdev;

  // device specific part
  struct cs_iomem_t       bar0[2];          // [0]: TX buffers, [1]: register window (see cs3_io_map)

  volatile unsigned char __iomem *tx_buf0;  // -> 0x0000
  volatile unsigned char __iomem *tx_buf1;  // -> 0x1000
  volatile unsigned char __iomem *reg_base; // -> 0x10000

  int                     irq_mode;         // non-zero: MSI, zero: legacy (set in cs3_irq_request)
  long                    timeout;          // in jiffies (cs3_info converts it with jiffies_to_msecs)
  unsigned int            crc_flag;
  unsigned char           ch_id;
  void                    *buf;

  volatile unsigned int   state;            // one of the STATE_* values

  // tx
  wait_queue_head_t       tx_event;         // woken on TX ack, reset and 'CMDS ready'
  int                     tx_err;
  int                     tx_reset;         // set on reset / halt / panic message, cleared on ready message
  volatile char           tx_ack[2];        // per TX buffer: ack received (set by cs3_task_proc)
  unsigned int            tx_count;

  // rx
  struct cs_dmabuf_t      rx_buf0;          // DMA buffer programmed into REG_DMA0_ADDR
  struct cs_dmabuf_t      rx_buf1;          // DMA buffer programmed into REG_DMA1_ADDR

  wait_queue_head_t       rx_event;         // woken on RX full, reset and 'CMDS ready'
  int                     rx_err;
  int                     rx_reset;         // set on reset / halt / panic message, cleared on ready message
  volatile char           rx_full[2];       // per RX buffer: data available (set by cs3_task_proc)
  unsigned int            rx_count;
  unsigned int            rx_crc_err;

  wait_queue_head_t       spm_event;        // woken when a spontaneous message arrived
  volatile int            spm_state;        // last spontaneous message code, 0 = none pending
  volatile int            rcvd_cmds_rdy;    // set when message 0x8000 ([CMDS ready]) arrived

  unsigned int            req_len;

  struct semaphore        sema;             // released by cs3_close if the session still holds it

  struct tasklet_struct   *task;            // bottom half scheduled by the ISRs (cs3_task_proc)
  unsigned int            task_irq;         // IRQ bits collected by the ISRs, consumed by cs3_task_proc
  spinlock_t              task_lock;        // taken by cs3_task_proc while it processes task_irq

  // statistics
  struct
  {
    unsigned int irq_cnt[4];                // [0] rx_full_0, [1] rx_full_1, [2] tx_ack_0, [3] tx_ack_1
    unsigned int evt_cnt[4];                // same index scheme, counted in cs3_task_proc
    unsigned int irq_pattern_cnt[16];       // indexed by the 4 bit IRQ status pattern seen by the ISR
  }
  stats;
};

// RX / TX states
//
// A state value consists of an operation code in bits 2..3 and a result
// code (done / fail) in bits 0..1.
#define STATE_BIT_DONE      0x1
#define STATE_BIT_FAIL      0x2

#define STATE_IDLE          0x0

#define STATE_WRITE         0x4
#define STATE_WRITE_DONE   (0x4 | STATE_BIT_DONE)
#define STATE_WRITE_FAIL   (0x4 | STATE_BIT_FAIL)

#define STATE_GETREQ        0x8
#define STATE_GETREQ_DONE  (0x8 | STATE_BIT_DONE)
#define STATE_GETREQ_FAIL  (0x8 | STATE_BIT_FAIL)

#define STATE_READ          0xC
#define STATE_READ_DONE    (0xC | STATE_BIT_DONE)
#define STATE_READ_FAIL    (0xC | STATE_BIT_FAIL)

#define STATE_RESET         0xF

// Printable name for each of the 16 possible state values; values that are
// not assigned to a state are shown as "?<n>?".
static const char *StateText[16] =
{
  [STATE_IDLE]        = "idle",
  [1]                 = "?1?",
  [2]                 = "?2?",
  [3]                 = "?3?",
  [STATE_WRITE]       = "write",
  [STATE_WRITE_DONE]  = "write_done",
  [STATE_WRITE_FAIL]  = "write_fail",
  [7]                 = "?7?",
  [STATE_GETREQ]      = "getreq",
  [STATE_GETREQ_DONE] = "getreq_done",
  [STATE_GETREQ_FAIL] = "getreq_fail",
  [11]                = "?11?",
  [STATE_READ]        = "read",
  [STATE_READ_DONE]   = "read_done",
  [STATE_READ_FAIL]   = "read_fail",
  [STATE_RESET]       = "reset"
};

/******************************************************************************
 *
 * Globals
 *
 *****************************************************************************/
static unsigned int ShutdownMode = 2;
module_param(ShutdownMode, int, 0444);
MODULE_PARM_DESC(ShutdownMode, " 0:none, 1:suspend, 2:halt [default]");


// Forward declaration of the probe entry point; on success the new device
// object is presumably returned through 'pp_dp' - verify against the definition.
int cs3_probe(struct pci_dev *pci_dev, struct cs3_device_t **pp_dp);

/******************************************************************************
 *
 * cs3_set_error_reporting
 *
 * Enables or disables PCIe error reporting for the device and its upstream
 * bridge. Only compiled in if USE_PCI_ERROR_REPORTING is defined, otherwise
 * the function does nothing.
 *
 * Bit 0 of ErrorReporting selects the setting of the device, bit 1 the
 * setting of the upstream bridge (0: disable, 1: enable).
 *
 * returns 0 on success or the error code of the failing PCI call
 *
 *****************************************************************************/
static int cs3_set_error_reporting(struct pci_dev *pci_dev)
{
#ifdef USE_PCI_ERROR_REPORTING
  int (*pci_configure_pcie_error_reporting[2])(struct pci_dev*) =
  {
    pci_disable_pcie_error_reporting,
    pci_enable_pcie_error_reporting
  };

  if (pci_dev->is_physfn)
  {
    int err;
    struct pci_dev *bridge = pci_upstream_bridge(pci_dev);

    if ((err = pci_configure_pcie_error_reporting[ErrorReporting & 1](pci_dev)) != 0)
    {
      return err;
    }

    // pci_upstream_bridge() returns NULL for a device that sits directly on
    // a root bus (e.g. with some virtual machine setups) - nothing to
    // configure in that case
    if (  bridge != NULL
       && (err = pci_configure_pcie_error_reporting[(ErrorReporting>>1)&1](bridge)) != 0
       )
    {
      return err;
    }
  }
#endif
  return 0;
}

/******************************************************************************
 *
 * cs3_io_map
 *
 * Maps the two regions of BAR0 that are used by the driver:
 *   bar0[0]: TX buffers 0/1 (0x2000 bytes at offset 0, write-combined)
 *   bar0[1]: register window (0x1000 bytes at offset REG_BASE_OFS)
 * and initializes tx_buf0, tx_buf1 and reg_base accordingly.
 *
 * returns 0 on success, -ENODEV on any error. On error no mapping is left
 * behind and all mapping pointers are NULL.
 *
 *****************************************************************************/
static int cs3_io_map(struct cs3_device_t *dp)
{
  struct pci_dev    *pci_dev = dp->csdev.pci_dev;
  unsigned long     addr;
  unsigned long     size;
  struct cs_iomem_t *bar;
  int i;

  if (  (addr = pci_resource_start(pci_dev, 0)) == 0
     || (size = pci_resource_len  (pci_dev, 0)) == 0
     )
  {
    log_error("no resource BAR0\n");
    return -ENODEV;
  }

  log_trace("BAR0: phys_addr: %lx, size: 0x%lx\n", addr, size);

  // the register window must be inside the BAR, otherwise foreign memory
  // would be mapped below
  if (size < REG_BASE_OFS + 0x1000)
  {
    log_error("BAR0 is too small: 0x%lx\n", size);
    return -ENODEV;
  }

  // tx buffer 0/1
  bar = dp->bar0;
  bar->phys_addr = addr;
  bar->size = 0x2000;

  if ((bar->base_ptr = ioremap_wc(bar->phys_addr, bar->size)) == NULL)
  {
    log_error("can't remap io memory for BAR0 tx buffers\n");
    return -ENODEV;
  }

  dp->tx_buf0 = bar->base_ptr + REG_TX_BUFF0;
  dp->tx_buf1 = bar->base_ptr + REG_TX_BUFF1;

  // registers
  bar++;
  bar->phys_addr = addr + REG_BASE_OFS;
  bar->size = 0x1000;

  if ((bar->base_ptr = ioremap(bar->phys_addr, bar->size)) == NULL)
  {
    log_error("can't remap io memory for BAR0 registers\n");

    // don't leak the tx buffer mapping that was created above
    iounmap(dp->bar0[0].base_ptr);
    dp->bar0[0].base_ptr = NULL;
    dp->tx_buf0 = NULL;
    dp->tx_buf1 = NULL;

    return -ENODEV;
  }

  dp->reg_base = bar->base_ptr;

  for (i=0; i<2; i++)
  {
    bar = dp->bar0 + i;

    log_trace("tx_buf[%d]: phys.: %08X:%08X -> virt.: %p, size = 0x%08lX\n", i, bar->phys_addr_hi,
                                                                             bar->phys_addr_lo,
                                                                             bar->base_ptr,
                                                                             bar->size);
  }
  return 0;
}

/******************************************************************************
 *
 * cs3_io_unmap
 *
 * Releases the BAR0 mappings created by cs3_io_map and clears all pointers
 * that were derived from them. Regions that are not mapped (base pointer is
 * NULL, e.g. after a failed cs3_io_map) are skipped, so the function may be
 * called in any state.
 *
 * returns 0 (always)
 *
 *****************************************************************************/
static int cs3_io_unmap(struct cs3_device_t *dp)
{
  struct cs_iomem_t *bar;
  int i;

  for (i=0; i<2; i++)
  {
    bar = dp->bar0 + i;

    if (bar->base_ptr != NULL)
    {
      iounmap(bar->base_ptr);
      bar->base_ptr = NULL;
    }
  }

  dp->reg_base = NULL;
  dp->tx_buf0  = NULL;
  dp->tx_buf1  = NULL;

  return 0;
}

/******************************************************************************
 *
 * cs3_dma_alloc
 *
 * Allocates a coherent DMA buffer of 'size' bytes for the device and stores
 * virtual address, bus address and size in 'buf'.
 *
 * returns 0 on success, -ENOMEM if the allocation failed (buf->base_ptr is
 * NULL in that case)
 *
 *****************************************************************************/
static int cs3_dma_alloc(struct cs3_device_t *dp, unsigned int size, struct cs_dmabuf_t *buf)
{
  struct device *dev = &dp->csdev.pci_dev->dev;

  buf->base_ptr = dma_alloc_coherent(dev, size, &buf->phys_addr, GFP_ATOMIC);

  if (buf->base_ptr != NULL)
  {
    buf->size = size;

    log_debug("Virtual Address = %p, Physical Address = %08X:%08X, size = %d\n",
                      buf->base_ptr,
                      buf->phys_addr_hi,
                      buf->phys_addr_lo,
                      buf->size);
    return 0;
  }

  log_error("dma_alloc_coherent failed\n");
  return -ENOMEM;
}

/******************************************************************************
 *
 * cs3_dma_free
 *
 * Releases a DMA buffer that was allocated with cs3_dma_alloc. A buffer that
 * is not allocated (base_ptr is NULL) is ignored; base_ptr is reset to NULL
 * after the release.
 *
 *****************************************************************************/
static void cs3_dma_free(struct cs3_device_t *dp, struct cs_dmabuf_t *buf)
{
  struct device *dev = &dp->csdev.pci_dev->dev;

  if (buf->base_ptr != NULL)
  {
    dma_free_coherent(dev, buf->size, buf->base_ptr, buf->phys_addr);
    buf->base_ptr = NULL;
  }
}

/******************************************************************************
 *
 * cs3_dma_clear
 *
 * Overwrites the complete DMA buffer with zero bytes. The buffer has to be
 * allocated, base_ptr is not checked.
 *
 *****************************************************************************/
static inline void cs3_dma_clear(struct cs_dmabuf_t *buf)
{
  size_t len = buf->size;

  memset(buf->base_ptr, 0, len);
}

/******************************************************************************
 *
 * cs3_spm_text
 *
 * Returns a printable description of a spontaneous message code.
 *
 * Exactly known codes are looked up in a table. Any other code is classified
 * by its high byte: 0x50xx -> "[panic]", 0x53xx -> "[shutdown]", everything
 * else -> "[unknown]". Code 0 (no message) yields an empty string.
 *
 *****************************************************************************/
static const char *cs3_spm_text(unsigned short spm)
{
  static const struct
  {
    unsigned short code;
    const char     *text;
  }
  known[] =
  {
    { 0x0000, "" },
    { 0x4000, "[BL ready]" },
    { 0x8000, "[CMDS ready]" },
    { 0x5001, "[BL CRC error]" },
    { 0x5002, "[BL SDRAM error]" },
    { 0x500B, "[PCI interface closed]" },
    { 0x500C, "[memory corruption]" },
    { 0x5306, "[high temperature]" },
    { 0x530A, "[alarm was reset]" },
    { 0x530E, "[CryptoServer was cleared]" },
    { 0x5344, "[CryptoServer was shut down]" }
  };

  unsigned int k;

  for (k = 0; k < sizeof(known) / sizeof(known[0]); k++)
  {
    if (known[k].code == spm) return known[k].text;
  }

  // no exact match: fall back to the message class
  if ((spm >> 8) == 0x50) return "[panic]";
  if ((spm >> 8) == 0x53) return "[shutdown]";

  return "[unknown]";
}

/******************************************************************************
 *
 * cs3_spm_error
 *
 * Translates the current spontaneous message state into a driver error code
 * and logs the reason. 'where' names the caller for the log message.
 *
 * returns -ECS2TEMPALARM  for message 0x5306 (high temperature)
 *         -ECS2PANIC      for any other message 0x50xx
 *         -ECS2DOWN       for any other message 0x53xx
 *         -ECS2RESET      in all other cases
 *
 ******************************************************************************/
static int cs3_spm_error(const char *where, struct cs3_device_t *dp)
{
  // spm_state may be changed by cs3_task_proc at any time; read it exactly
  // once so that the classification and the logged value always agree
  const int spm = dp->spm_state;

  if (spm == 0x5306)
  {
    log_error("%s: high temperature detected\n", where);
    return -ECS2TEMPALARM;
  }

  switch (spm >> 8)
  {
    case 0x50:
      log_error("%s: critical error detected: %04x\n", where, spm);
      return -ECS2PANIC;

    case 0x53:
      log_error("%s: CryptoServer was shut down: %04x\n", where, spm);
      return -ECS2DOWN;

    default:
      log_error("%s: operation was interrupted by reset\n", where);
      return -ECS2RESET;
  }
}

/******************************************************************************
 *
 * cs3_legacy_isr
 *
 * Interrupt service routine for legacy (INTx) interrupts. The line may be
 * shared, therefore IRQ_NONE is returned if none of our sources is pending.
 *
 * Every pending source is acknowledged with a separate register write, the
 * pending bits are handed over to the tasklet (cs3_task_proc) via task_irq.
 *
 * task_irq is also modified by the tasklet, which may run on another CPU at
 * the same time, so the update has to be done under task_lock - otherwise an
 * interrupt bit can get lost.
 *
 ******************************************************************************/
static irqreturn_t cs3_legacy_isr(int irq, struct cs3_device_t *dp)
{
  irqreturn_t   ret;
  u32           status;
  unsigned int  *irq_cnt;
  unsigned long flags;

  // the status register is evaluated inverted
  status = ~readl(dp->reg_base + REG_HOST_IRQ) & BIT_IRQ_MASK;

  if (status != 0)
  {
    // acknowledge the pending sources one by one
    if (status & BIT_IRQ_TX_BF0_ACK)  writel(BIT_IRQ_TX_BF0_ACK,  dp->reg_base + REG_HOST_IRQ);
    if (status & BIT_IRQ_TX_BF1_ACK)  writel(BIT_IRQ_TX_BF1_ACK,  dp->reg_base + REG_HOST_IRQ);
    if (status & BIT_IRQ_RX_BF0_FULL) writel(BIT_IRQ_RX_BF0_FULL, dp->reg_base + REG_HOST_IRQ);
    if (status & BIT_IRQ_RX_BF1_FULL) writel(BIT_IRQ_RX_BF1_FULL, dp->reg_base + REG_HOST_IRQ);

    readl(dp->reg_base + REG_VERSION);    // flush PCI

    spin_lock_irqsave(&dp->task_lock, flags);
    dp->task_irq |= status;
    spin_unlock_irqrestore(&dp->task_lock, flags);

    tasklet_schedule(dp->task);

    // same statistics as in MSI mode (index scheme see cs3_info)
    irq_cnt = &dp->stats.irq_cnt[0];
    if (status & BIT_IRQ_RX_BF0_FULL) irq_cnt[0]++;
    if (status & BIT_IRQ_RX_BF1_FULL) irq_cnt[1]++;
    if (status & BIT_IRQ_TX_BF0_ACK)  irq_cnt[2]++;
    if (status & BIT_IRQ_TX_BF1_ACK)  irq_cnt[3]++;

    ret = IRQ_HANDLED;
  }
  else
  {
    ret = IRQ_NONE;
  }

  dp->stats.irq_pattern_cnt[status]++;
  return ret;
}

/******************************************************************************
 *
 * cs3_msi_isr
 *
 ******************************************************************************/
static irqreturn_t cs3_msi_isr(int irq, struct cs3_device_t *dp)
{
  irqreturn_t   ret;
  u32           status;
  unsigned int  *irq_cnt;

  status = ~readl(dp->reg_base + REG_HOST_IRQ) & BIT_IRQ_MASK;

  if (status != 0)
  {
    writel(status, dp->reg_base + REG_HOST_IRQ);
    readl(dp->reg_base + REG_VERSION);

    dp->task_irq |= status;
    tasklet_schedule(dp->task);

    irq_cnt = &dp->stats.irq_cnt[0];
    if (status & BIT_IRQ_TX_BF0_FULL) irq_cnt[0]++;
    if (status & BIT_IRQ_TX_BF1_FULL) irq_cnt[1]++;
    if (status & BIT_IRQ_TX_BF0_ACK)  irq_cnt[2]++;
    if (status & BIT_IRQ_TX_BF1_ACK)  irq_cnt[3]++;

    ret = IRQ_HANDLED;
  }
  else
  {
    ret = IRQ_NONE;
  }

  dp->stats.irq_pattern_cnt[status]++;
  return ret;
}

/******************************************************************************
 *
 * cs3_irq_request
 *
 * Selects the interrupt mode (MSI if MsiMode is set and MSI is available,
 * legacy otherwise), stores the mode in dp->irq_mode and installs the
 * matching interrupt service routine.
 *
 * returns 0 on success, -ENOMEM if no interrupt could be allocated or the
 * error code of request_irq. In case of an error the interrupt resources
 * allocated here are released again.
 *
 *****************************************************************************/
static int cs3_irq_request(struct cs3_device_t *dp)
{
  int err = 0;
  struct pci_dev *pci_dev = dp->csdev.pci_dev;
  unsigned int irq;
  irqreturn_t (*p_isr)(int irq, struct cs3_device_t*);

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0))
  // new style
  if (  MsiMode
     && pci_alloc_irq_vectors(pci_dev, 1, 1, PCI_IRQ_MSI) == 1
     )
  {
    p_isr = cs3_msi_isr;
  }
  else if (pci_alloc_irq_vectors(pci_dev, 1, 1, PCI_IRQ_LEGACY) == 1)
  {
    p_isr = cs3_legacy_isr;
  }
  else
  {
    log_error("can't allocate any interrupt\n");
    CLEANUP(-ENOMEM);
  }

  irq = pci_irq_vector(pci_dev, 0);
#else
  // old style
  if (  MsiMode
     && pci_enable_msi(pci_dev) == 0
     )
  {
    p_isr = cs3_msi_isr;
  }
  else
  {
    p_isr = cs3_legacy_isr;
  }

  irq = pci_dev->irq;
#endif

  dp->irq_mode = (p_isr == cs3_msi_isr);

  log_trace("using %s irq mode\n", dp->irq_mode ? "MSI" : "legacy");

  if ((err = request_irq(irq, (irqreturn_t (*)(int,void*))p_isr, IRQF_SHARED, "cs3", dp)) != 0)
  {
    log_error("request_irq(%d) returned: %d\n", irq, err);

    // undo the vector allocation / MSI activation from above (same calls
    // as in cs3_irq_free), it would stay active otherwise
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0))
    pci_free_irq_vectors(pci_dev);
#else
    pci_disable_msi(pci_dev);
#endif
    goto cleanup;
  }

  log_trace("successfully initialized irq: %d\n", irq);

cleanup:
  return err;
}

/******************************************************************************
 *
 * cs3_irq_free
 *
 * Removes the interrupt service routine that was installed by
 * cs3_irq_request and releases the interrupt vector / disables MSI.
 * Nothing is done if the device has no PCI device assigned.
 *
 *****************************************************************************/
static void cs3_irq_free(struct cs3_device_t *dp)
{
  struct pci_dev *pci_dev = dp->csdev.pci_dev;

  if (pci_dev != NULL)
  {
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0))
    free_irq(pci_irq_vector(pci_dev, 0), dp);
    pci_free_irq_vectors(pci_dev);
#else
    free_irq(pci_dev->irq, dp);
    pci_disable_msi(pci_dev);
#endif
  }
}

/******************************************************************************
 *
 * cs3_irq_enable
 *
 ******************************************************************************/
static void cs3_irq_enable(struct cs3_device_t *dp)
{
  writel( BIT_IRQ_TX_BF0_ACK
        | BIT_IRQ_TX_BF1_ACK
        | BIT_IRQ_RX_BF0_FULL
        | BIT_IRQ_RX_BF1_FULL, dp->reg_base + REG_HOST_IRQM);
}

/******************************************************************************
 *
 * cs3_irq_disable
 *
 ******************************************************************************/
static void cs3_irq_disable(struct cs3_device_t *dp)
{
  writel(0, dp->reg_base + REG_HOST_IRQM);
}

/******************************************************************************
 *
 * cs3_irq_clear
 *
 ******************************************************************************/
static inline void cs3_irq_clear(struct cs3_device_t *dp)
{
  writel( BIT_IRQ_TX_BF0_ACK
        | BIT_IRQ_TX_BF1_ACK
        | BIT_IRQ_RX_BF0_FULL
        | BIT_IRQ_RX_BF1_FULL, dp->reg_base + REG_HOST_IRQ);
}

/******************************************************************************
 *
 * cs3_wait_for_spm
 *
 * Waits (interruptible) until a spontaneous message is pending, i.e. until
 * dp->spm_state is not zero. 'timeout' is given in jiffies.
 *
 * returns 0             a message is pending
 *         -ECS2TIMEOUT  no message arrived within the given time
 *         < 0           error code of the wait function (e.g. if the wait
 *                       was interrupted by a signal)
 *
 ******************************************************************************/
static inline int cs3_wait_for_spm(struct cs3_device_t *dp, long timeout)
{
  for (;;)
  {
    if (dp->spm_state != 0) return 0;

    // returns the remaining time, 0 on timeout or a negative error code
    timeout = wait_event_interruptible_timeout(dp->spm_event, dp->spm_state != 0, timeout);

    if (timeout < 0)  return timeout;
    if (timeout == 0) return -ECS2TIMEOUT;
  }
}

/******************************************************************************
 *
 * cs3_wait_for_rx_full_interruptible
 *
 * Waits (interruptible) until RX buffer 'idx' is marked as full. The wait is
 * aborted if the receive path was reset or if a 'CMDS ready' message arrived
 * in the meantime. 'timeout' is given in jiffies.
 *
 * returns 0             RX buffer is full
 *         -ECS2TIMEOUT  buffer didn't become full within the given time
 *         < 0           error code of cs3_spm_error (reset / CMDS ready) or
 *                       of the wait function (e.g. interrupted by a signal)
 *
 *****************************************************************************/
static inline int cs3_wait_for_rx_full_interruptible(struct cs3_device_t *dp, int idx, long timeout)
{
  while (dp->rx_full[idx] == 0)
  {
    timeout = wait_event_interruptible_timeout(dp->rx_event,
                                               (  dp->rx_full[idx] != 0
                                               || dp->rx_reset
                                               || dp->rcvd_cmds_rdy
                                               ),
                                               timeout);

    // test the same flag the wait condition uses (rx_reset, not tx_reset),
    // otherwise a wake-up by rx_reset alone would spin in this loop
    if (dp->rx_reset)
    {
      return cs3_spm_error(__func__, dp);
    }

    if (dp->rcvd_cmds_rdy)
    {
      dp->rcvd_cmds_rdy = 0;
      return cs3_spm_error(__func__, dp);
    }

    if (timeout <= 0)
    {
      return((timeout < 0) ? timeout : -ECS2TIMEOUT);
    }
  }

  return 0;
}

/******************************************************************************
 *
 * cs3_wait_for_rx_full
 *
 * Waits (not interruptible) until RX buffer 'idx' is marked as full. The
 * wait is aborted if the receive path was reset or if a 'CMDS ready' message
 * arrived in the meantime. 'timeout' is given in jiffies.
 *
 * returns 0             RX buffer is full
 *         -ECS2TIMEOUT  buffer didn't become full within the given time
 *         < 0           error code of cs3_spm_error (reset / CMDS ready)
 *
 *****************************************************************************/
static inline int cs3_wait_for_rx_full(struct cs3_device_t *dp, int idx, long timeout)
{
  while (dp->rx_full[idx] == 0)
  {
    timeout = wait_event_timeout(dp->rx_event,
                                 (  dp->rx_full[idx] != 0
                                 || dp->rx_reset
                                 || dp->rcvd_cmds_rdy
                                 ),
                                 timeout);

    // test the same flag the wait condition uses (rx_reset, not tx_reset),
    // otherwise a wake-up by rx_reset alone would spin in this loop
    if (dp->rx_reset)
    {
      return cs3_spm_error(__func__, dp);
    }

    if (dp->rcvd_cmds_rdy)
    {
      dp->rcvd_cmds_rdy = 0;
      return cs3_spm_error(__func__, dp);
    }

    if (timeout <= 0)
    {
      return((timeout < 0) ? timeout : -ECS2TIMEOUT);
    }
  }

  return 0;
}

/******************************************************************************
 *
 * cs3_wait_for_tx_ack
 *
 * Waits (not interruptible) until TX buffer 'idx' is acknowledged. The wait
 * is aborted if the transmit path was reset or if a 'CMDS ready' message
 * arrived in the meantime. 'timeout' is given in jiffies.
 *
 * returns 0             TX buffer was acknowledged
 *         -ECS2TIMEOUT  no acknowledge within the given time
 *         < 0           error code of cs3_spm_error (reset / CMDS ready)
 *
 *****************************************************************************/
static inline int cs3_wait_for_tx_ack(struct cs3_device_t *dp, int idx, long timeout)
{
  for (;;)
  {
    if (dp->tx_ack[idx] != 0) return 0;

    timeout = wait_event_timeout(dp->tx_event,
                                 (  dp->tx_ack[idx] != 0
                                 || dp->tx_reset
                                 || dp->rcvd_cmds_rdy
                                 ),
                                 timeout);

    // reset has priority over everything else
    if (dp->tx_reset) return cs3_spm_error(__func__, dp);

    // 'CMDS ready' is consumed by the first waiter that sees it
    if (dp->rcvd_cmds_rdy)
    {
      dp->rcvd_cmds_rdy = 0;
      return cs3_spm_error(__func__, dp);
    }

    if (timeout < 0)  return timeout;
    if (timeout == 0) return -ECS2TIMEOUT;
  }
}

/******************************************************************************
 *
 * cs3_task_proc
 *
 * Tasklet function (bottom half of the ISRs). 'param' is the pointer to the
 * device structure.
 *
 * Processes the interrupt bits that were collected in dp->task_irq:
 *   TX ack 0/1  -> set tx_ack[n] and wake the TX waiters
 *   RX full 0   -> set rx_full[0] and wake the RX waiters
 *   RX full 1   -> either a spontaneous message (upper 16 bits of
 *                  REG_RX_LEN1 are not zero) or regular data in RX buffer 1
 *
 * The whole function runs under task_lock.
 *
 *****************************************************************************/
static void cs3_task_proc(unsigned long param)
{
  struct cs3_device_t *dp = (struct cs3_device_t *)param;
  unsigned long flags;

  spin_lock_irqsave(&dp->task_lock, flags);

  if (dp->task_irq & BIT_IRQ_TX_BF0_ACK)
  {
    dp->task_irq &= ~BIT_IRQ_TX_BF0_ACK;

    // publish the flag before the waiters are woken
    dp->tx_ack[0] = 1;
    smp_wmb();
    wake_up(&dp->tx_event);
    dp->stats.evt_cnt[2]++;
  }

  if (dp->task_irq & BIT_IRQ_TX_BF1_ACK)
  {
    dp->task_irq &= ~BIT_IRQ_TX_BF1_ACK;

    dp->tx_ack[1] = 1;
    smp_wmb();
    wake_up(&dp->tx_event);
    dp->stats.evt_cnt[3]++;
  }

  if (dp->task_irq & BIT_IRQ_RX_BF0_FULL)
  {
    dp->task_irq &= ~BIT_IRQ_RX_BF0_FULL;

    dp->rx_full[0] = 1;
    smp_wmb();
    wake_up_all(&dp->rx_event);
    dp->stats.evt_cnt[0]++;
  }

  if (dp->task_irq & BIT_IRQ_RX_BF1_FULL)
  {
    unsigned int spm;

    dp->task_irq &= ~BIT_IRQ_RX_BF1_FULL;

    // the upper 16 bits of the length register carry the message code
    spm = readl(dp->reg_base + REG_RX_LEN1) >> 16;

    if (spm != 0)
    {
      // spontaneous message
      dp->spm_state = spm;

      log_info("spontaneous message from [%s] received: %04x %s\n", dp->csdev.device_name, spm, cs3_spm_text(spm));

      // bit 0x1000 distinguishes 'panic' messages (set) from ready messages (clear)
      if ((spm & 0x1000) == 0)
      {
        // ready message
        dp->tx_reset = 0;
        dp->rx_reset = 0;

        if (spm == 0x8000)
        {
          // commands that were sent between BL ready and CMDS ready need to be unblocked
          dp->rcvd_cmds_rdy = 1;

          dp->state = STATE_IDLE;

          smp_wmb();
          wake_up_all(&dp->rx_event);
          wake_up_all(&dp->tx_event);
        }
      }
      else
      {
        // 'panic' message
        dp->tx_reset = 1;
        dp->rx_reset = 1;

        smp_wmb();
        wake_up_all(&dp->rx_event);
        wake_up_all(&dp->tx_event);
      }

      // in any case notify whoever waits in cs3_wait_for_spm
      smp_wmb();
      wake_up_interruptible(&dp->spm_event);
    }
    else
    {
      // no message code: regular data in RX buffer 1
      dp->rx_full[1] = 1;
      smp_wmb();
      wake_up_all(&dp->rx_event);
    }

    dp->stats.evt_cnt[1]++;
  }

  spin_unlock_irqrestore(&dp->task_lock, flags);
}

/******************************************************************************
 *
 * cs3_open
 *
 * Called when a session is opened. This device type needs no per-session
 * setup, so the function only reports success.
 *
 * returns 0 (always)
 *
 *****************************************************************************/
static int cs3_open(struct cs_session_t *session)
{
  (void)session;    // not used

  return 0;
}

/******************************************************************************
 *
 * cs3_close
 *
 * Called when a session is closed. If the session still holds the device
 * semaphore, the semaphore is released so that the device isn't blocked
 * by a session that no longer exists.
 *
 * returns 0 (always)
 *
 *****************************************************************************/
static int cs3_close(struct cs_session_t *session)
{
  struct cs3_device_t *dp = (struct cs3_device_t*)session->dp;

  if (!session->has_sema) return 0;

  session->has_sema = 0;
  up(&dp->sema);
  log_info("orphaned semaphore unlocked [%d]\n", dp->sema.count);

  return 0;
}

/******************************************************************************
 *
 * cs3_reset
 *
 * Resets the device interface: aborts all pending waits, clears the pending
 * spontaneous message, re-initializes the interface registers (including the
 * DMA buffer addresses), enables the host interrupts and resets the driver's
 * buffer flags and statistics.
 *
 * NOTE: rx_reset / tx_reset stay set when the function returns; they are
 * cleared by cs3_task_proc as soon as a ready message arrives.
 *
 * returns 0 (always)
 *
 ******************************************************************************/
static int cs3_reset(struct cs3_device_t *dp)
{
  // set the flags first so that the woken waiters see the reset
  dp->rx_reset = 1;
  dp->tx_reset = 1;

  wake_up_all(&dp->rx_event);
  wake_up_all(&dp->tx_event);

  // drop any pending spontaneous message
  dp->spm_state = 0;

  writel(0,                        dp->reg_base + REG_RESET);
  writel(0,                        dp->reg_base + REG_HOST_2_CS_MBX0);
  writel(0,                        dp->reg_base + REG_HOST_2_CS_MBX1);
  writel(0,                        dp->reg_base + REG_TX_LEN0);
  writel(0,                        dp->reg_base + REG_TX_LEN1);
  // only the lower 32 bits of the DMA addresses are programmed
  writel(dp->rx_buf0.phys_addr_lo, dp->reg_base + REG_DMA0_ADDR);
  writel(dp->rx_buf1.phys_addr_lo, dp->reg_base + REG_DMA1_ADDR);

  udelay(10);

  readl(dp->reg_base + REG_VERSION);     // flush PCI

  cs3_irq_enable(dp);

  dp->state = STATE_RESET;

  // both TX buffers count as acknowledged (a wait for TX ack returns
  // immediately), both RX buffers as empty
  dp->tx_ack[0] = 1;
  dp->tx_ack[1] = 1;
  dp->rx_full[0] = 0;
  dp->rx_full[1] = 0;

  memset(&dp->stats, 0, sizeof(dp->stats));
  return 0;
}

/******************************************************************************
 *
 * cs3_halt
 *
 * Sends the shutdown message (0x5344 in the upper 16 bits of REG_TX_LEN1,
 * 'mode' in the lower bits) to the CryptoServer. All pending waits are
 * aborted before.
 *
 * Only for SHUTDOWN_MODE_SUSPEND the function waits for the response of the
 * CryptoServer; for every other mode it returns right after the message was
 * sent.
 *
 * returns 0        success
 *         -EINVAL  not supported by this model (model < 4)
 *         -ENOMSG  CryptoServer answered with something else than 0x5344
 *         < 0      error code of cs3_wait_for_spm
 *
 * NOTE(review): the message is also sent for SHUTDOWN_MODE_NONE - confirm
 * that the CryptoServer ignores mode 0.
 *
 *****************************************************************************/
static int cs3_halt(struct cs3_device_t *dp, unsigned int mode)
{
  int err;

  if (dp->csdev.model < 4) return -EINVAL;

  // set the flags first so that the woken waiters see the reset
  dp->rx_reset = 1;
  dp->tx_reset = 1;

  wake_up_all(&dp->rx_event);
  wake_up_all(&dp->tx_event);

  // drop any pending spontaneous message, the response is expected here
  dp->spm_state = 0;

  // send SPM
  writel(0x53440000 + mode, dp->reg_base + REG_TX_LEN1);
  // presumably signals 'TX buffer 1 full' to the CryptoServer (written
  // inverted) - TODO confirm against the hardware documentation
  writel(~BIT_IRQ_TX_BF1_FULL, dp->reg_base + REG_CS2_IRQ);

  switch (mode)
  {
    case SHUTDOWN_MODE_SUSPEND:
      break;

    case SHUTDOWN_MODE_HALT:
    default:
      return 0;
  }

  // wait on response SPM
  // NOTE(review): the timeout is passed on in jiffies, so '500' depends on
  // HZ; all other callers convert from milliseconds - confirm the intention
  if ((err = cs3_wait_for_spm(dp, 500)) != 0)
  {
    log_error("cs3_wait_for_spm returned: %d\n", err);
    return err;
  }

  if (dp->spm_state != 0x5344)
  {
    log_error("CryptoServer didn't respond to shutdown signal, spm:%08x\n", dp->spm_state);
    return -ENOMSG;
  }

  return 0;
}

/******************************************************************************
 *
 * cs3_shutdown
 *
 * Called on system shutdown; 'state' is the system state that is entered.
 * The CryptoServer is halted (mode given by module parameter ShutdownMode)
 * if the system halts or powers off. Nothing is done on restart or for any
 * other state.
 *
 * returns result of cs3_halt, or 0 if nothing had to be done
 *
 *****************************************************************************/
static int cs3_shutdown(struct cs3_device_t *dp, int state)
{
  if (state == SYSTEM_HALT || state == SYSTEM_POWER_OFF)
  {
    return cs3_halt(dp, ShutdownMode & 3);
  }

  // SYSTEM_RESTART and all other states: nothing to do
  return 0;
}

/******************************************************************************
 *
 * cs3_suspend
 *
 * Called when the system is suspended. Halts the CryptoServer with the mode
 * given by module parameter ShutdownMode (lower two bits).
 *
 * returns result of cs3_halt
 *
 *****************************************************************************/
static int cs3_suspend(struct cs3_device_t *dp)
{
  const unsigned int mode = ShutdownMode & 3;

  return cs3_halt(dp, mode);
}

/******************************************************************************
 *
 * cs3_resume
 *
 * Called when the system resumes. Brings the device interface back into a
 * defined state by doing a reset.
 *
 * returns result of cs3_reset
 *
 *****************************************************************************/
static int cs3_resume(struct cs3_device_t *dp)
{
  int err = cs3_reset(dp);

  return err;
}

/******************************************************************************
 *
 * cs3_info
 *
 * Writes a human readable status report of the device (versions, counters,
 * driver flags and the current content of several device registers) to
 * 'info'. At most 'max' bytes including the terminating zero are written;
 * the report is truncated if the buffer is too small.
 *
 * The report contains additional lines if LogLevel is at least
 * LOG_LEVEL_TRACE and (mailbox / system status) if the model is >= 5.
 *
 * returns number of characters written (without terminating zero) or
 *         -ENODEV if 'dp' is NULL
 *
 *****************************************************************************/
static int cs3_info(struct cs3_device_t *dp, char *info, int max)
{
  struct cs_device_t *csdev;
  char *p = info;
  int len = 0;

  unsigned int  version;
  unsigned int  *cnt;

  union host_irq_status_t host_irq;
  union cs2_irq_status_t  cs2_irq;

  // check the pointer before anything is derived from it
  if (dp == NULL) return -ENODEV;

  csdev = &dp->csdev;

  len += scnprintf(p+len, max-len, "drv vers.      %s\n", DriverVersionString);

  version = readl(dp->reg_base + REG_VERSION);
  len += scnprintf(p+len, max-len, "fpga vers.     %u.%u.%u.%u\n", version >> 24, (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);

  len += scnprintf(p+len, max-len, "slot           %-16.16s\n", csdev->slot);
  len += scnprintf(p+len, max-len, "model          %s\n", ModelTxt[csdev->model&7]);
  len += scnprintf(p+len, max-len, "use count      %d\n", csdev->use_cnt);

  len += scnprintf(p+len, max-len, "lock count     %d\n", dp->sema.count);
  len += scnprintf(p+len, max-len, "timeout        %u\n", jiffies_to_msecs(dp->timeout));

  len += scnprintf(p+len, max-len, "crc_flag       %u\n", dp->crc_flag);
  len += scnprintf(p+len, max-len, "irq mode       %s\n", dp->irq_mode ? "MSI" : "legacy");

  len += scnprintf(p+len, max-len, "spont. mesg.   %04x %s\n", dp->spm_state, cs3_spm_text(dp->spm_state));
  len += scnprintf(p+len, max-len, "state          %s\n", StateText[dp->state&15]);

  len += scnprintf(p+len, max-len, "tx_err         %d\n", dp->tx_err);
  len += scnprintf(p+len, max-len, "rx_err         %d\n", dp->rx_err);
  len += scnprintf(p+len, max-len, "rx_crc_err     %u\n", dp->rx_crc_err);

  len += scnprintf(p+len, max-len, "tx_count       %u\n", dp->tx_count);
  len += scnprintf(p+len, max-len, "rx_count       %u\n", dp->rx_count);

  // counters are stored in the order rx_full_0, rx_full_1, tx_ack_0, tx_ack_1
  len += scnprintf(p+len, max-len, "counter        %10s %10s %10s %10s\n", "tx_ack_0", "tx_ack_1", "rx_full_0", "rx_full_1");
  len += scnprintf(p+len, max-len, " interrupts    %10u %10u %10u %10u\n", dp->stats.irq_cnt[2], dp->stats.irq_cnt[3], dp->stats.irq_cnt[0], dp->stats.irq_cnt[1]);
  len += scnprintf(p+len, max-len, " events        %10u %10u %10u %10u\n", dp->stats.evt_cnt[2], dp->stats.evt_cnt[3], dp->stats.evt_cnt[0], dp->stats.evt_cnt[1]);

  // the upper 16 bits of the length registers 1 carry the message code and are masked out
  len += scnprintf(p+len, max-len, "tx_len         %-4u %-4u\n", readl(dp->reg_base + REG_TX_LEN0), readl(dp->reg_base + REG_TX_LEN1) & 0xFFFF);
  len += scnprintf(p+len, max-len, "rx_len         %-4u %-4u\n", readl(dp->reg_base + REG_RX_LEN0), readl(dp->reg_base + REG_RX_LEN1) & 0xFFFF);

  // IRQ status registers are shown inverted, as they are evaluated by the ISRs;
  // values in brackets are the flags kept by the driver
  host_irq.val = ~readl(dp->reg_base + REG_HOST_IRQ) & 0xf;
  len += scnprintf(p+len, max-len, "host irqs      0x%x [mask:0x%x]\n", host_irq.val, readl(dp->reg_base + REG_HOST_IRQM) & 0xf);
  len += scnprintf(p+len, max-len, " tx_ack        %d:%d [%d:%d]\n", host_irq.tx_bf0_ack,  host_irq.tx_bf1_ack,  dp->tx_ack[0],  dp->tx_ack[1] );
  len += scnprintf(p+len, max-len, " rx_full       %d:%d [%d:%d]\n", host_irq.rx_bf0_full, host_irq.rx_bf1_full, dp->rx_full[0], dp->rx_full[1]);

  cs2_irq.val  = ~readl(dp->reg_base + REG_CS2_IRQ ) & 0xf;
  len += scnprintf(p+len, max-len, "cs2 irqs       0x%x [mask:0x%x]\n", cs2_irq.val, readl(dp->reg_base + REG_CS2_IRQM) & 0xf);
  len += scnprintf(p+len, max-len, " tx_full       %d:%d\n", cs2_irq.tx_bf0_full, cs2_irq.tx_bf1_full);
  len += scnprintf(p+len, max-len, " rx_ack        %d:%d\n", cs2_irq.rx_bf0_ack,  cs2_irq.rx_bf1_ack);

  if (LogLevel >= LOG_LEVEL_TRACE)
  {
    // IRQ status patterns seen by the ISR, grouped by the number of bits set
    cnt = dp->stats.irq_pattern_cnt;
    len += scnprintf(p+len, max-len, "irq_pat_cnt[0] %8u\n", cnt[0]);
    len += scnprintf(p+len, max-len, "           [1] %8u %8u %8u %8u\n", cnt[1], cnt[2], cnt[4], cnt[8]);
    len += scnprintf(p+len, max-len, "           [2] %8u %8u %8u %8u %8u %8u\n", cnt[3], cnt[5], cnt[6], cnt[9],cnt[10], cnt[12]);
    len += scnprintf(p+len, max-len, "           [3] %8u %8u %8u\n", cnt[7], cnt[13], cnt[14]);
    len += scnprintf(p+len, max-len, "           [4] %8u\n", cnt[15]);
  }

  if (csdev->model >= 5)
  {
    len += scnprintf(p+len, max-len, "mbx pc2dsp     %08x %08x\n", readl(dp->reg_base + REG_HOST_2_CS_MBX0),
                                                                  readl(dp->reg_base + REG_HOST_2_CS_MBX1));

    len += scnprintf(p+len, max-len, "mbx dsp2pc     %08x %08x\n", readl(dp->reg_base + REG_CS_2_HOST_MBX0),
                                                                  readl(dp->reg_base + REG_CS_2_HOST_MBX1));
    if (LogLevel >= LOG_LEVEL_TRACE)
    {
      len += scnprintf(p+len, max-len, "sysstat        %08x %08x\n", readl(dp->reg_base + REG_SYSSTAT),
                                                                    readl(dp->reg_base + REG_SYSSTAT_CHG));
    }
  }

  return len;
}

/******************************************************************************
 *
 * cs3_ioctl
 *
 *****************************************************************************/
static int cs3_ioctl(struct cs_session_t *session, unsigned int cmd, unsigned long arg)
{
  int err = 0;
  struct cs3_device_t *dp = (struct cs3_device_t*)session->dp;

  switch (cmd)
  {
    //--------------------------------------------------------------------------------
    case CS2IOC_SHUTDOWN:
    //--------------------------------------------------------------------------------
      return cs3_halt(dp, SHUTDOWN_MODE_HALT);

    //-------------------------------------------------------------------------
    case CS2IOC_HRESET:
    //-------------------------------------------------------------------------
      log_trace("hardware reset\n");
      cs3_halt(dp, SHUTDOWN_MODE_SUSPEND);
      cs3_reset(dp);
      return 0;

    //--------------------------------------------------------------------------------
    case CS2IOC_GETHWTYPE:
    //--------------------------------------------------------------------------------
      if (put_user(dp->csdev.model, (int*)arg) != 0) return -EFAULT;
      return 0;

    //--------------------------------------------------------------------------------
    case CS2IOC_WAIT_RDY:
    //--------------------------------------------------------------------------------
    {
      struct cs2_waitrdy *p_wait = (struct cs2_waitrdy *)arg;
      long timeout;

      if (get_user(timeout, &p_wait->timeout)) return -EFAULT;

      if (timeout < 0) timeout = MAX_TIMEOUT_RESET;

      timeout = msecs_to_jiffies(timeout);

      if ((err = cs3_wait_for_spm(dp, timeout)) != 0)
      {
        log_error("cs3_wait_for_spm returned: %d\n", err);
        return err;
      }

      if (dp->rx_reset) return cs3_spm_error(__func__, dp);

      if (put_user(dp->spm_state, &p_wait->state) != 0) return -EFAULT;

      dp->spm_state = 0;
      return 0;
    }

    //--------------------------------------------------------------------------------
    case CS2IOC_GETREQ:
    //--------------------------------------------------------------------------------
    {
      struct cs2_getreq *p_req = (struct cs2_getreq *)arg;
      long timeout = 0;

      if (get_user(timeout, &p_req->timeout)) return -EFAULT;

      /*
      if (  dp->state != STATE_WRITE_DONE
         && dp->state != STATE_READ_DONE
         )
        log_warning("unexpected state: %s\n", StateText[dp->state&15]);
      */

      dp->state = STATE_GETREQ;

      while (err == 0)
      {
        if (dp->rx_full[0] != 0)
        {
          // no wait
          unsigned char *p_hdr = dp->rx_buf0.base_ptr;
          unsigned int  len;

          if ((len = readl(dp->reg_base + REG_RX_LEN0)) < PCIE_HDR_SIZE)
          {
            log_error("RX block dropped: invalid header size: %d\n", len);
            dp->rx_full[0] = 0;
            continue;
          }

          if (  p_hdr[1] != dp->ch_id
             || p_hdr[2] != 0
             )
          {
            log_error("RX block dropped: invalid channel id: %04x, expected: %04x\n", ((p_hdr[1] << 8) + p_hdr[2]), (dp->ch_id << 8));
            dp->rx_full[0] = 0;
            continue;
          }

          dp->req_len = (p_hdr[5] << 16) + (p_hdr[6] << 8) +  p_hdr[7];
          break;
        }

        if (timeout == 0)
        {
          err = -EAGAIN;
          break;
        }

        if (  timeout < 0
           || timeout > MAX_TIMEOUT_REQUEST
           )
          timeout = DEF_TIMEOUT_REQUEST;

        if ((err = cs3_wait_for_rx_full_interruptible(dp, 0, msecs_to_jiffies(timeout))) != 0)
        {
          log_error("cs3_wait_for_rx_full_interruptible returned: %d\n", err);
          break;
        }
      }

      dp->rx_err = err;

      log_back("%2d | req_len: %d", dp->csdev.minor, dp->req_len);

      if (err != 0)
      {
        dp->state = STATE_GETREQ_FAIL;
        log_back_print();
        return err;
      }

      dp->state = STATE_GETREQ_DONE;

      // copy data to user buffer
      if (  put_user(dp->req_len, &p_req->req_size) != 0
         || put_user(dp->ch_id, &p_req->req_id) != 0
         )
        return -EFAULT;

      return 0;
    }

    //--------------------------------------------------------------------------------
    case CS2IOC_CANCEL:
    //--------------------------------------------------------------------------------
      return 0;

     //--------------------------------------------------------------------------------
    case CS2IOC_GETINFO:
    //--------------------------------------------------------------------------------
    {
      struct cs2_info *p_info = (struct cs2_info *)arg;
      char            *buf = NULL;
      int             len;

      if ((buf = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) return -ENOMEM;

      if ((len = cs3_info(dp, buf, PAGE_SIZE)) < 0)
      {
        err = len;
      }
      else
      {
        if (  copy_to_user(p_info->buf, buf, len) != 0
           || put_user(len, &p_info->count) != 0
           )
          err = -EFAULT;
      }

      kfree(buf);
      break;
    }

    //--------------------------------------------------------------------------------
    case CS2IOC_LOCK_WAIT:
    //--------------------------------------------------------------------------------
    {
      if ((err = down_interruptible(&dp->sema)) != 0) return err;
      session->has_sema = 1;
      return 0;
    }

    //--------------------------------------------------------------------------------
    case CS2IOC_UNLOCK:
    //--------------------------------------------------------------------------------
    {
      session->has_sema = 0;
      up(&dp->sema);
      return 0;
    }

    //--------------------------------------------------------------------------------
    case CS2IOC_SETTMOUT:
    //--------------------------------------------------------------------------------
    {
      long timeout = (long)arg;

      if (  timeout < 0
         || timeout > MAX_TIMEOUT_TX_RX
         )
        return -EINVAL;

      if (timeout == 0) timeout = DEF_TIMEOUT_TX_RX;

      dp->timeout = msecs_to_jiffies(timeout);
      return 0;
    }

    //--------------------------------------------------------------------------------
    case CS2IOC_GETTMOUT:
    //--------------------------------------------------------------------------------
    {
      long *p_timeout = (long *)arg;

      if (put_user(jiffies_to_msecs(dp->timeout), p_timeout)) return -EFAULT;
      return 0;
    }

    //-------------------------------------------------------------------------
    case CS2IOC_GET_VERSION:
    //-------------------------------------------------------------------------
    {
      long *p_version = (long *)arg;
      int version = (VERSION_MAJOR << 24) | (VERSION_MINOR << 16) | VERSION_PATCH;

      if (put_user(version, p_version)) return -EFAULT;
      return 0;
    }

    default:
    {
      if (_IOC_TYPE(cmd) != CS2IOC_MAGIC) return -ENOTTY;
      log_error("cs3_ioctl: unknown control code: 0x%08x", cmd);
      return -EINVAL;
    }
  }

  return err;
}

/******************************************************************************
 *
 * cs3_read
 *
 * Copies the pending request (length dp->req_len, see CS2IOC_GETREQ) to the
 * user buffer. The data arrives block by block, alternating between RX
 * buffer 0 and RX buffer 1; every block is copied to dp->buf, acknowledged
 * and checked before its payload is handed to user space.
 *
 * session  session issuing the read (session->dp is the cs3 device)
 * buf      user buffer that receives the payload
 * max_len  size of the user buffer
 *
 * Returns the number of bytes copied or a negative error code
 * (-ECS2BUFSIZE if max_len is 0 or smaller than the request length).
 *
 *****************************************************************************/
static int cs3_read(struct cs_session_t *session, char __user *buf, size_t max_len)
{
  struct cs3_device_t *dp = (struct cs3_device_t*)session->dp;
  int           err;
  unsigned int  ct;
  unsigned int  crc = 0;
  unsigned int  rest;     // length of remaining request
  unsigned char *src;     // points to source buffer [0/1]
  unsigned char *rx_buf;  // temporary buffer that holds block
  unsigned char *rx_data; // points to data portion
  unsigned int  buf_idx;
  unsigned int  blen = 0; // length of block (0 until the first block was read,
                          // the value is logged on every exit path)
  unsigned int  len;      // data length
  unsigned int  ack;
  unsigned int  in_len = 0;

  if (dp->rx_reset) return cs3_spm_error(__func__, dp);

  if (  max_len == 0
     || (rest = dp->req_len) > max_len
     )
    return -ECS2BUFSIZE;

  if (dp->state != STATE_GETREQ_DONE)
    log_warning("unexpected state: %s\n", StateText[dp->state&15]);

  dp->state = STATE_READ;

  rx_buf  = dp->buf;
  rx_data = rx_buf + PCIE_HDR_SIZE;

  for (ct=0; ; ct++)
  {
    // even blocks arrive in RX buffer 0, odd blocks in RX buffer 1
    buf_idx = ct & 1;

    if ((err = cs3_wait_for_rx_full(dp, buf_idx, dp->timeout)) != 0)
    {
      log_error("wait_for_rx_full%d (ct:%d) returned: %d\n", buf_idx, ct, err);
      goto cleanup;
    }

    if (buf_idx)
    {
      dp->rx_full[1] = 0;

      ack = ~BIT_IRQ_RX_BF1_ACK;

      blen = readl(dp->reg_base + REG_RX_LEN1);
      src = dp->rx_buf1.base_ptr;
    }
    else
    {
      dp->rx_full[0] = 0;

      ack = ~BIT_IRQ_RX_BF0_ACK;

      blen = readl(dp->reg_base + REG_RX_LEN0);
      src = dp->rx_buf0.base_ptr;
    }

    // check block size (also bounds the memcpy into dp->buf below)
    if (  blen < PCIE_HDR_SIZE
       || blen > PCIE_BLK_SIZE
       || (blen & 3) != 0
       )
    {
      log_error("invalid block length: %x\n", blen);
      CLEANUP(-ECS2BADPLEN);
    }

    // copy kernel buffer to temporary buffer
    memcpy(rx_buf, src, blen);

    // send ACK
    writel(ack, dp->reg_base + REG_CS2_IRQ);

    blen -= PCIE_HDR_SIZE;

    // check sequence counter
    // NOTE(review): rx_buf[2] is a single byte while ct is not masked, so a
    // request of more than 256 blocks can never pass this check - confirm
    // whether the counter is expected to wrap
    if (  rx_buf[1] != dp->ch_id
       || rx_buf[2] != ct
       )
    {
      log_error("ch_id mismatch: %04x, expected: %04x\n", ((rx_buf[1] << 8) + rx_buf[2]), ((dp->ch_id << 8) + ct));
      CLEANUP(-ECS2BADSEQ);
    }

    // calculate length of payload (the last block may carry padding)
    len = (blen > rest) ? rest : blen;

    // calculate crc
    if (dp->crc_flag)
      crc = crc16_calc(crc, rx_data, len);

    if (copy_to_user(buf, rx_data, len) != 0) CLEANUP(-EFAULT);

    in_len += len;
    buf += len;

    if (rest <= blen)  break;

    rest -= blen;
  }

  // header bytes 3..4 of the last block are compared with the crc
  // accumulated over the whole payload
  if (dp->crc_flag != 0)
  {
    if ((rx_buf[3] << 8) + rx_buf[4] != crc)
    {
      log_error("CRC mismatch: %04x, recalc: %04x\n", ((rx_buf[3] << 8) + rx_buf[4]), crc);
      dp->rx_crc_err++;
      CLEANUP(-ECS2RXCRC);
    }
  }

  dp->rx_count++;

cleanup:
  dp->rx_err = err;

  log_back("%2d | in_len:%d [%d] rest:%d blen:%d", dp->csdev.minor, in_len, dp->req_len, rest, blen);

  if (err != 0)
  {
    dp->state = STATE_READ_FAIL;
    log_back_print();
    return err;
  }

  dp->state = STATE_READ_DONE;

  return in_len;
}

/******************************************************************************
 *
 * cs3_write
 *
 * Sends a command to the device. The user data is split into blocks of at
 * most PCIE_DATA_SIZE payload bytes; each block gets a header (crc flag,
 * channel id, sequence counter, crc, remaining length) and is written
 * alternately to TX buffer 0 and TX buffer 1.
 *
 * session  session issuing the write (session->dp is the cs3 device)
 * buf      user buffer holding the command
 * count    length of the command
 *
 * Returns the number of bytes sent or a negative error code (-EBUSY if the
 * device state does not allow a new command).
 *
 *****************************************************************************/
static int cs3_write(struct cs_session_t *session, const char __user *buf, size_t count)
{
  struct cs3_device_t *dp = (struct cs3_device_t*)session->dp;
  int           err = 0;  // stays 0 if no step fails (a single block write
                          // never calls cs3_wait_for_tx_ack)
  unsigned int  rest;
  unsigned int  ct = 0;
  unsigned int  crc = 0;
  unsigned char *tx_buf  = dp->buf;
  unsigned char *tx_data = tx_buf + PCIE_HDR_SIZE;
  unsigned int  buf_idx;
  unsigned int  out_len = 0;
  unsigned int  len;
  unsigned int  blen = 0; // logged on exit, even if no block was built
  unsigned int  rlen;

  if (dp->tx_reset) return cs3_spm_error(__func__, dp);

  if (  dp->state != STATE_IDLE
     && dp->state < STATE_READ_DONE
     )
  {
    log_warning("unexpected state: %s\n", StateText[dp->state&15]);
    if ((dp->state & 3) == 0) return -EBUSY;
  }

  dp->state = STATE_WRITE;

  // first command sent after cmds ready will hit this condition
  if (dp->rcvd_cmds_rdy != 0) dp->rcvd_cmds_rdy = 0;

  dp->req_len = 0;
  dp->ch_id++;

  dp->tx_ack[0] = 1;
  dp->tx_ack[1] = 1;

  for (rest = count; rest > 0; )
  {
    // payload length of this block; the block length is padded to a
    // multiple of 4 because the block is copied in 32 bit units
    len = (rest > PCIE_DATA_SIZE) ? PCIE_DATA_SIZE : rest;
    blen = PCIE_HDR_SIZE + ((len + 3) & ~3);

    buf_idx = ct & 1;

    // build block
    if (copy_from_user(tx_data, buf, len) != 0) CLEANUP(-EFAULT);

    if (dp->crc_flag)
    {
      crc = crc16_calc(crc, tx_data, len);
      tx_buf[0] = 1;
    }
    else
    {
      tx_buf[0] = 0;
    }

    // channel ID / sequence counter
    tx_buf[1] = dp->ch_id;
    tx_buf[2] = ct;

    // CRC: last block contains resulting crc, previous blocks an intermediate crc
    tx_buf[3] = crc >> 8;
    tx_buf[4] = crc;

    // length: first block contains full length, subsequent blocks the rest length
    rlen = rest;

    tx_buf[5] = rlen >> 16;
    tx_buf[6] = rlen >> 8;
    tx_buf[7] = rlen;

    /* if ((err = cs3_wait_for_tx_ack(dp, buf_idx, dp->timeout)) != 0)
    {
      log_error("cs3_wait_for_tx_ack%d (ct:%d) returned: %d\n", buf_idx, ct, err);
      goto cleanup;
    } */

    if (dp->tx_ack[buf_idx] == 0)
    {
      log_warning("tx_buf%d wasn't acknowledged (ct:%d)\n", buf_idx, ct);
    }

    if (buf_idx)
    {
      __iowrite32_copy((void *)dp->tx_buf1, tx_buf, blen>>2);
    }
    else
    {
      __iowrite32_copy((void *)dp->tx_buf0, tx_buf, blen>>2);
    }

    // the previous block (other buffer) has to be acknowledged before this
    // block is announced
    if (ct > 0)
    {
      if ((err = cs3_wait_for_tx_ack(dp, buf_idx^1, dp->timeout)) != 0)
      {
        log_error("cs3_wait_for_tx_ack%d (ct:%d) returned: %d\n", buf_idx^1, ct, err);
        goto cleanup;
      }
    }

    dp->tx_ack[buf_idx] = 0;

    // send block
    if (buf_idx)
    {
      writel(blen, dp->reg_base + REG_TX_LEN1);
      writel(~BIT_IRQ_TX_BF1_FULL, dp->reg_base + REG_CS2_IRQ);
    }
    else
    {
      writel(blen, dp->reg_base + REG_TX_LEN0);
      writel(~BIT_IRQ_TX_BF0_FULL, dp->reg_base + REG_CS2_IRQ);
    }

    readl(dp->reg_base + REG_VERSION);  // flush PCI

    buf += len;
    out_len += len;

    rest -= len;

    ct++;
  }

  dp->tx_count++;

cleanup:
  dp->tx_err = err;

  // count is a size_t, convert it to match the %d conversion
  log_back("%2d | out_len:%d [%d] rest:%d blen:%d", dp->csdev.minor, out_len, (int)count, rest, blen);

  if (err != 0)
  {
    dp->state = STATE_WRITE_FAIL;
    log_back_print();
    return err;
  }

  dp->state = STATE_WRITE_DONE;

  return out_len;
}

/******************************************************************************
 *
 * cs3_proc_show
 *
 * seq_file show callback: renders the device information text (cs3_info)
 * into a temporary page and emits it to the seq_file.
 *
 * m  seq_file; m->private is the cs3 device
 * v  unused
 *
 * Returns 0 on success, -ENODEV without device, -ENOMEM if the temporary
 * buffer can't be allocated or the error returned by cs3_info.
 *
 ******************************************************************************/
static int cs3_proc_show(struct seq_file *m, void *v)
{
  int                 err = 0;
  char                *buf = NULL;
  int                 len;
  struct cs3_device_t *dp = (struct cs3_device_t*)m->private;

  if (dp == NULL) return -ENODEV;

  if ((buf = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) return -ENOMEM;

  if ((len = cs3_info(dp, buf, PAGE_SIZE)) < 0) CLEANUP(len);

  // the text is data, not a format string: emit it verbatim
  seq_puts(m, buf);

cleanup:
  kfree(buf);
  return err;
}

/******************************************************************************
 *
 * cs3_proc_write
 *
 * Executes a command written to the proc file. The command keyword is
 * matched case insensitively at the start of the text:
 *
 *   RESET              calls cs3_reset
 *   SHUTDOWN [mode]    calls cs3_halt (default mode: SHUTDOWN_MODE_HALT)
 *   TIMEOUT [ms]       sets the TX/RX timeout (default: DEF_TIMEOUT_TX_RX)
 *   LOGLEVEL [level]   sets or, without argument, prints the log level
 *   REG                prints the first 32 registers (4 per line)
 *
 * file   proc file (gives access to the cs3 device)
 * user   user buffer holding the command
 * count  length of the command
 * off    unused
 *
 * Returns count on success, -ENODEV without device, -ENOMEM, -EFAULT or
 * -EINVAL (unknown command).
 *
 ******************************************************************************/
static ssize_t cs3_proc_write(struct file *file, const char __user *user, size_t count, loff_t *off)
{
  struct cs3_device_t *dp = compat_file_to_PDE_DATA(file);
  int   err = 0;
  char  *buf = NULL;
  int   i;

  if (dp == NULL) return -ENODEV;

  if ((buf = kmalloc(count+1, GFP_KERNEL)) == NULL) return -ENOMEM;

  if (copy_from_user(buf, user, count) != 0) CLEANUP(-EFAULT);
  buf[count] = 0;

  if (strncasecmp(buf, "RESET", 5) == 0)
  {
    cs3_reset(dp);
  }
  else if (strncasecmp(buf, "SHUTDOWN", 8) == 0)
  {
    unsigned int mode = SHUTDOWN_MODE_HALT;

    cs_get_args(buf+8, "u", &mode);

    cs3_halt(dp, mode&3);
  }
  else if (strncasecmp(buf, "TIMEOUT", 7) == 0)
  {
    unsigned int timeout = DEF_TIMEOUT_TX_RX;

    cs_get_args(buf+7, "u", &timeout);

    dp->timeout = msecs_to_jiffies(timeout);

    printk("timeout set to: %d ms\n", timeout);
  }
  else if (strncasecmp(buf, "LOGLEVEL", 8) == 0)
  {
    unsigned int new = LogLevel;

    if (cs_get_args(buf+8, "u", &new) > 0)
    {
      printk("LogLevel changed from % d to %d\n", LogLevel, new);
      LogLevel = new;
    }
    else
    {
       printk("LogLevel: %d\n", LogLevel);
    }
  }
  else if (strncasecmp(buf, "REG", 3) == 0)
  {
    char line[80];
    char *p_x = NULL;
    int  ofs;

    for (i=0, ofs=0; i<32; i++, ofs += 4)
    {
      u32 regval = readl(dp->reg_base + ofs);

      // every 4th register starts a new line with the offset
      if ((i & 3) == 0) p_x = line + sprintf(line, "%4x |", ofs);

      // step back onto the '\n' so the next value overwrites it; only the
      // last value of a line keeps its line feed
      p_x += sprintf(p_x, " %08X\n", regval) - 1;

      if ((i & 3) == 3)
      {
        // never pass the assembled text as format string
        printk("%s", line);
        p_x = NULL;
      }
    }
    msleep(100);
  }
  else
  {
    log_error("invalid argument: %s\n", buf);
    CLEANUP(-EINVAL);
  }

cleanup:
  kfree(buf);

  return (err != 0) ? err : (ssize_t)count;
}

/******************************************************************************
 *
 * cs3_proc_open
 *
 * Open callback of the proc file: binds cs3_proc_show and the private data
 * of the proc entry to a single-record seq_file.
 *
 * Returns the result of single_open.
 *
 ******************************************************************************/
static int cs3_proc_open(struct inode *inode, struct file *file)
{
  void *priv = compat_PDE_DATA(inode);

  return single_open(file, cs3_proc_show, priv);
}

//-----------------------------------------------------------------------------
// Operations of the cs3 proc file. Reading goes through the seq_file helpers
// (see cs3_proc_open), writing is handled by cs3_proc_write. The member names
// depend on HAVE_PROC_OPS.
//-----------------------------------------------------------------------------
static const proc_op_t cs3_proc_ops =
{
#ifdef HAVE_PROC_OPS
  .proc_open    = cs3_proc_open,
  .proc_release = single_release,
  .proc_read    = seq_read,
  .proc_lseek   = seq_lseek,
  .proc_write   = cs3_proc_write,
#else
  .owner   = THIS_MODULE,
  .open    = cs3_proc_open,
  .release = single_release,
  .read    = seq_read,
  .llseek  = seq_lseek,
  .write   = cs3_proc_write,
#endif
};


/******************************************************************************
 *
 * cs3_remove
 *
 * Tears a cs3 device down and frees it: interrupts are disabled and the DMA
 * address registers cleared (if the registers are mapped), the interrupt is
 * freed and the PCI device disabled, then DMA buffers, tasklet memory,
 * temporary buffer, I/O mapping and PCI regions are released.
 *
 * dp  device to remove; the memory is freed, the pointer is invalid
 *     afterwards
 *
 * Returns 0 on success, -EINVAL if dp is NULL.
 *
 *****************************************************************************/
static int cs3_remove(struct cs3_device_t *dp)
{
  struct pci_dev *pdev;

  if (!dp)
    return -EINVAL;

  pdev = dp->csdev.pci_dev;

  // silence the hardware as long as the registers are accessible
  if (dp->reg_base)
  {
    cs3_irq_disable(dp);
    writel(0, dp->reg_base + REG_DMA0_ADDR);
    writel(0, dp->reg_base + REG_DMA1_ADDR);
  }

  if (pdev)
  {
    cs3_irq_free(dp);
    pci_disable_device(pdev);
  }

  cs3_dma_free(dp, &dp->rx_buf0);
  cs3_dma_free(dp, &dp->rx_buf1);

  // kfree accepts NULL, no guards needed
  // NOTE(review): the tasklet memory is freed without an explicit
  // tasklet_kill here - confirm that cs3_irq_free takes care of it
  kfree(dp->task);
  kfree(dp->buf);

  cs3_io_unmap(dp);

  if (pdev)
    pci_release_regions(pdev);

  // wipe the device structure before it is handed back
  memset(dp, 0, sizeof(*dp));
  kfree(dp);

  return 0;
}

/******************************************************************************
 *
 * cs3_probe
 *
 *****************************************************************************/
int cs3_probe(struct pci_dev *pci_dev, struct cs3_device_t **pp_dp)
{
  int err;
  struct cs3_device_t *dp;
  struct cs_device_t *csdev;
  unsigned int version;
  unsigned int req_ver = 0;

  // create device
  if ((dp = kmalloc(sizeof(struct cs3_device_t), GFP_KERNEL)) == NULL) return -ENOMEM;
  memset(dp, 0, sizeof(struct cs3_device_t));
  csdev = &dp->csdev;
  csdev->minor = -1;

  csdev->pci_dev = pci_dev;
  csdev->proc_ops = &cs3_proc_ops;

  csdev->p_remove  = (int(*)(struct cs_device_t*))cs3_remove;

  csdev->p_shutdown = (int(*)(struct cs_device_t*, int))cs3_shutdown;

  csdev->p_suspend  = (int(*)(struct cs_device_t*))cs3_suspend;
  csdev->p_resume   = (int(*)(struct cs_device_t*))cs3_resume;

  csdev->p_open    = cs3_open;
  csdev->p_close   = cs3_close;
  csdev->p_ioctl   = cs3_ioctl;
  csdev->p_read    = cs3_read;
  csdev->p_write   = cs3_write;

  // map memory mapped register
  if ((err = pci_request_regions(pci_dev, "cs3")) != 0)
  {
    log_error("pci_request_regions returned: %d\n", err);
    CLEANUP(-ENODEV);
  }

  if ((err = cs3_io_map(dp)) != 0)
  {
    log_error("cs3_io_map(1) returned: %d", err);
    goto cleanup;
  }

  // enable device
  if ((err = pci_enable_device(pci_dev)) != 0)
  {
    log_error("pci_enable_device returned: %d\n", err);
    goto cleanup;
  }

  cs3_irq_disable(dp);

  // set master
  pci_set_master(pci_dev);

  // enable/disable error reporting
  if ((err = cs3_set_error_reporting(pci_dev)) != 0)
  {
    log_error("cs3_set_error_reporting(0x%x) returned: %d\n", ErrorReporting, err);
  }

  // set DMA mask
  if ((err = dma_set_coherent_mask(&pci_dev->dev, DMA_BIT_MASK(32))) != 0)
  {
    log_error("dma_set_coherent_mask(32) returned: %d\n", err);
    goto cleanup;
  }

  // allocate DMA memory
  if ((err = cs3_dma_alloc(dp, PCIE_BLK_SIZE, &dp->rx_buf0)) != 0)
  {
    log_error("cs3_dma_alloc(rx_buf0) returned: %d\n", err);
    goto cleanup;
  }

  if ((err = cs3_dma_alloc(dp, PCIE_BLK_SIZE, &dp->rx_buf1)) != 0)
  {
    log_error("cs3_dma_alloc(rx_buf1) returned: %d\n", err);
    goto cleanup;
  }

  version = readl(dp->reg_base + REG_VERSION);

  switch (version >> 24)
  {
    case 4: req_ver = 0x04010009; break;
    case 5: req_ver = 0x05010009; break;
  }

  if (  req_ver != 0
     && version < req_ver
     )
  {
    log_warning("current FPGA version: 0x%08x\n", version);
    log_warning("updated FPGA version: 0x%08x\n", req_ver);
  }

  writel(dp->rx_buf0.phys_addr_lo, dp->reg_base + REG_DMA0_ADDR);
  writel(dp->rx_buf1.phys_addr_lo, dp->reg_base + REG_DMA1_ADDR);

  init_waitqueue_head(&dp->tx_event);
  init_waitqueue_head(&dp->rx_event);
  init_waitqueue_head(&dp->spm_event);

  spin_lock_init(&dp->task_lock);

  // allocate tasklet
  if ((dp->task = kmalloc(sizeof(struct tasklet_struct), GFP_KERNEL)) == NULL)
  {
    log_error("kmalloc [tasklet] failed\n");
    CLEANUP(-ENOMEM);
  }

  tasklet_init(dp->task, cs3_task_proc, (unsigned long)dp);

  // connect interrupt
  if ((err = cs3_irq_request(dp)) != 0)
  {
    log_error("cs3_irq_request returned: %d\n", err);
    goto cleanup;
  }

  // allocate temporary chunk buffer
  if ((dp->buf = kmalloc(PCIE_BLK_SIZE, GFP_KERNEL)) == NULL)
  {
    log_error("unable to alloc memory for buf\n");
    goto cleanup;
  }

  // initialize device
  dp->ch_id = 0;
  dp->timeout = msecs_to_jiffies(DEF_TIMEOUT_TX_RX);

  sema_init(&dp->sema, 1);

  dp->tx_count = 0;
  dp->rx_crc_err = 0;
  dp->rx_count = 0;

  dp->spm_state = 0;
  dp->crc_flag = 1;

  // clear all host interrupts
  cs3_irq_clear(dp);

  dp->tx_ack[0] = 1;
  dp->tx_ack[1] = 1;

  // enable interrupts
  cs3_irq_enable(dp);

  *pp_dp = dp;
  return 0;

cleanup:
  cs3_remove(dp);
  *pp_dp = NULL;
  return err;
}

MODULE_AUTHOR("Sven Kaltschmidt <sven.kaltschmidt@utimaco.com>");
