src/scoop.c - librsync - Git at Google

 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
  *
  * librsync -- the library for network deltas
  *
  * Copyright (C) 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
  * as published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */

                               /*=
                                | To walk on water you've gotta sink
                                | in the ice.
                                |   -- Shihad, `The General Electric'.
                                */

 /** \file scoop.c
  * This file deals with readahead from caller-supplied buffers.
  *
  * Many functions require a certain minimum amount of input to do their
  * processing. For example, to calculate a strong checksum of a block we need
  * at least a block of input.
  *
  * Since we put the buffers completely under the control of the caller, we
  * can't count on ever getting this much data all in one go. We can't simply
  * wait, because the caller might have a smaller buffer than we require and so
  * we'll never get it. For the same reason we must always accept all the data
  * we're given.
  *
  * So, stream input data that's required for readahead is put into a special
  * buffer, from which the caller can then read. It's essentially like an
  * internal pipe, which on any given read request may or may not be able to
  * actually supply the data.
  *
  * As a future optimization, we might try to take data directly from the input
  * buffer if there's already enough there.
  *
  * \todo We probably know a maximum amount of data that can be scooped up, so
  * we could just avoid dynamic allocation. However that can't be fixed at
  * compile time, because when generating a delta it needs to be large enough to
  * hold one full block. Perhaps we can set it up when the job is allocated? It
  * would be kind of nice to not do any memory allocation after startup, as
  * bzlib does this. */

 #include "config.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include "librsync.h"
 #include "job.h"
 #include "stream.h"
 #include "trace.h"
 #include "util.h"

 /** Try to accept a from the input buffer to get LEN bytes in the scoop. */
 void rs_scoop_input(rs_job_t *job, size_t len)
 {
     rs_buffers_t *stream = job->stream;
     size_t tocopy;

     assert(len > job->scoop_avail);

     if (job->scoop_alloc < len) {
         /* Need to allocate a larger scoop. */
         rs_byte_t *newbuf;
         size_t newsize;
         for (newsize = 64; newsize < len; newsize <<= 1) ;
         newbuf = rs_alloc(newsize, "scoop buffer");
         if (job->scoop_avail)
             memcpy(newbuf, job->scoop_next, job->scoop_avail);
         if (job->scoop_buf)
             free(job->scoop_buf);
         job->scoop_buf = job->scoop_next = newbuf;
         rs_trace("resized scoop buffer to " FMT_SIZE " bytes from " FMT_SIZE "",
                  newsize, job->scoop_alloc);
         job->scoop_alloc = newsize;
     } else if (job->scoop_buf != job->scoop_next) {
         /* Move existing data to the front of the scoop. */
         rs_trace("moving scoop " FMT_SIZE " bytes to reuse " FMT_SIZE " bytes",
                  job->scoop_avail, (size_t)(job->scoop_next - job->scoop_buf));
         memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
         job->scoop_next = job->scoop_buf;
     }
     /* take as much input as is available, to give up to LEN bytes in the
        scoop. */
     tocopy = len - job->scoop_avail;
     if (tocopy > stream->avail_in)
         tocopy = stream->avail_in;
     assert(tocopy + job->scoop_avail <= job->scoop_alloc);

     memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
     rs_trace("accepted " FMT_SIZE " bytes from input to scoop", tocopy);
     job->scoop_avail += tocopy;
     stream->next_in += tocopy;
     stream->avail_in -= tocopy;
 }

 /** Advance the input cursor forward \p len bytes.
  *
  * This is used after doing readahead, when you decide you want to keep it. \p
  * len must be no more than the amount of available data, so you can't cheat.
  *
  * So when creating a delta, we require one block of readahead. But after
  * examining that block, we might decide to advance over all of it (if there is
  * a match), or just one byte (if not). */
 void rs_scoop_advance(rs_job_t *job, size_t len)
 {
     rs_buffers_t *stream = job->stream;

     /* It never makes sense to advance over a mixture of bytes from the scoop
        and input, because you couldn't possibly have looked at them all at the
        same time. */
     if (job->scoop_avail) {
         /* reading from the scoop buffer */
         rs_trace("advance over " FMT_SIZE " bytes from scoop", len);
         assert(len <= job->scoop_avail);
         job->scoop_avail -= len;
         job->scoop_next += len;
     } else {
         rs_trace("advance over " FMT_SIZE " bytes from input buffer", len);
         assert(len <= stream->avail_in);
         stream->avail_in -= len;
         stream->next_in += len;
     }
 }

 /** Read from scoop without advancing.
  *
  * Ask for LEN bytes of input from the stream. If that much data is available,
  * then return a pointer to it in PTR, advance the stream input pointer over
  * the data, and return RS_DONE. If there's not enough data, then accept
  * whatever is there into a buffer, advance over it, and return RS_BLOCKED.
  *
  * The data is not actually removed from the input, so this function lets you
  * do readahead. If you want to keep any of the data, you should also call
  * rs_scoop_advance() to skip over it. */
 rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
 {
     rs_buffers_t *stream = job->stream;
     rs_job_check(job);

     if (!job->scoop_avail && stream->avail_in >= len) {
         /* The scoop is empty and there's enough data in the input. */
         *ptr = stream->next_in;
         rs_trace("got " FMT_SIZE " bytes direct from input", len);
         return RS_DONE;
     } else if (job->scoop_avail < len && stream->avail_in) {
         /* There is not enough data in the scoop. */
         rs_trace("scoop has less than " FMT_SIZE " bytes, scooping from "
                  FMT_SIZE " input bytes", len, stream->avail_in);
         rs_scoop_input(job, len);
     }
     if (job->scoop_avail >= len) {
         /* There is enough data in the scoop now. */
         rs_trace("scoop has at least " FMT_SIZE " bytes, this is enough",
                  job->scoop_avail);
         *ptr = job->scoop_next;
         return RS_DONE;
     } else if (stream->eof_in) {
         /* Not enough input data and at EOF. */
         rs_trace("reached end of input stream");
         return RS_INPUT_ENDED;
     } else {
         /* Not enough input data yet. */
         rs_trace("blocked with insufficient input data");
         return RS_BLOCKED;
     }
 }

 /** Read LEN bytes if possible, and remove them from the input scoop.
  *
  * \param *job An rs_job_t pointer to the job instance.
  *
  * \param len The length of the data in the ptr buffer.
  *
  * \param **ptr will be updated to point to a read-only buffer holding the
  * data, if enough is available.
  *
  * \return RS_DONE if there was enough data, RS_BLOCKED if there was not enough
  * data yet, or RS_INPUT_ENDED if there was not enough data and at EOF. */
 rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
 {
     rs_result result;

     result = rs_scoop_readahead(job, len, ptr);
     if (result == RS_DONE)
         rs_scoop_advance(job, len);
     return result;
 }

 /** Read whatever data remains in the input stream.
  *
  * \param *job The rs_job_t instance the job instance.
  *
  * \param *len will be updated to the length of the available data.
  *
  * \param **ptr will point at the available data.
  *
  * \return RS_DONE if there was data, RS_INPUT_ENDED if there was no data and
  * at EOF, RS_BLOCKED if there was no data and not at EOF. */
 rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
 {
     rs_buffers_t *stream = job->stream;

     *len = job->scoop_avail + stream->avail_in;
     if (*len)
         return rs_scoop_read(job, *len, ptr);
     else if (stream->eof_in)
         return RS_INPUT_ENDED;
     else
         return RS_BLOCKED;
 }

 /** Return the total number of bytes available including the scoop and input
  * buffer. */
 size_t rs_scoop_total_avail(rs_job_t *job)
 {
     return job->scoop_avail + job->stream->avail_in;
 }
	/= -- c-basic-offset: 4; indent-tabs-mode: nil; -*-
	*
	* librsync -- the library for network deltas
	*
	* Copyright (C) 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
	*
	* This program is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public License
	* as published by the Free Software Foundation; either version 2.1 of
	* the License, or (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful, but
	* WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with this program; if not, write to the Free Software
	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	*/

	/*=
	\| To walk on water you've gotta sink
	\| in the ice.
	\| -- Shihad, `The General Electric'.
	*/

	/** \file scoop.c
	* This file deals with readahead from caller-supplied buffers.
	*
	* Many functions require a certain minimum amount of input to do their
	* processing. For example, to calculate a strong checksum of a block we need
	* at least a block of input.
	*
	* Since we put the buffers completely under the control of the caller, we
	* can't count on ever getting this much data all in one go. We can't simply
	* wait, because the caller might have a smaller buffer than we require and so
	* we'll never get it. For the same reason we must always accept all the data
	* we're given.
	*
	* So, stream input data that's required for readahead is put into a special
	* buffer, from which the caller can then read. It's essentially like an
	* internal pipe, which on any given read request may or may not be able to
	* actually supply the data.
	*
	* As a future optimization, we might try to take data directly from the input
	* buffer if there's already enough there.
	*
	* \todo We probably know a maximum amount of data that can be scooped up, so
	* we could just avoid dynamic allocation. However that can't be fixed at
	* compile time, because when generating a delta it needs to be large enough to
	* hold one full block. Perhaps we can set it up when the job is allocated? It
	* would be kind of nice to not do any memory allocation after startup, as
	* bzlib does this. */

	#include "config.h"
	#include <assert.h>
	#include <stdlib.h>
	#include <string.h>
	#include "librsync.h"
	#include "job.h"
	#include "stream.h"
	#include "trace.h"
	#include "util.h"

	/** Try to accept a from the input buffer to get LEN bytes in the scoop. */
	void rs_scoop_input(rs_job_t *job, size_t len)
	{
	rs_buffers_t *stream = job->stream;
	size_t tocopy;

	assert(len > job->scoop_avail);

	if (job->scoop_alloc < len) {
	/* Need to allocate a larger scoop. */
	rs_byte_t *newbuf;
	size_t newsize;
	for (newsize = 64; newsize < len; newsize <<= 1) ;
	newbuf = rs_alloc(newsize, "scoop buffer");
	if (job->scoop_avail)
	memcpy(newbuf, job->scoop_next, job->scoop_avail);
	if (job->scoop_buf)
	free(job->scoop_buf);
	job->scoop_buf = job->scoop_next = newbuf;
	rs_trace("resized scoop buffer to " FMT_SIZE " bytes from " FMT_SIZE "",
	newsize, job->scoop_alloc);
	job->scoop_alloc = newsize;
	} else if (job->scoop_buf != job->scoop_next) {
	/* Move existing data to the front of the scoop. */
	rs_trace("moving scoop " FMT_SIZE " bytes to reuse " FMT_SIZE " bytes",
	job->scoop_avail, (size_t)(job->scoop_next - job->scoop_buf));
	memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
	job->scoop_next = job->scoop_buf;
	}
	/* take as much input as is available, to give up to LEN bytes in the
	scoop. */
	tocopy = len - job->scoop_avail;
	if (tocopy > stream->avail_in)
	tocopy = stream->avail_in;
	assert(tocopy + job->scoop_avail <= job->scoop_alloc);

	memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
	rs_trace("accepted " FMT_SIZE " bytes from input to scoop", tocopy);
	job->scoop_avail += tocopy;
	stream->next_in += tocopy;
	stream->avail_in -= tocopy;
	}

	/** Advance the input cursor forward \p len bytes.
	*
	* This is used after doing readahead, when you decide you want to keep it. \p
	* len must be no more than the amount of available data, so you can't cheat.
	*
	* So when creating a delta, we require one block of readahead. But after
	* examining that block, we might decide to advance over all of it (if there is
	* a match), or just one byte (if not). */
	void rs_scoop_advance(rs_job_t *job, size_t len)
	{
	rs_buffers_t *stream = job->stream;

	/* It never makes sense to advance over a mixture of bytes from the scoop
	and input, because you couldn't possibly have looked at them all at the
	same time. */
	if (job->scoop_avail) {
	/* reading from the scoop buffer */
	rs_trace("advance over " FMT_SIZE " bytes from scoop", len);
	assert(len <= job->scoop_avail);
	job->scoop_avail -= len;
	job->scoop_next += len;
	} else {
	rs_trace("advance over " FMT_SIZE " bytes from input buffer", len);
	assert(len <= stream->avail_in);
	stream->avail_in -= len;
	stream->next_in += len;
	}
	}

	/** Read from scoop without advancing.
	*
	* Ask for LEN bytes of input from the stream. If that much data is available,
	* then return a pointer to it in PTR, advance the stream input pointer over
	* the data, and return RS_DONE. If there's not enough data, then accept
	* whatever is there into a buffer, advance over it, and return RS_BLOCKED.
	*
	* The data is not actually removed from the input, so this function lets you
	* do readahead. If you want to keep any of the data, you should also call
	* rs_scoop_advance() to skip over it. */
	rs_result rs_scoop_readahead(rs_job_t job, size_t len, void *ptr)
	{
	rs_buffers_t *stream = job->stream;
	rs_job_check(job);

	if (!job->scoop_avail && stream->avail_in >= len) {
	/* The scoop is empty and there's enough data in the input. */
	*ptr = stream->next_in;
	rs_trace("got " FMT_SIZE " bytes direct from input", len);
	return RS_DONE;
	} else if (job->scoop_avail < len && stream->avail_in) {
	/* There is not enough data in the scoop. */
	rs_trace("scoop has less than " FMT_SIZE " bytes, scooping from "
	FMT_SIZE " input bytes", len, stream->avail_in);
	rs_scoop_input(job, len);
	}
	if (job->scoop_avail >= len) {
	/* There is enough data in the scoop now. */
	rs_trace("scoop has at least " FMT_SIZE " bytes, this is enough",
	job->scoop_avail);
	*ptr = job->scoop_next;
	return RS_DONE;
	} else if (stream->eof_in) {
	/* Not enough input data and at EOF. */
	rs_trace("reached end of input stream");
	return RS_INPUT_ENDED;
	} else {
	/* Not enough input data yet. */
	rs_trace("blocked with insufficient input data");
	return RS_BLOCKED;
	}
	}

	/** Read LEN bytes if possible, and remove them from the input scoop.
	*
	* \param *job An rs_job_t pointer to the job instance.
	*
	* \param len The length of the data in the ptr buffer.
	*
	* \param **ptr will be updated to point to a read-only buffer holding the
	* data, if enough is available.
	*
	* \return RS_DONE if there was enough data, RS_BLOCKED if there was not enough
	* data yet, or RS_INPUT_ENDED if there was not enough data and at EOF. */
	rs_result rs_scoop_read(rs_job_t job, size_t len, void *ptr)
	{
	rs_result result;

	result = rs_scoop_readahead(job, len, ptr);
	if (result == RS_DONE)
	rs_scoop_advance(job, len);
	return result;
	}

	/** Read whatever data remains in the input stream.
	*
	* \param *job The rs_job_t instance the job instance.
	*
	* \param *len will be updated to the length of the available data.
	*
	* \param **ptr will point at the available data.
	*
	* \return RS_DONE if there was data, RS_INPUT_ENDED if there was no data and
	* at EOF, RS_BLOCKED if there was no data and not at EOF. */
	rs_result rs_scoop_read_rest(rs_job_t job, size_t len, void **ptr)
	{
	rs_buffers_t *stream = job->stream;

	*len = job->scoop_avail + stream->avail_in;
	if (*len)
	return rs_scoop_read(job, *len, ptr);
	else if (stream->eof_in)
	return RS_INPUT_ENDED;
	else
	return RS_BLOCKED;
	}

	/** Return the total number of bytes available including the scoop and input
	* buffer. */
	size_t rs_scoop_total_avail(rs_job_t *job)
	{
	return job->scoop_avail + job->stream->avail_in;
	}