// Files: fmviewer3/project/fm_viewer/fav/GPUMemCopy.cpp
// 2026-02-21 17:11:31 +09:00 | 147 lines | 4.0 KiB | C++

/******************************************************************************
QtAV: Multimedia framework based on Qt and FFmpeg
Copyright (C) 2012-2017 Wang Bin <wbsecg1@gmail.com>
* This file is part of QtAV
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
******************************************************************************/
#include "GPUMemCopy.h"
#if !(REMOVE_GPU_MEMCOPY)
#include "_fav_constants.h"
#include <string.h> //memcpy
#include <algorithm>
extern "C" {
#include <libavutil/cpu.h>
}
// Fallback definition of Q_PROCESSOR_X86 for builds where it is not already
// defined: it is derived from the compiler's own x86/SSE predefined macros.
// NOTE(review): the "qt4" remark suggests only Qt 4 lacks the macro - confirm.
#ifndef Q_PROCESSOR_X86 // qt4
#if defined(__SSE__) || defined(_M_IX86) || defined(_M_X64)
#define Q_PROCESSOR_X86
#endif
#endif
// read qsimd_p.h
// Shorthand type used by the CopyFrame_* prototypes below.
#define UINT unsigned int
// Frame copy routines. They are only declared here; their definitions are not
// part of this file. GPUMemCopy::copyFrame() passes its mCache.buffer as
// pCacheBlock.
// NOTE(review): presumably pitch is the row stride in bytes - confirm against
// the implementations.
void CopyFrame_SSE2(void *pSrc, void *pDest, void *pCacheBlock, UINT width, UINT height, UINT pitch);
void CopyFrame_SSE4(void *pSrc, void *pDest, void *pCacheBlock, UINT width, UINT height, UINT pitch);
// memcpy()-shaped routines that FAV::gpu_memcpy() dispatches to; also defined
// outside this file.
void *memcpy_sse2(void* dst, const void* src, size_t size);
void *memcpy_sse4(void* dst, const void* src, size_t size);
namespace FAV {
// Reports whether av_get_cpu_flags() includes AV_CPU_FLAG_SSE4.
// The flags are queried on the first call only; later calls return the
// remembered answer.
bool detect_sse4() {
    static const bool cached = (av_get_cpu_flags() & AV_CPU_FLAG_SSE4) != 0;
    return cached;
}
// Reports whether av_get_cpu_flags() includes AV_CPU_FLAG_SSE2.
// The flags are queried on the first call only; later calls return the
// remembered answer.
bool detect_sse2() {
    static const bool cached = (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) != 0;
    return cached;
}
// Returns true when at least one SIMD path that was compiled into this build
// (SSE4.1 first, then SSE2) is also reported by the CPU at run time.
// Returns false when neither path is compiled in or neither is detected.
bool GPUMemCopy::isAvailable()
{
    bool usable = false;
#if QTAV_HAVE(SSE4_1) && defined(Q_PROCESSOR_X86)
    usable = detect_sse4();
#endif
#if QTAV_HAVE(SSE2) && defined(Q_PROCESSOR_X86)
    // Only consulted when the SSE4.1 check above did not already succeed.
    usable = usable || detect_sse2();
#endif
    return usable;
}
// Constructs an uninitialized copier: isReady() stays false until initCache()
// has succeeded.
GPUMemCopy::GPUMemCopy()
    : mInitialized(false)
{
#if QTAV_HAVE(SSE2) && defined(Q_PROCESSOR_X86)
    // No staging buffer has been allocated yet.
    mCache.size = 0;
    mCache.buffer = 0;
#endif
}
// Releases the staging buffer (if any) through cleanCache().
GPUMemCopy::~GPUMemCopy()
{
    this->cleanCache();
}
// True only when initCache() has succeeded AND a SIMD copy path is available
// (see isAvailable()).
bool GPUMemCopy::isReady() const
{
    if (!mInitialized)
        return false;
    return GPUMemCopy::isAvailable();
}
#define CACHED_BUFFER_SIZE 4096
bool GPUMemCopy::initCache(unsigned width)
{
mInitialized = false;
#if QTAV_HAVE(SSE2) && defined(Q_PROCESSOR_X86)
mCache.size = std::max<size_t>((width + 0x0f) & ~ 0x0f, CACHED_BUFFER_SIZE);
mCache.buffer = (unsigned char*)qMallocAligned(mCache.size, 16);
mInitialized = !!mCache.buffer;
return mInitialized;
#else
Q_UNUSED(width);
#endif
return false;
}
// Frees the staging buffer, if one is allocated, and marks the object as
// uninitialized. Safe to call when no buffer exists.
void GPUMemCopy::cleanCache()
{
#if QTAV_HAVE(SSE2) && defined(Q_PROCESSOR_X86)
    if (mCache.buffer != 0)
        qFreeAligned(mCache.buffer);
    mCache.size = 0;
    mCache.buffer = 0;
#endif
    mInitialized = false;
}
// Copies a frame with the best SIMD routine that is both compiled in and
// supported by the CPU: CopyFrame_SSE4 first, CopyFrame_SSE2 otherwise.
// mCache.buffer is passed to the routine as its cache block, so initCache()
// must have succeeded beforehand (check isReady()).
//
// The two paths are tried independently. With a single #if/#elif chain a build
// that has SSE4.1 enabled would never try SSE2 at run time, so on an SSE2-only
// CPU isAvailable() returned true while this function copied nothing.
//
// If no SIMD path is usable the function does nothing.
void GPUMemCopy::copyFrame(void *pSrc, void *pDest, unsigned width, unsigned height, unsigned pitch)
{
#if QTAV_HAVE(SSE4_1) && defined(Q_PROCESSOR_X86)
    if (detect_sse4()) {
        CopyFrame_SSE4(pSrc, pDest, mCache.buffer, width, height, pitch);
        return;
    }
#endif
#if QTAV_HAVE(SSE2) && defined(Q_PROCESSOR_X86)
    // Run-time fallback for CPUs without SSE4.1.
    if (detect_sse2()) {
        CopyFrame_SSE2(pSrc, pDest, mCache.buffer, width, height, pitch);
        return;
    }
#endif
    // Reached only when no SIMD path ran; also silences unused-parameter
    // warnings in builds where neither path is compiled in.
    Q_UNUSED(pSrc);
    Q_UNUSED(pDest);
    Q_UNUSED(width);
    Q_UNUSED(height);
    Q_UNUSED(pitch);
}
// memcpy() replacement that dispatches to the best SIMD routine that is both
// compiled in and supported by the CPU: memcpy_sse4, then memcpy_sse2, then
// plain memcpy().
//
// dst, src, size: same meaning as for memcpy().
// Returns whatever the selected routine returns.
//
// The SSE2 path is tried independently of the SSE4.1 path so that a build with
// SSE4.1 enabled still uses memcpy_sse2 on a CPU that only has SSE2, matching
// what GPUMemCopy::isAvailable() reports.
void* gpu_memcpy(void *dst, const void *src, size_t size)
{
#if QTAV_HAVE(SSE4_1) && defined(Q_PROCESSOR_X86)
    if (detect_sse4())
        return memcpy_sse4(dst, src, size);
#endif
#if QTAV_HAVE(SSE2) && defined(Q_PROCESSOR_X86)
    if (detect_sse2())
        return memcpy_sse2(dst, src, size);
#endif
    return memcpy(dst, src, size);
}
} //namespace FAV
#endif // #if !(REMOVE_GPU_MEMCOPY)