library/specializations/gpu-programming/skills/nvenc-nvdec/SKILL.md
NVIDIA hardware video encoding/decoding integration. Configure NVENC encoding parameters, set up NVDEC decoding pipelines, handle codec configurations, integrate with CUDA for pre/post processing, and manage video memory surfaces.
npx skillsauth add a5c-ai/babysitter nvenc-nvdec
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
You are nvenc-nvdec - a specialized skill for NVIDIA hardware video encoding and decoding integration. This skill provides expert capabilities for GPU-accelerated video processing.
This skill enables AI-powered video processing including:
Initialize hardware encoder:
#include <nvEncodeAPI.h>
// Create encoder instance: fill the NVENC function table, then open an
// encode session through it.
// NOTE(review): the NVENCSTATUS returned by each call below is not checked;
// compare it against NV_ENC_SUCCESS before using `nvenc` or `encoder`.
NV_ENCODE_API_FUNCTION_LIST nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
NvEncodeAPICreateInstance(&nvenc);
void* encoder = nullptr;  // stays null if the session cannot be opened
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS sessionParams = {
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
// NOTE(review): with NV_ENC_DEVICE_TYPE_CUDA, `cudaDevice` is presumably a
// CUDA context handle (CUcontext), not a device ordinal - confirm at the
// call site.
sessionParams.device = cudaDevice;
sessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
sessionParams.apiVersion = NVENCAPI_VERSION;
nvenc.nvEncOpenEncodeSessionEx(&sessionParams, &encoder);
// Query encoder capabilities (here: the supported rate-control modes for H.264)
NV_ENC_CAPS_PARAM capsParam = {NV_ENC_CAPS_PARAM_VER};
capsParam.capsToQuery = NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES;
int capsVal = 0;  // zero-initialized so a failed query does not leave garbage
nvenc.nvEncGetEncodeCaps(encoder, NV_ENC_CODEC_H264_GUID, &capsParam, &capsVal);
Configure encoder parameters:
// Initialize encoder configuration. `initParams` points at `encodeConfig`, so
// every change made to `encodeConfig` below is picked up at initialization.
NV_ENC_INITIALIZE_PARAMS initParams = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encodeConfig = {NV_ENC_CONFIG_VER};
initParams.encodeConfig = &encodeConfig;
// Get preset configuration: start from the driver's defaults for this
// codec / preset / tuning combination, then override individual fields.
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER};
presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
nvenc.nvEncGetEncodePresetConfigEx(encoder,
                                   NV_ENC_CODEC_H264_GUID,
                                   NV_ENC_PRESET_P4_GUID,  // Balanced preset
                                   NV_ENC_TUNING_INFO_HIGH_QUALITY,
                                   &presetConfig);
memcpy(&encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
// Set basic parameters
initParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
initParams.presetGUID = NV_ENC_PRESET_P4_GUID;
// The P1-P7 presets are interpreted together with a tuning info; it must be
// set explicitly and should match the value used for the preset query above.
initParams.tuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
initParams.encodeWidth = 1920;
initParams.encodeHeight = 1080;
initParams.frameRateNum = 60;
initParams.frameRateDen = 1;
initParams.enablePTD = 1;  // Enable picture type decision
// Rate control
encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
encodeConfig.rcParams.averageBitRate = 8000000;  // 8 Mbps
encodeConfig.rcParams.maxBitRate = 12000000;     // 12 Mbps max
// B-frames and lookahead
// frameIntervalP is the distance between anchor (I/P) frames:
// 1 = IPP..., 2 = IBP..., 3 = IBBP... (two B-frames between anchors).
encodeConfig.frameIntervalP = 3;
encodeConfig.rcParams.lookaheadDepth = 20;
encodeConfig.rcParams.enableLookahead = 1;
// Initialize encoder
// NOTE(review): the returned NVENCSTATUS is not checked here.
nvenc.nvEncInitializeEncoder(encoder, &initParams);
Setup HEVC encoding:
// HEVC-specific configuration: switches the codec GUID and fills the HEVC
// member of the codec-config union inside encodeConfig.
// NOTE(review): if encodeConfig was populated from a preset query made with
// NV_ENC_CODEC_H264_GUID, the preset should presumably be re-queried with
// NV_ENC_CODEC_HEVC_GUID before applying these overrides - confirm.
initParams.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
NV_ENC_CONFIG_HEVC* hevcConfig = &encodeConfig.encodeCodecConfig.hevcConfig;
hevcConfig->chromaFormatIDC = 1; // 4:2:0
hevcConfig->pixelBitDepthMinus8 = 0; // 8-bit
hevcConfig->idrPeriod = encodeConfig.gopLength; // one IDR per GOP
hevcConfig->enableIntraRefresh = 0;
hevcConfig->maxCUSize = NV_ENC_HEVC_CUSIZE_32x32;
hevcConfig->minCUSize = NV_ENC_HEVC_CUSIZE_8x8;
// 10-bit HDR
// NOTE(review): these two assignments overwrite the 8-bit values set above;
// the 8-bit and 10-bit settings are alternatives, so keep only the one needed.
hevcConfig->pixelBitDepthMinus8 = 2; // 10-bit
hevcConfig->chromaFormatIDC = 1;
// Tier and level
hevcConfig->tier = NV_ENC_TIER_HEVC_MAIN;
hevcConfig->level = NV_ENC_LEVEL_HEVC_51;
Initialize hardware decoder:
#include <nvcuvid.h>
// Set up the bitstream parser that drives decoding.
CUVIDPARSERPARAMS parserParams = {};
// Callbacks fired by the parser; `this` is stored as the user-data pointer
// (presumably handed back to each callback as its first argument).
parserParams.pUserData = this;
parserParams.pfnSequenceCallback = HandleVideoSequence;
parserParams.pfnDecodePicture = HandlePictureDecode;
parserParams.pfnDisplayPicture = HandlePictureDisplay;
// Stream description and buffering limits.
parserParams.CodecType = cudaVideoCodec_H264;
parserParams.ulMaxNumDecodeSurfaces = 4;
parserParams.ulMaxDisplayDelay = 2;
// Create the parser from the parameters above.
CUvideoparser parser;
cuvidCreateVideoParser(&parser, &parserParams);
// Sequence callback - create decoder
int HandleVideoSequence(void* userData, CUVIDEOFORMAT* format) {
CUVIDDECODECREATEINFO createInfo = {};
createInfo.CodecType = format->codec;
createInfo.ulWidth = format->coded_width;
createInfo.ulHeight = format->coded_height;
createInfo.ulNumDecodeSurfaces = 8;
createInfo.ChromaFormat = format->chroma_format;
createInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
createInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
createInfo.ulTargetWidth = format->display_area.right;
createInfo.ulTargetHeight = format->display_area.bottom;
createInfo.ulNumOutputSurfaces = 2;
CUvideodecoder decoder;
cuvidCreateDecoder(&decoder, &createInfo);
return 1; // Return number of decode surfaces
}
Process video frames with CUDA:
// Map decoded frame to CUDA
CUVIDPROCPARAMS procParams = {};
procParams.progressive_frame = 1;
procParams.output_stream = cudaStream;
unsigned int pitch;
CUdeviceptr framePtr;
cuvidMapVideoFrame(decoder, pictureIndex, &framePtr, &pitch, &procParams);
// Process with CUDA kernel
processFrameKernel<<<blocks, threads, 0, cudaStream>>>(
(unsigned char*)framePtr, pitch, width, height);
// Unmap
cuvidUnmapVideoFrame(decoder, framePtr);
// For encoding: register CUDA resource.
// Describes an existing CUDA device allocation (`cudaFrame`) to the encoder so
// it can be used as an input image without a copy. After the call,
// registerResource.registeredResource holds the handle to pass to
// nvEncMapInputResource.
NV_ENC_REGISTER_RESOURCE registerResource = {NV_ENC_REGISTER_RESOURCE_VER};
registerResource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
registerResource.resourceToRegister = (void*)cudaFrame;
registerResource.width = width;
registerResource.height = height;
registerResource.pitch = pitch;
registerResource.bufferFormat = NV_ENC_BUFFER_FORMAT_NV12;
registerResource.bufferUsage = NV_ENC_INPUT_IMAGE;
nvenc.nvEncRegisterResource(encoder, &registerResource);
Submit frames for encoding:
// Map input buffer
NV_ENC_MAP_INPUT_RESOURCE mapInput = {NV_ENC_MAP_INPUT_RESOURCE_VER};
mapInput.registeredResource = registeredResource;
nvenc.nvEncMapInputResource(encoder, &mapInput);
// Configure picture parameters
NV_ENC_PIC_PARAMS picParams = {NV_ENC_PIC_PARAMS_VER};
picParams.inputBuffer = mapInput.mappedResource;
picParams.bufferFmt = mapInput.mappedBufferFmt;
picParams.inputWidth = width;
picParams.inputHeight = height;
picParams.outputBitstream = bitstreamBuffer;
picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
picParams.inputTimeStamp = frameNumber;
// Encode
nvenc.nvEncEncodePicture(encoder, &picParams);
// Lock and retrieve bitstream
NV_ENC_LOCK_BITSTREAM lockBitstream = {NV_ENC_LOCK_BITSTREAM_VER};
lockBitstream.outputBitstream = bitstreamBuffer;
nvenc.nvEncLockBitstream(encoder, &lockBitstream);
// Copy encoded data
memcpy(outputBuffer, lockBitstream.bitstreamBufferPtr,
lockBitstream.bitstreamSizeInBytes);
nvenc.nvEncUnlockBitstream(encoder, bitstreamBuffer);
nvenc.nvEncUnmapInputResource(encoder, mapInput.mappedResource);
Handle multiple encode sessions:
// Capability query setup. Note that NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS
// describes temporal layers, not the number of concurrent sessions; the
// session limit shows up as a failure to open a further session (handled in
// the loop below).
NV_ENC_CAPS_PARAM capsParam = {NV_ENC_CAPS_PARAM_VER};
capsParam.capsToQuery = NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS;
// Create multiple encoders for concurrent encoding
std::vector<void*> encoders(numStreams);
for (int i = 0; i < numStreams; i++) {
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS sessionParams = {
        NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
    sessionParams.device = cudaDevice;
    sessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
    sessionParams.apiVersion = NVENCAPI_VERSION;  // required, as in single-session setup
    if (nvenc.nvEncOpenEncodeSessionEx(&sessionParams, &encoders[i]) !=
        NV_ENC_SUCCESS) {
        // Could not open another session: keep only the ones that opened.
        encoders.resize(static_cast<std::size_t>(i));
        break;
    }
}
// Encode streams in parallel using CUDA streams
for (std::size_t i = 0; i < encoders.size(); i++) {
    // Each encoder uses its own CUDA stream
    cudaStreamCreate(&streams[i]);
    // Submit frames asynchronously
}
Use NVENC with FFmpeg:
# Encode with NVENC
# H.264: CUDA hardware decode with frames kept in CUDA memory, preset p4 with
# hq tuning, VBR rate control at 8M target / 12M max and a 16M buffer.
ffmpeg -hwaccel cuda -hwaccel_output_format cuda \
-i input.mp4 \
-c:v h264_nvenc \
-preset p4 \
-tune hq \
-rc vbr \
-b:v 8M \
-maxrate 12M \
-bufsize 16M \
output.mp4
# HEVC encoding
# Preset p7 with constant-QP rate control at QP 23 (no bitrate target).
ffmpeg -hwaccel cuda -i input.mp4 \
-c:v hevc_nvenc \
-preset p7 \
-rc constqp \
-qp 23 \
output_hevc.mp4
# Decode with NVDEC, process, encode with NVENC
# Frames stay in CUDA memory, so the scale_cuda filter can resize them to
# 1280x720 before they are handed to the encoder.
ffmpeg -hwaccel cuda -hwaccel_output_format cuda \
-i input.mp4 \
-vf "scale_cuda=1280:720" \
-c:v h264_nvenc \
output_720p.mp4
# AV1 encoding (RTX 40 series)
# Preset p4 at a 5M target bitrate.
ffmpeg -hwaccel cuda -i input.mp4 \
-c:v av1_nvenc \
-preset p4 \
-b:v 5M \
output_av1.mp4
This skill integrates with the following processes:
gpu-image-video-processing.js - Video processing workflows
{
"operation": "encode-session",
"status": "success",
"configuration": {
"codec": "H.265/HEVC",
"resolution": "1920x1080",
"framerate": 60,
"bitrate_mbps": 8,
"preset": "P4",
"rc_mode": "VBR"
},
"performance": {
"fps": 245,
"latency_ms": 4.1,
"gpu_utilization_pct": 35
},
"output": {
"format": "HEVC",
"file": "output.hevc",
"size_mb": 125.4
}
}
development
Model documentation skill for generating model cards following Google's model card framework.
development
MLflow integration skill for experiment tracking, model registry, and artifact management. Enables LLMs to log experiments, compare runs, manage model lifecycle, and retrieve artifacts through the MLflow API.
data-ai
LIME-based local explanation skill for individual predictions across tabular, text, and image data.
devops
Kubeflow Pipelines skill for ML workflow orchestration, component management, and Kubernetes-native ML.