skills/azure-ai-vision-imageanalysis-java/SKILL.md
Build image analysis applications with Azure AI Vision SDK for Java. Use when implementing image captioning, OCR text extraction, object detection, tagging, or smart cropping.
`npx skillsauth add drsh0911-a11y/demoproj azure-ai-vision-imageanalysis-java`
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Build image analysis applications using the Azure AI Vision Image Analysis SDK for Java.
<!-- Maven dependency for the Azure AI Vision Image Analysis client library (beta version pinned here). -->
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-ai-vision-imageanalysis</artifactId>
<version>1.1.0-beta.1</version>
</dependency>
import com.azure.ai.vision.imageanalysis.ImageAnalysisClient;
import com.azure.ai.vision.imageanalysis.ImageAnalysisClientBuilder;
import com.azure.core.credential.KeyCredential;
// Read the service endpoint and API key from the environment.
String endpoint = System.getenv("VISION_ENDPOINT");
String key = System.getenv("VISION_KEY");

// Build a synchronous client that authenticates with the API key.
KeyCredential keyCredential = new KeyCredential(key);
ImageAnalysisClientBuilder clientBuilder = new ImageAnalysisClientBuilder()
    .endpoint(endpoint)
    .credential(keyCredential);
ImageAnalysisClient client = clientBuilder.buildClient();
import com.azure.ai.vision.imageanalysis.ImageAnalysisAsyncClient;
ImageAnalysisAsyncClient asyncClient = new ImageAnalysisClientBuilder()
.endpoint(endpoint)
.credential(new KeyCredential(key))
.buildAsyncClient();
import com.azure.identity.DefaultAzureCredentialBuilder;
// Authenticate with DefaultAzureCredential instead of an API key.
ImageAnalysisClientBuilder credentialBuilder = new ImageAnalysisClientBuilder()
    .endpoint(endpoint)
    .credential(new DefaultAzureCredentialBuilder().build());
ImageAnalysisClient client = credentialBuilder.buildClient();
| Feature | Description |
|---------|-------------|
| CAPTION | Generate human-readable image description |
| DENSE_CAPTIONS | Captions for up to 10 regions |
| READ | OCR - Extract text from images |
| TAGS | Content tags for objects, scenes, actions |
| OBJECTS | Detect objects with bounding boxes |
| SMART_CROPS | Smart thumbnail regions |
| PEOPLE | Detect people with locations |
import com.azure.ai.vision.imageanalysis.models.*;
import com.azure.core.util.BinaryData;
import java.io.File;
import java.util.Arrays;
// Load the image bytes from a local file.
BinaryData imageData = BinaryData.fromFile(new File("image.jpg").toPath());

// Request only the CAPTION feature, with the gender-neutral caption option enabled.
ImageAnalysisOptions captionOptions = new ImageAnalysisOptions().setGenderNeutralCaption(true);
ImageAnalysisResult result = client.analyze(
    imageData,
    Arrays.asList(VisualFeatures.CAPTION),
    captionOptions);

// Print the caption text together with its confidence score.
CaptionResult caption = result.getCaption();
System.out.printf("Caption: \"%s\" (confidence: %.4f)%n", caption.getText(), caption.getConfidence());
// Caption an image referenced by URL rather than uploaded as bytes.
String captionImageUrl = "https://example.com/image.jpg";
ImageAnalysisOptions urlCaptionOptions = new ImageAnalysisOptions().setGenderNeutralCaption(true);
ImageAnalysisResult result = client.analyzeFromUrl(
    captionImageUrl,
    Arrays.asList(VisualFeatures.CAPTION),
    urlCaptionOptions);

String captionText = result.getCaption().getText();
System.out.printf("Caption: \"%s\"%n", captionText);
// Run OCR (READ feature) on a local document image; no extra options are passed.
BinaryData documentImage = BinaryData.fromFile(new File("document.jpg").toPath());
ImageAnalysisResult result = client.analyze(
    documentImage,
    Arrays.asList(VisualFeatures.READ),
    null);

// Walk the extracted text hierarchy: blocks -> lines -> words.
ReadResult readResult = result.getRead();
for (DetectedTextBlock textBlock : readResult.getBlocks()) {
    for (DetectedTextLine textLine : textBlock.getLines()) {
        System.out.printf("Line: '%s'%n", textLine.getText());
        System.out.printf(" Bounding polygon: %s%n", textLine.getBoundingPolygon());
        for (DetectedTextWord textWord : textLine.getWords()) {
            System.out.printf(" Word: '%s' (confidence: %.4f)%n",
                textWord.getText(),
                textWord.getConfidence());
        }
    }
}
// Detect objects in the image at imageUrl; no extra options are passed.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.OBJECTS),
    null);

// getObjects() returns an ObjectsResult wrapper; the detected objects
// themselves are exposed through getValues().
for (DetectedObject obj : result.getObjects().getValues()) {
    // The first tag of each detected object carries the name and confidence printed here.
    System.out.printf("Object: %s (confidence: %.4f)%n",
        obj.getTags().get(0).getName(),
        obj.getTags().get(0).getConfidence());
    ImageBoundingBox box = obj.getBoundingBox();
    System.out.printf(" Location: x=%d, y=%d, w=%d, h=%d%n",
        box.getX(), box.getY(), box.getWidth(), box.getHeight());
}
// Request content tags for the image at imageUrl; no extra options are passed.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.TAGS),
    null);

// getTags() returns a TagsResult wrapper; iterate the list from getValues().
for (DetectedTag tag : result.getTags().getValues()) {
    System.out.printf("Tag: %s (confidence: %.4f)%n",
        tag.getName(),
        tag.getConfidence());
}
// Detect people in the image at imageUrl; no extra options are passed.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.PEOPLE),
    null);

// getPeople() returns a PeopleResult wrapper; iterate the list from getValues().
for (DetectedPerson person : result.getPeople().getValues()) {
    ImageBoundingBox box = person.getBoundingBox();
    System.out.printf("Person at x=%d, y=%d (confidence: %.4f)%n",
        box.getX(), box.getY(), person.getConfidence());
}
// Request smart-crop regions for two aspect ratios (1.0 and 1.5).
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.SMART_CROPS),
    new ImageAnalysisOptions().setSmartCropsAspectRatios(Arrays.asList(1.0, 1.5)));

// getSmartCrops() returns a SmartCropsResult wrapper; iterate the list from getValues().
for (CropRegion crop : result.getSmartCrops().getValues()) {
    ImageBoundingBox box = crop.getBoundingBox();
    System.out.printf("Crop region: aspect=%.2f, x=%d, y=%d, w=%d, h=%d%n",
        crop.getAspectRatio(),
        box.getX(),
        box.getY(),
        box.getWidth(),
        box.getHeight());
}
// Request dense captions (one caption per image region) with the
// gender-neutral caption option enabled.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.DENSE_CAPTIONS),
    new ImageAnalysisOptions().setGenderNeutralCaption(true));

// getDenseCaptions() returns a DenseCaptionsResult wrapper; iterate the list from getValues().
for (DenseCaption caption : result.getDenseCaptions().getValues()) {
    System.out.printf("Caption: \"%s\" (confidence: %.4f)%n",
        caption.getText(),
        caption.getConfidence());
    ImageBoundingBox region = caption.getBoundingBox();
    System.out.printf(" Region: x=%d, y=%d, w=%d, h=%d%n",
        region.getX(),
        region.getY(),
        region.getWidth(),
        region.getHeight());
}
// Request several visual features in a single call.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(
        VisualFeatures.CAPTION,
        VisualFeatures.TAGS,
        VisualFeatures.OBJECTS,
        VisualFeatures.READ),
    new ImageAnalysisOptions()
        .setGenderNeutralCaption(true)
        .setLanguage("en"));

// Access all results. Tags and objects come back as wrapper results, so the
// underlying lists (and their sizes) are reached through getValues().
System.out.println("Caption: " + result.getCaption().getText());
System.out.println("Tags: " + result.getTags().getValues().size());
System.out.println("Objects: " + result.getObjects().getValues().size());
System.out.println("Text blocks: " + result.getRead().getBlocks().size());
// Asynchronous analysis: the three callbacks passed to subscribe() handle the
// result, any error, and completion respectively.
asyncClient
    .analyzeFromUrl(imageUrl, Arrays.asList(VisualFeatures.CAPTION), null)
    .subscribe(
        analysis -> {
            String captionText = analysis.getCaption().getText();
            System.out.println("Caption: " + captionText);
        },
        failure -> System.err.println("Error: " + failure.getMessage()),
        () -> System.out.println("Complete"));
import com.azure.core.exception.HttpResponseException;
try {
client.analyzeFromUrl(imageUrl, Arrays.asList(VisualFeatures.CAPTION), null);
} catch (HttpResponseException e) {
System.out.println("Status: " + e.getResponse().getStatusCode());
System.out.println("Error: " + e.getMessage());
}
VISION_ENDPOINT=https://<resource>.cognitiveservices.azure.com/
VISION_KEY=<your-api-key>
Caption and Dense Captions require GPU-supported regions. Check supported regions before deployment.
Use this skill to carry out the workflows and actions described in the overview.
tools
Azure Key Vault Keys SDK for .NET. Client library for managing cryptographic keys in Azure Key Vault and Managed HSM. Use for key creation, rotation, encryption, decryption, signing, and verification.
development
Build search applications with vector, hybrid, and semantic search capabilities.
development
Azure AI Search SDK for Python. Use for vector search, hybrid search, semantic ranking, indexing, and skillsets.
development
Azure AI Search SDK for .NET (Azure.Search.Documents). Use for building search applications with full-text, vector, semantic, and hybrid search.