skills/azure-ai-vision-imageanalysis-java/SKILL.md
Build image analysis applications with Azure AI Vision SDK for Java. Use when implementing image captioning, OCR text extraction, object detection, tagging, or smart cropping.
npx skillsauth add voidomin/Param_Adventures_Phase2 azure-ai-vision-imageanalysis-javaInstall this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Build image analysis applications using the Azure AI Vision Image Analysis SDK for Java.
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-ai-vision-imageanalysis</artifactId>
<version>1.1.0-beta.1</version>
</dependency>
import com.azure.ai.vision.imageanalysis.ImageAnalysisClient;
import com.azure.ai.vision.imageanalysis.ImageAnalysisClientBuilder;
import com.azure.core.credential.KeyCredential;
String endpoint = System.getenv("VISION_ENDPOINT");
String key = System.getenv("VISION_KEY");
ImageAnalysisClient client = new ImageAnalysisClientBuilder()
.endpoint(endpoint)
.credential(new KeyCredential(key))
.buildClient();
import com.azure.ai.vision.imageanalysis.ImageAnalysisAsyncClient;
ImageAnalysisAsyncClient asyncClient = new ImageAnalysisClientBuilder()
.endpoint(endpoint)
.credential(new KeyCredential(key))
.buildAsyncClient();
import com.azure.identity.DefaultAzureCredentialBuilder;
ImageAnalysisClient client = new ImageAnalysisClientBuilder()
.endpoint(endpoint)
.credential(new DefaultAzureCredentialBuilder().build())
.buildClient();
| Feature | Description |
|---------|-------------|
| CAPTION | Generate human-readable image description |
| DENSE_CAPTIONS | Captions for up to 10 regions |
| READ | OCR - Extract text from images |
| TAGS | Content tags for objects, scenes, actions |
| OBJECTS | Detect objects with bounding boxes |
| SMART_CROPS | Smart thumbnail regions |
| PEOPLE | Detect people with locations |
import com.azure.ai.vision.imageanalysis.models.*;
import com.azure.core.util.BinaryData;
import java.io.File;
import java.util.Arrays;
// From file
BinaryData imageData = BinaryData.fromFile(new File("image.jpg").toPath());
ImageAnalysisResult result = client.analyze(
imageData,
Arrays.asList(VisualFeatures.CAPTION),
new ImageAnalysisOptions().setGenderNeutralCaption(true));
System.out.printf("Caption: \"%s\" (confidence: %.4f)%n",
result.getCaption().getText(),
result.getCaption().getConfidence());
ImageAnalysisResult result = client.analyzeFromUrl(
"https://example.com/image.jpg",
Arrays.asList(VisualFeatures.CAPTION),
new ImageAnalysisOptions().setGenderNeutralCaption(true));
System.out.printf("Caption: \"%s\"%n", result.getCaption().getText());
ImageAnalysisResult result = client.analyze(
BinaryData.fromFile(new File("document.jpg").toPath()),
Arrays.asList(VisualFeatures.READ),
null);
for (DetectedTextBlock block : result.getRead().getBlocks()) {
for (DetectedTextLine line : block.getLines()) {
System.out.printf("Line: '%s'%n", line.getText());
System.out.printf(" Bounding polygon: %s%n", line.getBoundingPolygon());
for (DetectedTextWord word : line.getWords()) {
System.out.printf(" Word: '%s' (confidence: %.4f)%n",
word.getText(),
word.getConfidence());
}
}
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.OBJECTS),
null);
for (DetectedObject obj : result.getObjects()) {
System.out.printf("Object: %s (confidence: %.4f)%n",
obj.getTags().get(0).getName(),
obj.getTags().get(0).getConfidence());
ImageBoundingBox box = obj.getBoundingBox();
System.out.printf(" Location: x=%d, y=%d, w=%d, h=%d%n",
box.getX(), box.getY(), box.getWidth(), box.getHeight());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.TAGS),
null);
for (DetectedTag tag : result.getTags()) {
System.out.printf("Tag: %s (confidence: %.4f)%n",
tag.getName(),
tag.getConfidence());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.PEOPLE),
null);
for (DetectedPerson person : result.getPeople()) {
ImageBoundingBox box = person.getBoundingBox();
System.out.printf("Person at x=%d, y=%d (confidence: %.4f)%n",
box.getX(), box.getY(), person.getConfidence());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.SMART_CROPS),
new ImageAnalysisOptions().setSmartCropsAspectRatios(Arrays.asList(1.0, 1.5)));
for (CropRegion crop : result.getSmartCrops()) {
System.out.printf("Crop region: aspect=%.2f, x=%d, y=%d, w=%d, h=%d%n",
crop.getAspectRatio(),
crop.getBoundingBox().getX(),
crop.getBoundingBox().getY(),
crop.getBoundingBox().getWidth(),
crop.getBoundingBox().getHeight());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.DENSE_CAPTIONS),
new ImageAnalysisOptions().setGenderNeutralCaption(true));
for (DenseCaption caption : result.getDenseCaptions()) {
System.out.printf("Caption: \"%s\" (confidence: %.4f)%n",
caption.getText(),
caption.getConfidence());
System.out.printf(" Region: x=%d, y=%d, w=%d, h=%d%n",
caption.getBoundingBox().getX(),
caption.getBoundingBox().getY(),
caption.getBoundingBox().getWidth(),
caption.getBoundingBox().getHeight());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(
VisualFeatures.CAPTION,
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.READ),
new ImageAnalysisOptions()
.setGenderNeutralCaption(true)
.setLanguage("en"));
// Access all results
System.out.println("Caption: " + result.getCaption().getText());
System.out.println("Tags: " + result.getTags().size());
System.out.println("Objects: " + result.getObjects().size());
System.out.println("Text blocks: " + result.getRead().getBlocks().size());
asyncClient.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.CAPTION),
null)
.subscribe(
result -> System.out.println("Caption: " + result.getCaption().getText()),
error -> System.err.println("Error: " + error.getMessage()),
() -> System.out.println("Complete")
);
import com.azure.core.exception.HttpResponseException;
try {
client.analyzeFromUrl(imageUrl, Arrays.asList(VisualFeatures.CAPTION), null);
} catch (HttpResponseException e) {
System.out.println("Status: " + e.getResponse().getStatusCode());
System.out.println("Error: " + e.getMessage());
}
VISION_ENDPOINT=https://<resource>.cognitiveservices.azure.com/
VISION_KEY=<your-api-key>
Caption and Dense Captions require GPU-supported regions. Check supported regions before deployment.
This skill is applicable to execute the workflow or actions described in the overview.
documentation
Create beautiful visual art in .png and .pdf documents using design philosophy. You should use this skill when the user asks to create a poster, piece of art, design, or other static piece. Create ...
tools
Automate Canva tasks via Rube MCP (Composio): designs, exports, folders, brand templates, autofill. Always search tools first for current schemas.
tools
Automate Calendly scheduling, event management, invitee tracking, availability checks, and organization administration via Rube MCP (Composio). Always search tools first for current schemas.
tools
Automate Cal.com tasks via Rube MCP (Composio): manage bookings, check availability, configure webhooks, and handle teams. Always search tools first for current schemas.