.github/skills/azure-ai-vision-imageanalysis-java/SKILL.md
Build image analysis applications with Azure AI Vision SDK for Java. Use when implementing image captioning, OCR text extraction, object detection, tagging, or smart cropping.
npx skillsauth add javiertarazon/agente-copilot azure-ai-vision-imageanalysis-javaInstall this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Build image analysis applications using the Azure AI Vision Image Analysis SDK for Java.
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-ai-vision-imageanalysis</artifactId>
<version>1.1.0-beta.1</version>
</dependency>
import com.azure.ai.vision.imageanalysis.ImageAnalysisClient;
import com.azure.ai.vision.imageanalysis.ImageAnalysisClientBuilder;
import com.azure.core.credential.KeyCredential;
String endpoint = System.getenv("VISION_ENDPOINT");
String key = System.getenv("VISION_KEY");
ImageAnalysisClient client = new ImageAnalysisClientBuilder()
.endpoint(endpoint)
.credential(new KeyCredential(key))
.buildClient();
import com.azure.ai.vision.imageanalysis.ImageAnalysisAsyncClient;
ImageAnalysisAsyncClient asyncClient = new ImageAnalysisClientBuilder()
.endpoint(endpoint)
.credential(new KeyCredential(key))
.buildAsyncClient();
import com.azure.identity.DefaultAzureCredentialBuilder;
ImageAnalysisClient client = new ImageAnalysisClientBuilder()
.endpoint(endpoint)
.credential(new DefaultAzureCredentialBuilder().build())
.buildClient();
| Feature | Description |
|---------|-------------|
| CAPTION | Generate human-readable image description |
| DENSE_CAPTIONS | Captions for up to 10 regions |
| READ | OCR - Extract text from images |
| TAGS | Content tags for objects, scenes, actions |
| OBJECTS | Detect objects with bounding boxes |
| SMART_CROPS | Smart thumbnail regions |
| PEOPLE | Detect people with locations |
import com.azure.ai.vision.imageanalysis.models.*;
import com.azure.core.util.BinaryData;
import java.io.File;
import java.util.Arrays;
// From file
BinaryData imageData = BinaryData.fromFile(new File("image.jpg").toPath());
ImageAnalysisResult result = client.analyze(
imageData,
Arrays.asList(VisualFeatures.CAPTION),
new ImageAnalysisOptions().setGenderNeutralCaption(true));
System.out.printf("Caption: \"%s\" (confidence: %.4f)%n",
result.getCaption().getText(),
result.getCaption().getConfidence());
ImageAnalysisResult result = client.analyzeFromUrl(
"https://example.com/image.jpg",
Arrays.asList(VisualFeatures.CAPTION),
new ImageAnalysisOptions().setGenderNeutralCaption(true));
System.out.printf("Caption: \"%s\"%n", result.getCaption().getText());
ImageAnalysisResult result = client.analyze(
BinaryData.fromFile(new File("document.jpg").toPath()),
Arrays.asList(VisualFeatures.READ),
null);
for (DetectedTextBlock block : result.getRead().getBlocks()) {
for (DetectedTextLine line : block.getLines()) {
System.out.printf("Line: '%s'%n", line.getText());
System.out.printf(" Bounding polygon: %s%n", line.getBoundingPolygon());
for (DetectedTextWord word : line.getWords()) {
System.out.printf(" Word: '%s' (confidence: %.4f)%n",
word.getText(),
word.getConfidence());
}
}
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.OBJECTS),
null);
for (DetectedObject obj : result.getObjects()) {
System.out.printf("Object: %s (confidence: %.4f)%n",
obj.getTags().get(0).getName(),
obj.getTags().get(0).getConfidence());
ImageBoundingBox box = obj.getBoundingBox();
System.out.printf(" Location: x=%d, y=%d, w=%d, h=%d%n",
box.getX(), box.getY(), box.getWidth(), box.getHeight());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.TAGS),
null);
for (DetectedTag tag : result.getTags()) {
System.out.printf("Tag: %s (confidence: %.4f)%n",
tag.getName(),
tag.getConfidence());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.PEOPLE),
null);
for (DetectedPerson person : result.getPeople()) {
ImageBoundingBox box = person.getBoundingBox();
System.out.printf("Person at x=%d, y=%d (confidence: %.4f)%n",
box.getX(), box.getY(), person.getConfidence());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.SMART_CROPS),
new ImageAnalysisOptions().setSmartCropsAspectRatios(Arrays.asList(1.0, 1.5)));
for (CropRegion crop : result.getSmartCrops()) {
System.out.printf("Crop region: aspect=%.2f, x=%d, y=%d, w=%d, h=%d%n",
crop.getAspectRatio(),
crop.getBoundingBox().getX(),
crop.getBoundingBox().getY(),
crop.getBoundingBox().getWidth(),
crop.getBoundingBox().getHeight());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.DENSE_CAPTIONS),
new ImageAnalysisOptions().setGenderNeutralCaption(true));
for (DenseCaption caption : result.getDenseCaptions()) {
System.out.printf("Caption: \"%s\" (confidence: %.4f)%n",
caption.getText(),
caption.getConfidence());
System.out.printf(" Region: x=%d, y=%d, w=%d, h=%d%n",
caption.getBoundingBox().getX(),
caption.getBoundingBox().getY(),
caption.getBoundingBox().getWidth(),
caption.getBoundingBox().getHeight());
}
ImageAnalysisResult result = client.analyzeFromUrl(
imageUrl,
Arrays.asList(
VisualFeatures.CAPTION,
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.READ),
new ImageAnalysisOptions()
.setGenderNeutralCaption(true)
.setLanguage("en"));
// Access all results
System.out.println("Caption: " + result.getCaption().getText());
System.out.println("Tags: " + result.getTags().size());
System.out.println("Objects: " + result.getObjects().size());
System.out.println("Text blocks: " + result.getRead().getBlocks().size());
asyncClient.analyzeFromUrl(
imageUrl,
Arrays.asList(VisualFeatures.CAPTION),
null)
.subscribe(
result -> System.out.println("Caption: " + result.getCaption().getText()),
error -> System.err.println("Error: " + error.getMessage()),
() -> System.out.println("Complete")
);
import com.azure.core.exception.HttpResponseException;
try {
client.analyzeFromUrl(imageUrl, Arrays.asList(VisualFeatures.CAPTION), null);
} catch (HttpResponseException e) {
System.out.println("Status: " + e.getResponse().getStatusCode());
System.out.println("Error: " + e.getMessage());
}
VISION_ENDPOINT=https://<resource>.cognitiveservices.azure.com/
VISION_KEY=<your-api-key>
Caption and Dense Captions require GPU-supported regions. Check supported regions before deployment.
This skill is applicable to execute the workflow or actions described in the overview.
tools
Automate GitHub repositories, issues, pull requests, branches, CI/CD, and permissions via Rube MCP (Composio). Manage code workflows, review PRs, search code, and handle deployments programmatically.
development
Create production-ready GitHub Actions workflows for automated testing, building, and deploying applications. Use when setting up CI/CD with GitHub Actions, automating development workflows, or cre...
tools
Stage, commit, and push git changes with conventional commit messages. Use when user wants to commit and push changes, mentions pushing to remote, or asks to save and push their work. Also activate...
development
You are a PR optimization expert specializing in creating high-quality pull requests that facilitate efficient code reviews. Generate comprehensive PR descriptions, automate review processes, and ensu