当前位置:   article > 正文

C# Onnx Chinese CLIP 通过一句话从图库中搜出来符合要求的图片

c# winform onnx 识别图片

目录

效果

生成图片特征

查找踢足球的人

测试图片

模型信息

image_model.onnx

text_model.onnx

项目

代码

Form1.cs

Clip.cs

下载


C# Onnx Chinese CLIP 通过一句话从图库中搜出来符合要求的图片

效果

生成图片特征

查找踢足球的人

测试图片

模型信息

image_model.onnx

Inputs
-------------------------
name:image
tensor:Float[1, 3, 224, 224]
---------------------------------------------------------------

Outputs
-------------------------
name:unnorm_image_features
tensor:Float[1, 512]
---------------------------------------------------------------

text_model.onnx

Inputs
-------------------------
name:text
tensor:Int64[1, 52]
---------------------------------------------------------------

Outputs
-------------------------
name:unnorm_text_features
tensor:Float[1, 512]
---------------------------------------------------------------

项目

代码

Form1.cs


using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace Onnx_Demo
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        Clip mynet = new Clip("model/image_model.onnx", "model/text_model.onnx", "model/myvocab.txt");

        float[] imagedir_features;
        string image_dir = "test_img";
        StringBuilder sb = new StringBuilder();

        private void button2_Click(object sender, EventArgs e)
        {
            //特征向量 可以存二进制文件或者向量数据库
            imagedir_features = mynet.generate_imagedir_features(image_dir);
            txtInfo.Text = "生成完成!";
            txtInfo.Text += "有" + mynet.imgnum + "张图片,特征向量长度=" + imagedir_features.Length;
        }

        private void button3_Click(object sender, EventArgs e)
        {
            if (imagedir_features == null)
            {
                MessageBox.Show("请先生成图片特征!");
                return;
            }

            sb.Clear();
            txtInfo.Text = "";
            lblInfo.Text = "";
            pictureBox1.Image = null;

            string input_text = txt_input_text.Text;
            if (string.IsNullOrEmpty(input_text))
            {
                return;
            }
            List<Dictionary<string, float>> top5imglist = mynet.input_text_search_image(input_text, imagedir_features, mynet.imglist);

            sb.AppendLine("top5:");
            foreach (var item in top5imglist)
            {
                sb.AppendLine(Path.GetFileName(item.Keys.First()) + "  相似度:" + item[item.Keys.First()].ToString("F2"));
            }

            txtInfo.Text = sb.ToString();
            lblInfo.Text = Path.GetFileName(top5imglist[0].Keys.First());
            pictureBox1.Image = new Bitmap(top5imglist[0].Keys.First());

        }

        private void Form1_Load(object sender, EventArgs e)
        {

        }
    }
}

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Drawing;
  4. using System.IO;
  5. using System.Linq;
  6. using System.Text;
  7. using System.Windows.Forms;
  8. namespace Onnx_Demo
  9. {
  10. public partial class Form1 : Form
  11. {
  12. public Form1()
  13. {
  14. InitializeComponent();
  15. }
  16. Clip mynet = new Clip("model/image_model.onnx", "model/text_model.onnx", "model/myvocab.txt");
  17. float[] imagedir_features;
  18. string image_dir = "test_img";
  19. StringBuilder sb = new StringBuilder();
  20. private void button2_Click(object sender, EventArgs e)
  21. {
  22. //特征向量 可以存二进制文件或者向量数据库
  23. imagedir_features = mynet.generate_imagedir_features(image_dir);
  24. txtInfo.Text = "生成完成!";
  25. txtInfo.Text += "有" + mynet.imgnum + "张图片,特征向量长度=" + imagedir_features.Length;
  26. }
  27. private void button3_Click(object sender, EventArgs e)
  28. {
  29. if (imagedir_features == null)
  30. {
  31. MessageBox.Show("请先生成图片特征!");
  32. return;
  33. }
  34. sb.Clear();
  35. txtInfo.Text = "";
  36. lblInfo.Text = "";
  37. pictureBox1.Image = null;
  38. string input_text = txt_input_text.Text;
  39. if (string.IsNullOrEmpty(input_text))
  40. {
  41. return;
  42. }
  43. List<Dictionary<string, float>> top5imglist = mynet.input_text_search_image(input_text, imagedir_features, mynet.imglist);
  44. sb.AppendLine("top5:");
  45. foreach (var item in top5imglist)
  46. {
  47. sb.AppendLine(Path.GetFileName(item.Keys.First()) + " 相似度:" + item[item.Keys.First()].ToString("F2"));
  48. }
  49. txtInfo.Text = sb.ToString();
  50. lblInfo.Text = Path.GetFileName(top5imglist[0].Keys.First());
  51. pictureBox1.Image = new Bitmap(top5imglist[0].Keys.First());
  52. }
  53. private void Form1_Load(object sender, EventArgs e)
  54. {
  55. }
  56. }
  57. }

Clip.cs

public class Clip
    {
        // Input resolution expected by the CLIP image encoder.
        int inpWidth = 224;
        int inpHeight = 224;
        // CLIP normalization constants, RGB channel order.
        float[] mean = new float[] { 0.48145466f, 0.4578275f, 0.40821073f };
        float[] std = new float[] { 0.26862954f, 0.26130258f, 0.27577711f };

        int context_length = 52;    // token sequence length of the text model input (Int64[1, 52])
        int len_text_feature = 512; // embedding dimension of both encoders (Float[1, 512])

        Net net;                      // OpenCV DNN session running image_model.onnx
        float[] image_features_input; // L2-normalized feature of the most recently encoded image

        SessionOptions options;
        InferenceSession onnx_session; // ONNX Runtime session running text_model.onnx
        Tensor<long> input_tensor;
        List<NamedOnnxValue> input_container;
        IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
        DisposableNamedOnnxValue[] results_onnxvalue;
        Tensor<float> result_tensors;

        TokenizerBase tokenizer;

        // Flat token buffer, context_length slots per text; grown on demand.
        int[] text_tokens_input;
        // [textIndex, featureIndex] — L2-normalized text embeddings.
        float[,] text_features_input;

        public int imgnum = 0;
        public List<string> imglist = new List<string>();

        /// <summary>
        /// Loads the image encoder via OpenCV DNN, the text encoder via ONNX Runtime (CPU),
        /// and the tokenizer vocabulary.
        /// </summary>
        public Clip(string image_modelpath, string text_modelpath, string vocab_path)
        {
            net = CvDnn.ReadNetFromOnnx(image_modelpath);

            // 创建输出会话,用于输出模型读取信息
            options = new SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.AppendExecutionProvider_CPU(0);// 设置为CPU上运行
            // 创建推理模型类,读取本地模型文件
            onnx_session = new InferenceSession(text_modelpath, options);//model_path 为onnx模型文件的路径
            // 创建输入容器
            input_container = new List<NamedOnnxValue>();

            load_tokenizer(vocab_path);
        }

        void load_tokenizer(string vocab_path)
        {
            tokenizer = new TokenizerClipChinese();
            tokenizer.load_tokenize(vocab_path);
            // Pre-size for up to 1024 texts; generate_text_feature grows it if needed.
            text_tokens_input = new int[1024 * context_length];
        }

        // In-place: converts BGR->RGB, then scales each channel to (x/255 - mean) / std.
        Mat normalize_(Mat src)
        {
            Cv2.CvtColor(src, src, ColorConversionCodes.BGR2RGB);

            Mat[] channels = src.Split();
            for (int i = 0; i < channels.Length; ++i)
            {
                // ConvertTo computes dst = src * alpha + beta, which equals ((src/255) - mean) / std.
                channels[i].ConvertTo(channels[i], MatType.CV_32FC1, 1.0 / (255.0 * std[i]), (0.0 - mean[i]) / std[i]);
            }

            Cv2.Merge(channels, src);

            foreach (Mat channel in channels)
            {
                channel.Dispose();
            }

            return src;
        }

        /// <summary>
        /// Encodes one image into an L2-normalized 512-d feature vector,
        /// stored in <see cref="image_features_input"/>.
        /// </summary>
        unsafe void generate_image_feature(Mat srcimg)
        {
            Mat temp_image = new Mat();
            Cv2.Resize(srcimg, temp_image, new Size(inpWidth, inpHeight), 0, 0, InterpolationFlags.Cubic);
            // normalize_ works in place and returns the same Mat as temp_image.
            Mat normalized_mat = normalize_(temp_image);
            Mat blob = CvDnn.BlobFromImage(normalized_mat);
            net.SetInput(blob);
            //模型推理,读取推理结果
            Mat[] outs = new Mat[1] { new Mat() };
            string[] outBlobNames = net.GetUnconnectedOutLayersNames().ToArray();
            net.Forward(outs, outBlobNames);

            float* ptr_feat = (float*)outs[0].Data;
            // 忽略第0维batchsize=1, len_image_feature是定值512,跟len_text_feature相等
            int len_image_feature = outs[0].Size(1);
            image_features_input = new float[len_image_feature];

            // L2-normalize so that a dot product becomes cosine similarity.
            float norm = 0.0f;
            for (int i = 0; i < len_image_feature; i++)
            {
                norm += ptr_feat[i] * ptr_feat[i];
            }
            norm = (float)Math.Sqrt(norm);
            for (int i = 0; i < len_image_feature; i++)
            {
                image_features_input[i] = ptr_feat[i] / norm;
            }

            // Release native Mat memory; the original leaked all three per image.
            foreach (Mat outMat in outs)
            {
                outMat.Dispose();
            }
            blob.Dispose();
            temp_image.Dispose();
        }

        /// <summary>
        /// Tokenizes and encodes each text into an L2-normalized 512-d feature,
        /// filling <see cref="text_features_input"/> row by row.
        /// </summary>
        unsafe void generate_text_feature(List<string> texts)
        {
            List<List<int>> text_token = new List<List<int>>(texts.Count);
            for (int i = 0; i < texts.Count; i++)
            {
                text_token.Add(new List<int>());
            }

            for (int i = 0; i < texts.Count; i++)
            {
                tokenizer.encode_text(texts[i], text_token[i]);
            }

            if (text_token.Count * context_length > text_tokens_input.Length)
            {
                text_tokens_input = new int[text_token.Count * context_length];
            }

            // Zero the buffer so shorter texts are padded with 0.
            // (The original `foreach (int i in text_tokens_input) text_tokens_input[i] = 0;`
            // iterated VALUES as indices and never actually cleared anything.)
            Array.Clear(text_tokens_input, 0, text_tokens_input.Length);

            for (int i = 0; i < text_token.Count; i++)
            {
                if (text_token[i].Count > context_length)
                {
                    Console.WriteLine("text_features index " + i + " ,bigger than " + context_length + "\n");
                    continue;
                }
                for (int j = 0; j < text_token[i].Count; j++)
                {
                    text_tokens_input[i * context_length + j] = text_token[i][j];
                }
            }

            text_features_input = new float[text_token.Count, len_text_feature];

            for (int i = 0; i < text_token.Count; i++)
            {
                // Slice out text i's tokens. The original always fed the first
                // context_length tokens (text 0) regardless of i, so any batch
                // beyond one text reused the wrong tokens.
                long[] tokens64 = new long[context_length];
                for (int j = 0; j < context_length; j++)
                {
                    tokens64[j] = text_tokens_input[i * context_length + j];
                }

                input_tensor = new DenseTensor<long>(tokens64, new[] { 1, context_length });
                input_container.Clear();
                input_container.Add(NamedOnnxValue.CreateFromTensor("text", input_tensor));

                //运行 Inference 并获取结果
                result_infer = onnx_session.Run(input_container);
                results_onnxvalue = result_infer.ToArray();
                result_tensors = results_onnxvalue[0].AsTensor<float>();
                float[] text_feature_ptr = result_tensors.ToArray();

                // L2-normalize the row.
                float norm = 0.0f;
                for (int j = 0; j < len_text_feature; j++)
                {
                    norm += text_feature_ptr[j] * text_feature_ptr[j];
                }
                norm = (float)Math.Sqrt(norm);
                for (int j = 0; j < len_text_feature; j++)
                {
                    text_features_input[i, j] = text_feature_ptr[j] / norm;
                }

                // Release native ORT output buffers (leaked per call in the original).
                result_infer.Dispose();
            }
        }

        // Numerically stable in-place softmax (subtracts the max before exponentiating).
        void softmax(float[] input)
        {
            int length = input.Length;
            float[] exp_x = new float[length];
            float maxVal = input.Max();
            float sum = 0;
            for (int i = 0; i < length; i++)
            {
                float expval = (float)Math.Exp(input[i] - maxVal);
                exp_x[i] = expval;
                sum += expval;
            }
            for (int i = 0; i < length; i++)
            {
                input[i] = exp_x[i] / sum;
            }
        }

        // Returns the indices that would sort `array` ascending.
        // The original comparator returned 0 for the "greater" case (instead of 1),
        // producing an inconsistent comparison and effectively never sorting, so the
        // top-5 ranking was wrong.
        int[] argsort_ascend(float[] array)
        {
            int[] array_index = new int[array.Length];
            for (int i = 0; i < array_index.Length; ++i)
            {
                array_index[i] = i;
            }
            Array.Sort(array_index, (pos1, pos2) => array[pos1].CompareTo(array[pos2]));
            return array_index;
        }

        /// <summary>
        /// Scores every gallery image against the query text and returns up to the
        /// top 5 matches as { imagePath -> softmax probability }, best first.
        /// </summary>
        public List<Dictionary<string, float>> input_text_search_image(string text, float[] image_features, List<string> imglist)
        {
            int imgnum = imglist.Count;
            List<string> texts = new List<string> { text };

            generate_text_feature(texts);

            float[] logits_per_image = new float[imgnum];
            for (int i = 0; i < imgnum; i++)
            {
                float sum = 0;
                for (int j = 0; j < len_text_feature; j++)
                {
                    //图片特征向量跟文本特征向量做内积 (both are L2-normalized => cosine similarity)
                    sum += image_features[i * len_text_feature + j] * text_features_input[0, j];
                }
                logits_per_image[i] = 100 * sum; // CLIP logit scale
            }

            softmax(logits_per_image);

            int[] index = argsort_ascend(logits_per_image);

            // Cap at the gallery size; the original indexed out of range for < 5 images.
            int topCount = Math.Min(5, imgnum);
            List<Dictionary<string, float>> top5imglist = new List<Dictionary<string, float>>(topCount);
            for (int i = 0; i < topCount; i++)
            {
                int ind = index[imgnum - 1 - i]; // walk the ascending order from the back
                Dictionary<string, float> result = new Dictionary<string, float>();
                result.Add(imglist[ind], logits_per_image[ind]);
                top5imglist.Add(result);
            }
            return top5imglist;
        }

        /// <summary>
        /// Encodes every image under <paramref name="image_dir"/> and returns all
        /// features concatenated (len_text_feature floats per image, gallery order).
        /// Also populates <see cref="imglist"/> and <see cref="imgnum"/>.
        /// </summary>
        public float[] generate_imagedir_features(string image_dir)
        {
            imglist = Common.listdir(image_dir);
            imgnum = imglist.Count;
            Console.WriteLine("遍历到" + imgnum + "张图片");

            // Accumulate in a List to avoid the O(n²) repeated array Concat+ToArray.
            List<float> imagedir_features = new List<float>(imgnum * len_text_feature);

            for (int i = 0; i < imgnum; i++)
            {
                Mat srcimg = Cv2.ImRead(imglist[i]);
                generate_image_feature(srcimg);
                imagedir_features.AddRange(image_features_input);
                srcimg.Dispose();
            }

            return imagedir_features.ToArray();
        }
    }

  1. public class Clip
  2. {
  3. int inpWidth = 224;
  4. int inpHeight = 224;
  5. float[] mean = new float[] { 0.48145466f, 0.4578275f, 0.40821073f };
  6. float[] std = new float[] { 0.26862954f, 0.26130258f, 0.27577711f };
  7. int context_length = 52;
  8. int len_text_feature = 512;
  9. Net net;
  10. float[] image_features_input;
  11. SessionOptions options;
  12. InferenceSession onnx_session;
  13. Tensor<long> input_tensor;
  14. List<NamedOnnxValue> input_container;
  15. IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
  16. DisposableNamedOnnxValue[] results_onnxvalue;
  17. Tensor<float> result_tensors;
  18. TokenizerBase tokenizer;
  19. int[] text_tokens_input;
  20. float[,] text_features_input;
  21. public int imgnum = 0;
  22. public List<string> imglist = new List<string>();
  23. public Clip(string image_modelpath, string text_modelpath, string vocab_path)
  24. {
  25. net = CvDnn.ReadNetFromOnnx(image_modelpath);
  26. // 创建输出会话,用于输出模型读取信息
  27. options = new SessionOptions();
  28. options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
  29. options.AppendExecutionProvider_CPU(0);// 设置为CPU上运行
  30. // 创建推理模型类,读取本地模型文件
  31. onnx_session = new InferenceSession(text_modelpath, options);//model_path 为onnx模型文件的路径
  32. // 创建输入容器
  33. input_container = new List<NamedOnnxValue>();
  34. load_tokenizer(vocab_path);
  35. }
  36. void load_tokenizer(string vocab_path)
  37. {
  38. tokenizer = new TokenizerClipChinese();
  39. tokenizer.load_tokenize(vocab_path);
  40. text_tokens_input = new int[1024 * context_length];
  41. }
  42. Mat normalize_(Mat src)
  43. {
  44. Cv2.CvtColor(src, src, ColorConversionCodes.BGR2RGB);
  45. Mat[] bgr = src.Split();
  46. for (int i = 0; i < bgr.Length; ++i)
  47. {
  48. bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1.0 / (255.0 * std[i]), (0.0 - mean[i]) / std[i]);
  49. }
  50. Cv2.Merge(bgr, src);
  51. foreach (Mat channel in bgr)
  52. {
  53. channel.Dispose();
  54. }
  55. return src;
  56. }
  57. unsafe void generate_image_feature(Mat srcimg)
  58. {
  59. Mat temp_image = new Mat();
  60. Cv2.Resize(srcimg, temp_image, new Size(inpWidth, inpHeight), 0, 0, InterpolationFlags.Cubic);
  61. Mat normalized_mat = normalize_(temp_image);
  62. Mat blob = CvDnn.BlobFromImage(normalized_mat);
  63. net.SetInput(blob);
  64. //模型推理,读取推理结果
  65. Mat[] outs = new Mat[1] { new Mat() };
  66. string[] outBlobNames = net.GetUnconnectedOutLayersNames().ToArray();
  67. net.Forward(outs, outBlobNames);
  68. float* ptr_feat = (float*)outs[0].Data;
  69. int len_image_feature = outs[0].Size(1); //忽略第0维batchsize=1, len_image_feature是定值512,跟len_text_feature相等的, 也可以写死在类成员变量里
  70. image_features_input = new float[len_image_feature];
  71. float norm = 0.0f;
  72. for (int i = 0; i < len_image_feature; i++)
  73. {
  74. norm += ptr_feat[i] * ptr_feat[i];
  75. }
  76. norm = (float)Math.Sqrt(norm);
  77. for (int i = 0; i < len_image_feature; i++)
  78. {
  79. image_features_input[i] = ptr_feat[i] / norm;
  80. }
  81. }
  82. unsafe void generate_text_feature(List<string> texts)
  83. {
  84. List<List<int>> text_token = new List<List<int>>(texts.Count);
  85. for (int i = 0; i < texts.Count; i++)
  86. {
  87. text_token.Add(new List<int>());
  88. }
  89. for (int i = 0; i < texts.Count; i++)
  90. {
  91. tokenizer.encode_text(texts[i], text_token[i]);
  92. }
  93. if (text_token.Count * context_length > text_tokens_input.Length)
  94. {
  95. text_tokens_input = new int[text_token.Count * context_length];
  96. }
  97. foreach (int i in text_tokens_input) { text_tokens_input[i] = 0; }
  98. for (int i = 0; i < text_token.Count; i++)
  99. {
  100. if (text_token[i].Count > context_length)
  101. {
  102. Console.WriteLine("text_features index " + i + " ,bigger than " + context_length + "\n");
  103. continue;
  104. }
  105. for (int j = 0; j < text_token[i].Count; j++)
  106. {
  107. text_tokens_input[i * context_length + j] = text_token[i][j];
  108. }
  109. }
  110. int[] text_token_shape = new int[] { 1, context_length };
  111. text_features_input = new float[text_token.Count, len_text_feature];
  112. long[] text_tokens_input_64 = new long[texts.Count * context_length];
  113. for (int i = 0; i < text_tokens_input_64.Length; i++)
  114. {
  115. text_tokens_input_64[i] = text_tokens_input[i];
  116. }
  117. for (int i = 0; i < text_token.Count; i++)
  118. {
  119. input_tensor = new DenseTensor<long>(text_tokens_input_64, new[] { 1, 52 });
  120. input_container.Clear();
  121. input_container.Add(NamedOnnxValue.CreateFromTensor("text", input_tensor));
  122. //运行 Inference 并获取结果
  123. result_infer = onnx_session.Run(input_container);
  124. // 将输出结果转为DisposableNamedOnnxValue数组
  125. results_onnxvalue = result_infer.ToArray();
  126. // 读取第一个节点输出并转为Tensor数据
  127. result_tensors = results_onnxvalue[0].AsTensor<float>();
  128. float[] text_feature_ptr = results_onnxvalue[0].AsTensor<float>().ToArray();
  129. float norm = 0.0f;
  130. for (int j = 0; j < len_text_feature; j++)
  131. {
  132. norm += text_feature_ptr[j] * text_feature_ptr[j];
  133. }
  134. norm = (float)Math.Sqrt(norm);
  135. for (int j = 0; j < len_text_feature; j++)
  136. {
  137. text_features_input[i, j] = text_feature_ptr[j] / norm;
  138. }
  139. }
  140. }
  141. void softmax(float[] input)
  142. {
  143. int length = input.Length;
  144. float[] exp_x = new float[length];
  145. float maxVal = input.Max();
  146. float sum = 0;
  147. for (int i = 0; i < length; i++)
  148. {
  149. float expval = (float)Math.Exp(input[i] - maxVal);
  150. exp_x[i] = expval;
  151. sum += expval;
  152. }
  153. for (int i = 0; i < length; i++)
  154. {
  155. input[i] = exp_x[i] / sum;
  156. }
  157. }
  158. int[] argsort_ascend(float[] array)
  159. {
  160. int array_len = array.Length;
  161. int[] array_index = new int[array_len];
  162. for (int i = 0; i < array_len; ++i)
  163. {
  164. array_index[i] = i;
  165. }
  166. var temp = array_index.ToList();
  167. temp.Sort((pos1, pos2) =>
  168. {
  169. if (array[pos1] < array[pos2])
  170. {
  171. return -1;
  172. }
  173. else if (array[pos1] == array[pos2])
  174. {
  175. return 0;
  176. }
  177. else
  178. {
  179. return 0;
  180. }
  181. });
  182. return temp.ToArray();
  183. }
  184. public List<Dictionary<string, float>> input_text_search_image(string text, float[] image_features, List<string> imglist)
  185. {
  186. int imgnum = imglist.Count;
  187. List<string> texts = new List<string> { text };
  188. generate_text_feature(texts);
  189. float[] logits_per_image = new float[imgnum];
  190. for (int i = 0; i < imgnum; i++)
  191. {
  192. float sum = 0;
  193. for (int j = 0; j < len_text_feature; j++)
  194. {
  195. sum += image_features[i * len_text_feature + j] * text_features_input[0, j]; //图片特征向量跟文本特征向量做内积
  196. }
  197. logits_per_image[i] = 100 * sum;
  198. }
  199. softmax(logits_per_image);
  200. int[] index = argsort_ascend(logits_per_image);
  201. List<Dictionary<string, float>> top5imglist = new List<Dictionary<string, float>>(5);
  202. for (int i = 0; i < 5; i++)
  203. {
  204. int ind = index[imgnum - 1 - i];
  205. Dictionary<string, float> result = new Dictionary<string, float>();
  206. result.Add(imglist[ind], logits_per_image[ind]);
  207. top5imglist.Add(result);
  208. }
  209. return top5imglist;
  210. }
  211. public float[] generate_imagedir_features(string image_dir)
  212. {
  213. imglist = Common.listdir(image_dir);
  214. imgnum = imglist.Count;
  215. Console.WriteLine("遍历到" + imgnum + "张图片");
  216. float[] imagedir_features = new float[0];
  217. for (int i = 0; i < imgnum; i++)
  218. {
  219. string imgpath = imglist[i];
  220. Mat srcimg = Cv2.ImRead(imgpath);
  221. generate_image_feature(srcimg);
  222. imagedir_features = imagedir_features.Concat(image_features_input).ToArray();
  223. srcimg.Dispose();
  224. }
  225. return imagedir_features;
  226. }
  227. }

下载

源码下载

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/代码探险家/article/detail/775624
推荐阅读
相关标签
  

闽ICP备14008679号