当前位置:   article > 正文

使用HuggingfaceAPI的音频转文本Demo_huggingface 语音转文本模型

huggingface 语音转文本模型

前置准备

  1、科学上网

  2、音频文件。(可到在线翻译之类的网站生成音频⬇)

  3、使用的openai/whisper-large-v3模型地址:https://huggingface.co/openai/whisper-large-v3

(需自行登录 Hugging Face 并申请 Access Token,后面的代码中会用到)

音频转文本Demo

1、activity_main布局
  1. <?xml version="1.0" encoding="utf-8"?>
  <!-- Root container: stacks the result view above a row holding the two action buttons. -->
  <LinearLayout
      xmlns:android="http://schemas.android.com/apk/res/android"
      xmlns:app="http://schemas.android.com/apk/res-auto"
      xmlns:tools="http://schemas.android.com/tools"
      android:layout_width="match_parent"
      android:layout_height="match_parent"
      android:orientation="vertical"
      tools:context=".MainActivity">

      <!-- Output area; the hint is shown while the view has no text. -->
      <TextView
          android:id="@+id/tvResult"
          android:layout_width="match_parent"
          android:layout_height="200dp"
          android:layout_marginTop="20dp"
          android:hint="请先选择文件"
          android:scrollbars="vertical" />

      <!-- Button row: both buttons use width 0dp + weight 1, so they split the row equally. -->
      <LinearLayout
          android:layout_width="match_parent"
          android:layout_height="wrap_content"
          android:orientation="horizontal">

          <Button
              android:id="@+id/btnSelectFile"
              android:layout_width="0dp"
              android:layout_height="wrap_content"
              android:layout_weight="1"
              android:text="选择文件" />

          <Button
              android:id="@+id/btnConvert"
              android:layout_width="0dp"
              android:layout_height="wrap_content"
              android:layout_weight="1"
              android:text="转换为文本" />

      </LinearLayout>

  </LinearLayout>
2、在app模块的build.gradle.kts中添加相关依赖
  // Module-level build script (Gradle Kotlin DSL) for the demo app.
  plugins {
      id("com.android.application")
      id("org.jetbrains.kotlin.android")
  }

  android {
      namespace = "com.example.vicetotextdemo"
      compileSdk = 34

      defaultConfig {
          applicationId = "com.example.vicetotextdemo"
          minSdk = 24
          targetSdk = 34
          versionCode = 1
          versionName = "1.0"

          testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
      }

      buildTypes {
          release {
              // Code shrinking is disabled for the release build.
              isMinifyEnabled = false
              proguardFiles(
                  getDefaultProguardFile("proguard-android-optimize.txt"),
                  "proguard-rules.pro"
              )
          }
      }

      // Java and Kotlin are both compiled for the 1.8 target.
      compileOptions {
          sourceCompatibility = JavaVersion.VERSION_1_8
          targetCompatibility = JavaVersion.VERSION_1_8
      }
      kotlinOptions {
          jvmTarget = "1.8"
      }
  }

  dependencies {
      // AndroidX / Material UI libraries
      implementation("androidx.core:core-ktx:1.9.0")
      implementation("androidx.appcompat:appcompat:1.6.1")
      implementation("com.google.android.material:material:1.11.0")
      implementation("androidx.constraintlayout:constraintlayout:2.1.4")

      // Networking and JSON: Retrofit + Gson converter, OkHttp, Gson
      implementation("com.squareup.retrofit2:retrofit:2.9.0")
      implementation("com.squareup.retrofit2:converter-gson:2.9.0")
      implementation("com.squareup.okhttp3:okhttp:4.12.0")
      implementation("com.google.code.gson:gson:2.10.1")

      // Local unit tests
      testImplementation("junit:junit:4.13.2")

      // Instrumented tests
      androidTestImplementation("androidx.test.ext:junit:1.1.5")
      androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
  }
3、创建ApiService接口文件(接口名为MyApi)
  /**
   * Retrofit service interface for the speech-to-text request used by this demo.
   *
   * Replace `YOUR_TOKEN` in the `Authorization` header below with your own access token.
   */
  interface MyApi {

      /**
       * POSTs [requestBody] to `models/openai/whisper-large-v3`, a path relative to the
       * base URL of the Retrofit instance that creates this service.
       *
       * Both headers are static: a bearer-token `Authorization` header and a
       * `Content-Type` of `application/octet-stream`.
       *
       * @param requestBody the payload sent as the HTTP request body
       * @return a [Call] that yields the raw, unparsed response body
       */
      @POST("models/openai/whisper-large-v3")
      @Headers(
          "Authorization: Bearer YOUR_TOKEN",
          "Content-Type: application/octet-stream"
      )
      fun convertText(@Body requestBody: RequestBody): Call<ResponseBody>
  }
  /**
   * Wraps a byte array in an OkHttp [RequestBody] typed as `application/octet-stream`.
   *
   * @param data the bytes to send as the request payload
   */
  class TextRequestBody(data: ByteArray) {

      /** The ready-to-send body built from the constructor's `data`. */
      val requestBody: RequestBody

      init {
          val octetStream = "application/octet-stream".toMediaTypeOrNull()
          requestBody = RequestBody.create(octetStream, data)
      }
  }
4、创建ApiResponse数据类
/**
 * Model for the JSON object returned by the conversion request.
 *
 * @property text value of the `text` field of the response JSON
 */
data class ApiResponse(
    val text: String,
)
5、修改MainActivity代码
  1. package com.example.vicetotextdemo
  2. import android.annotation.SuppressLint
  3. import android.app.Activity
  4. import android.content.Intent
  5. import android.content.pm.PackageManager
  6. import android.net.Uri
  7. import android.os.Bundle
  8. import android.provider.OpenableColumns
  9. import android.text.method.ScrollingMovementMethod
  10. import android.util.Log
  11. import android.widget.Button
  12. import android.widget.TextView
  13. import androidx.appcompat.app.AppCompatActivity
  14. import androidx.core.app.ActivityCompat
  15. import androidx.core.content.ContextCompat
  16. import com.google.gson.Gson
  17. import okhttp3.ResponseBody
  18. import retrofit2.Call
  19. import retrofit2.Callback
  20. import retrofit2.Response
  21. import retrofit2.Retrofit
  22. import retrofit2.converter.gson.GsonConverterFactory
  23. import java.io.IOException
  /**
   * Single-screen demo: pick an audio file, upload its bytes through [MyApi] and show the
   * `text` field of the JSON response in the result view.
   */
  class MainActivity : AppCompatActivity() {

      private lateinit var btnSelectFile: Button
      private lateinit var btnConvert: Button
      private lateinit var tvResult: TextView

      /** Content URI of the file chosen in the picker; null until a file has been chosen. */
      private var selectedFileUri: Uri? = null

      /** Retrofit service, built once on first use instead of on every button click. */
      private val api: MyApi by lazy {
          Retrofit.Builder()
              .baseUrl("https://api-inference.huggingface.co/")
              .addConverterFactory(GsonConverterFactory.create())
              .build()
              .create(MyApi::class.java)
      }

      override fun onCreate(savedInstanceState: Bundle?) {
          super.onCreate(savedInstanceState)
          setContentView(R.layout.activity_main)

          btnSelectFile = findViewById(R.id.btnSelectFile)
          btnConvert = findViewById(R.id.btnConvert)
          tvResult = findViewById(R.id.tvResult)
          // Lets the fixed-height result view scroll when the text is long.
          tvResult.movementMethod = ScrollingMovementMethod()

          // Select file
          btnSelectFile.setOnClickListener { requestReadExternalStoragePermission() }
          // Convert to text
          btnConvert.setOnClickListener { convertSelectedFile() }
      }

      /**
       * Reads the selected file and enqueues the upload request.
       *
       * Every failure path (no selection, unreadable file, request failure) is reported in
       * the result view instead of being dropped silently.
       */
      private fun convertSelectedFile() {
          val uri = selectedFileUri
          if (uri == null) {
              tvResult.text = "Please select an audio file first"
              return
          }
          val audioBytes = readAudioBytes(uri)
          if (audioBytes == null) {
              tvResult.text = "Unable to read the selected file"
              return
          }

          tvResult.text = "Converting..."
          api.convertText(TextRequestBody(audioBytes).requestBody)
              .enqueue(object : Callback<ResponseBody> {
                  override fun onResponse(call: Call<ResponseBody>, response: Response<ResponseBody>) {
                      showTranscription(response)
                  }

                  override fun onFailure(call: Call<ResponseBody>, t: Throwable) {
                      Log.e(TAG, "Request failed", t)
                      tvResult.text = "Request failed: ${t.message}"
                  }
              })
      }

      /**
       * Reads the whole content behind [uri] into memory.
       *
       * `use` closes the stream even when reading throws.
       *
       * @return the file bytes, or null when the stream cannot be opened or read
       */
      private fun readAudioBytes(uri: Uri): ByteArray? =
          try {
              contentResolver.openInputStream(uri)?.use { stream -> stream.readBytes() }
          } catch (e: IOException) {
              Log.e(TAG, "Unable to read $uri", e)
              null
          } catch (e: SecurityException) {
              Log.e(TAG, "No read access to $uri", e)
              null
          }

      /** Shows the parsed `text` of a successful response, or an error description otherwise. */
      private fun showTranscription(response: Response<ResponseBody>) {
          if (!response.isSuccessful) {
              val detail = try {
                  response.errorBody()?.string()
              } catch (e: IOException) {
                  null
              }
              Log.e(TAG, "HTTP ${response.code()}: $detail")
              tvResult.text = "Request failed (HTTP ${response.code()}): ${detail.orEmpty()}"
              return
          }

          val transcription: String? = try {
              val jsonResponse = response.body()?.string()
              Log.d(TAG, "onResponse: $jsonResponse")
              // Gson can leave `text` null when the field is missing, hence the nullable result.
              Gson().fromJson(jsonResponse, ApiResponse::class.java)?.text
          } catch (e: IOException) {
              Log.e(TAG, "Unable to read response body", e)
              null
          } catch (e: com.google.gson.JsonParseException) {
              Log.e(TAG, "Unexpected response format", e)
              null
          }
          tvResult.text = transcription ?: "Unexpected response from server"
      }

      /**
       * Opens the file picker, asking for the legacy storage permission first where that
       * permission can still be granted.
       *
       * On Android 13 (API 33) and above, READ_EXTERNAL_STORAGE is never granted to apps
       * targeting API 33+, so requesting it there would block the picker permanently.
       */
      private fun requestReadExternalStoragePermission() {
          val permission = android.Manifest.permission.READ_EXTERNAL_STORAGE
          val permissionObsolete =
              android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.TIRAMISU
          val alreadyGranted =
              ContextCompat.checkSelfPermission(this, permission) == PackageManager.PERMISSION_GRANTED

          if (permissionObsolete || alreadyGranted) {
              openFilePicker()
              return
          }
          ActivityCompat.requestPermissions(
              this,
              arrayOf(permission),
              READ_EXTERNAL_STORAGE_PERMISSION_REQUEST
          )
      }

      /** Continues to the file picker once the user has answered the permission dialog. */
      override fun onRequestPermissionsResult(
          requestCode: Int,
          permissions: Array<out String>,
          grantResults: IntArray
      ) {
          super.onRequestPermissionsResult(requestCode, permissions, grantResults)
          if (requestCode != READ_EXTERNAL_STORAGE_PERMISSION_REQUEST) return

          // An empty result array means the request was cancelled; treat it as a denial.
          if (grantResults.firstOrNull() == PackageManager.PERMISSION_GRANTED) {
              openFilePicker()
          } else {
              tvResult.text = "Storage permission denied"
          }
      }

      /** Launches the system picker restricted to `audio/*` content that can be opened as a stream. */
      private fun openFilePicker() {
          val intent = Intent(Intent.ACTION_GET_CONTENT).apply {
              type = "audio/*"
              // Only offer entries whose URI can be read with ContentResolver.openInputStream().
              addCategory(Intent.CATEGORY_OPENABLE)
          }
          try {
              startActivityForResult(intent, FILE_SELECT_CODE)
          } catch (e: android.content.ActivityNotFoundException) {
              Log.e(TAG, "No activity can handle the file picker intent", e)
              tvResult.text = "No file picker available on this device"
          }
      }

      /** Stores the picked URI and shows the file's display name. */
      override fun onActivityResult(requestCode: Int, resultCode: Int, data: Intent?) {
          super.onActivityResult(requestCode, resultCode, data)
          if (requestCode != FILE_SELECT_CODE || resultCode != Activity.RESULT_OK) return

          data?.data?.let { uri ->
              selectedFileUri = uri
              val fileName = getFileName(uri)
              tvResult.text = "Selected File: $fileName"
          }
      }

      /**
       * Looks up the display name of [uri] through the content resolver.
       *
       * @return the display name, or an empty string when the provider returns no row,
       *   has no DISPLAY_NAME column, or stores null there
       */
      private fun getFileName(uri: Uri): String {
          val displayName: String? =
              contentResolver.query(uri, null, null, null, null)?.use { cursor ->
                  // getColumnIndex returns -1 when the column is absent; guard before reading.
                  val column = cursor.getColumnIndex(OpenableColumns.DISPLAY_NAME)
                  if (column >= 0 && cursor.moveToFirst()) cursor.getString(column) else null
              }
          return displayName ?: ""
      }

      companion object {
          private const val TAG = "TAG"
          private const val FILE_SELECT_CODE = 1
          private const val READ_EXTERNAL_STORAGE_PERMISSION_REQUEST = 22
      }
  }
6、在AndroidManifest.xml中注册相关网络和读取外部存储权限
  1. <?xml version="1.0" encoding="utf-8"?>
  <manifest
      xmlns:android="http://schemas.android.com/apk/res/android"
      xmlns:tools="http://schemas.android.com/tools">

      <!-- Network access for the upload request. -->
      <uses-permission android:name="android.permission.INTERNET" />

      <!--
          Legacy storage permission. It has no effect on Android 13 (API 33) and above for
          apps targeting API 33+, so it is only declared up to API 32.
      -->
      <uses-permission
          android:name="android.permission.READ_EXTERNAL_STORAGE"
          android:maxSdkVersion="32" />

      <application
          android:allowBackup="true"
          android:dataExtractionRules="@xml/data_extraction_rules"
          android:fullBackupContent="@xml/backup_rules"
          android:icon="@mipmap/ic_launcher"
          android:label="@string/app_name"
          android:roundIcon="@mipmap/ic_launcher_round"
          android:supportsRtl="true"
          android:theme="@style/Theme.ViceToTextDemo"
          tools:targetApi="31">

          <!-- Launcher activity of the demo. -->
          <activity
              android:name=".MainActivity"
              android:exported="true"
              android:launchMode="singleTop">
              <intent-filter>
                  <action android:name="android.intent.action.MAIN" />
                  <category android:name="android.intent.category.LAUNCHER" />
              </intent-filter>
          </activity>

      </application>

  </manifest>
7、运行结果

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Li_阴宅/article/detail/772600
推荐阅读
相关标签
  

闽ICP备14008679号