import SwiftUI
import AVFoundation // speech synthesis (TTS) and the recording engine
import Speech       // speech recognition (STT)
import Combine      // ObservableObject

// MARK: - 1. Data Models

struct ChatMessage: Identifiable, Equatable {
    let id = UUID()
    var content: String
    let isUser: Bool
}

struct OllamaRequest: Encodable {
    let model: String
    let messages: [OllamaMessage]
    let stream: Bool
}

struct OllamaMessage: Codable {
    let role: String
    let content: String
}

struct OllamaStreamResponse: Decodable {
    let message: OllamaMessage?
    let done: Bool
}

// MARK: - 2. Speech Synthesis Manager (TTS - the assistant speaks)

class SpeechManager: ObservableObject {
    private let synthesizer = AVSpeechSynthesizer()
    private var sentenceBuffer: String = ""
    @Published var isMuted: Bool = false

    // Sentence-ending characters; a buffered sentence is spoken as soon as one arrives.
    private let delimiters: CharacterSet = CharacterSet(charactersIn: "。!?!?\n:")

    // Called once per streamed token: buffer it, and speak whenever a full sentence is ready.
    func processStream(_ token: String) {
        if isMuted { return }
        sentenceBuffer += token
        if sentenceBuffer.rangeOfCharacter(from: delimiters) != nil {
            speakBufferedSentences()
        }
    }

    // Speak whatever remains in the buffer (e.g. when the stream ends without a delimiter).
    func flush() {
        if !sentenceBuffer.isEmpty {
            speak(sentenceBuffer)
            sentenceBuffer = ""
        }
    }

    private func speakBufferedSentences() {
        var content = sentenceBuffer
        while let range = content.rangeOfCharacter(from: delimiters) {
            let endIndex = range.upperBound
            let sentence = String(content[..<endIndex])
            if !sentence.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
                speak(sentence)
            }
            content = String(content[endIndex...])
        }
        // Keep the trailing fragment that has no delimiter yet.
        sentenceBuffer = content
    }

    private func speak(_ text: String) {
        let utterance = AVSpeechUtterance(string: text)
        // zh-TW voice to match the recognizer locale (an assumed default; any installed voice works).
        utterance.voice = AVSpeechSynthesisVoice(language: "zh-TW")
        synthesizer.speak(utterance)
    }

    func stop() {
        synthesizer.stopSpeaking(at: .immediate)
        sentenceBuffer = ""
    }
}

// MARK: - 3. Speech Input Manager (STT - the user speaks)

class VoiceInputManager: NSObject, ObservableObject, SFSpeechRecognizerDelegate {
    @Published var isRecording: Bool = false
    @Published var recognizedText: String = ""
    @Published var errorMessage: String?

    // Invoked with the final transcription once the user stops speaking.
    var onCommit: ((String) -> Void)?

    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-TW"))
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()

    // Silence-detection timer
    private var silenceTimer: Timer?
    private let silenceThreshold: TimeInterval = 1.5 // how long a pause counts as "done speaking" (seconds)

    override init() {
        super.init()
        speechRecognizer?.delegate = self
        requestPermission()
    }

    func requestPermission() {
        SFSpeechRecognizer.requestAuthorization { status in
            DispatchQueue.main.async {
                switch status {
                case .authorized:
                    self.errorMessage = nil
                case .denied:
                    self.errorMessage = "請至設定開啟語音辨識權限"
                case .restricted, .notDetermined:
                    self.errorMessage = "語音辨識暫時無法使用"
                @unknown default:
                    break
                }
            }
        }
    }

    func startRecording() {
        // If we are already recording, stop instead (toggle behavior).
        if audioEngine.isRunning {
            stopRecording()
            return
        }

        // 1. Cancel any previous task.
        recognitionTask?.cancel()
        recognitionTask = nil
        recognizedText = ""
        errorMessage = nil

        // 2. Audio setup. macOS has no AVAudioSession category to configure
        //    (unlike iOS), but grabbing the input node is still essential.
        let inputNode = audioEngine.inputNode

        // 3. Create the request.
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else { return }
        recognitionRequest.shouldReportPartialResults = true

        // 4. Start the recognition task.
        recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest) { [weak self] result, error in
            guard let self = self else { return }
            var isFinal = false
            if let result = result {
                DispatchQueue.main.async {
                    self.recognizedText = result.bestTranscription.formattedString
                    // Every new chunk of text resets the silence timer.
                    self.resetSilenceTimer()
                }
                isFinal = result.isFinal
            }
            if error != nil || isFinal {
                self.stopRecording() // actually stop the engine
            }
        }

        // 5. Install a tap using the input node's native format.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
            self?.recognitionRequest?.append(buffer)
        }

        // 6. Start the engine.
        audioEngine.prepare()
        do {
            try audioEngine.start()
            isRecording = true
            errorMessage = nil
        } catch {
            errorMessage = "錄音啟動失敗: \(error.localizedDescription)"
        }
    }
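    // Note for iOS ports: unlike macOS, iOS requires configuring the shared
    // AVAudioSession before starting the engine. A minimal sketch, kept as a
    // comment because it does not apply to this macOS target:
    //
    //   let session = AVAudioSession.sharedInstance()
    //   try session.setCategory(.record, mode: .measurement, options: .duckOthers)
    //   try session.setActive(true, options: .notifyOthersOnDeactivation)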
    func stopRecording() {
        silenceTimer?.invalidate()
        silenceTimer = nil

        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        recognitionRequest?.endAudio()
        recognitionTask?.cancel() // stop recognition
        recognitionTask = nil     // release the task

        DispatchQueue.main.async {
            self.isRecording = false
            // If we captured any text, commit it.
            if !self.recognizedText.isEmpty {
                self.onCommit?(self.recognizedText)
                self.recognizedText = "" // clear the staging text
            }
        }
    }

    // Restart the silence countdown; called on every partial result.
    private func resetSilenceTimer() {
        silenceTimer?.invalidate()
        silenceTimer = Timer.scheduledTimer(withTimeInterval: silenceThreshold, repeats: false) { [weak self] _ in
            // The timer fired with no new speech: treat the utterance as finished.
            DispatchQueue.main.async {
                if self?.isRecording == true {
                    self?.stopRecording()
                }
            }
        }
    }
}
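// Note: SFSpeechRecognizer.requestAuthorization covers speech recognition only;
// microphone access is granted separately. A minimal sketch of an explicit
// microphone check that could run before starting the engine (an optional
// addition, not part of the original flow):
//
//   AVCaptureDevice.requestAccess(for: .audio) { granted in
//       if !granted { /* surface an error to the user */ }
//   }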
// MARK: - 4. Main View

struct ContentView: View {
    @State private var inputText: String = ""
    @State private var messages: [ChatMessage] = []
    @State private var isLoading: Bool = false
    @StateObject private var speechManager = SpeechManager()
    @StateObject private var voiceManager = VoiceInputManager()

    private let modelName = "gemma3:4b"
    private let systemPrompt = "你是一個嚴肅的專業助理,請全程使用繁體中文(台灣)。【絕對禁止】嚴格禁止使用任何表情符號(Emoji)、顏文字或圖示。回答時請保持語氣冷靜、直接且不帶情緒,僅輸出純文字內容。"

    var body: some View {
        ZStack {
            Color(red: 0.1, green: 0.1, blue: 0.12).ignoresSafeArea()

            VStack(spacing: 0) {
                // Header
                HStack {
                    Text("Ollama Voice")
                        .font(.headline)
                        .foregroundColor(.gray)

                    if let error = voiceManager.errorMessage {
                        Text(error)
                            .font(.caption2)
                            .foregroundColor(.red)
                    }

                    Spacer()

                    Button(action: {
                        speechManager.isMuted.toggle()
                        if speechManager.isMuted { speechManager.stop() }
                    }) {
                        Image(systemName: speechManager.isMuted ? "speaker.slash.fill" : "speaker.wave.2.fill")
                            .foregroundColor(speechManager.isMuted ? .gray : .purple)
                            .padding(8)
                            .background(Color.black.opacity(0.3))
                            .clipShape(Circle())
                    }
                    .help(speechManager.isMuted ? "開啟語音" : "靜音")

                    Text(modelName)
                        .font(.caption)
                        .padding(6)
                        .background(Color.purple.opacity(0.3))
                        .cornerRadius(8)
                        .foregroundColor(.purple)
                }
                .padding()
                .background(Color.black.opacity(0.5))

                // Messages area
                ScrollViewReader { proxy in
                    ScrollView {
                        LazyVStack(spacing: 12) {
                            ForEach(messages) { msg in
                                MessageBubble(message: msg)
                            }
                        }
                        .padding()
                    }
                    .onChange(of: messages.last?.content) { _, _ in
                        if let lastId = messages.last?.id {
                            proxy.scrollTo(lastId, anchor: .bottom)
                        }
                    }
                    .onChange(of: messages.count) { _, _ in
                        if let lastId = messages.last?.id {
                            proxy.scrollTo(lastId, anchor: .bottom)
                        }
                    }
                }

                // Input area
                HStack(spacing: 10) {
                    // Text field, fed by both manual typing and speech recognition;
                    // live speech results overwrite whatever is shown.
                    TextField("", text: $inputText)
                        .padding(10)
                        .background(Color(white: 0.2))
                        .cornerRadius(20)
                        .foregroundColor(.white)
                        .onSubmit { startStreaming(text: inputText) }
                        // Mirror the live transcription into the field so the user can see it.
                        .onChange(of: voiceManager.recognizedText) { _, newText in
                            if voiceManager.isRecording && !newText.isEmpty {
                                inputText = newText
                            }
                        }

                    // MARK: Voice input button
                    Button(action: {
                        if voiceManager.isRecording {
                            // Stopping manually also triggers the commit.
                            voiceManager.stopRecording()
                        } else {
                            // Silence the assistant and start listening.
                            speechManager.stop()
                            inputText = "" // clear the field to receive speech
                            voiceManager.startRecording()
                        }
                    }) {
                        Image(systemName: voiceManager.isRecording ? "mic.fill" : "mic")
                            .font(.system(size: 20))
                            // Red with a pulsing "breathing" effect while recording
                            .foregroundColor(voiceManager.isRecording ? .white : .gray)
                            .padding(10)
                            .background(voiceManager.isRecording ? Color.red : Color(white: 0.2))
                            .clipShape(Circle())
                            .scaleEffect(voiceManager.isRecording ? 1.1 : 1.0)
                            .animation(
                                voiceManager.isRecording
                                    ? Animation.easeInOut(duration: 0.6).repeatForever(autoreverses: true)
                                    : .default,
                                value: voiceManager.isRecording
                            )
                    }
                    .disabled(isLoading) // no recording while the model is responding

                    // Send button
                    Button(action: { startStreaming(text: inputText) }) {
                        Image(systemName: "paperplane.fill")
                            .font(.system(size: 20))
                            .foregroundColor(inputText.isEmpty ? .gray : .purple)
                            .padding(10)
                            .background(Color(white: 0.2))
                            .clipShape(Circle())
                    }
                    .disabled(inputText.isEmpty || isLoading)
                }
                .padding()
                .background(Color.black.opacity(0.8))
            }
        }
        .preferredColorScheme(.dark)
        // Wire up what happens once speech input is committed.
        .onAppear {
            voiceManager.onCommit = { finalString in
                // Send automatically when speech input ends.
                self.inputText = finalString
                self.startStreaming(text: finalString)
            }
        }
    }

    // MARK: - Streaming Core

    func startStreaming(text: String) {
        let cleanInput = text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !cleanInput.isEmpty else { return }

        speechManager.stop()

        let userMsg = cleanInput
        inputText = ""
        // Make sure the speech-input state is reset as well.
        voiceManager.recognizedText = ""

        messages.append(ChatMessage(content: userMsg, isUser: true))
        messages.append(ChatMessage(content: "", isUser: false))
        isLoading = true

        Task {
            await streamResponse(userMessage: userMsg)
        }
    }

    func streamResponse(userMessage: String) async {
        guard let url = URL(string: "http://127.0.0.1:11434/api/chat") else { return }

        let apiMessages = [
            OllamaMessage(role: "system", content: systemPrompt),
            OllamaMessage(role: "user", content: userMessage)
        ]

        let requestBody = OllamaRequest(
            model: modelName,
            messages: apiMessages,
            stream: true
        )

        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        request.httpBody = try? JSONEncoder().encode(requestBody)

        do {
            let (result, _) = try await URLSession.shared.bytes(for: request)

            // Ollama streams one JSON object per line, e.g.
            // {"model":"gemma3:4b","message":{"role":"assistant","content":"你"},"done":false}
            for try await line in result.lines {
                if let data = line.data(using: .utf8),
                   let response = try? JSONDecoder().decode(OllamaStreamResponse.self, from: data) {

                    if response.done {
                        await MainActor.run {
                            speechManager.flush()
                            isLoading = false
                        }
                        break
                    }

                    if let content = response.message?.content {
                        await MainActor.run {
                            // Append the token to the assistant placeholder message.
                            if let index = messages.indices.last {
                                messages[index].content += content
                            }
                            speechManager.processStream(content)
                        }
                    }
                }
            }
        } catch {
            await MainActor.run {
                if let index = messages.indices.last {
                    messages[index].content += "\n[連線錯誤: \(error.localizedDescription)]"
                }
                isLoading = false
            }
        }
    }
}

// MARK: - 5. UI Components

struct MessageBubble: View {
    let message: ChatMessage

    var body: some View {
        HStack(alignment: .top) {
            if message.isUser { Spacer() }

            Text(message.content)
                .padding(12)
                .background(message.isUser ? Color.purple : Color(white: 0.2))
                .foregroundColor(.white)
                .cornerRadius(16)
                .frame(maxWidth: 300, alignment: message.isUser ? .trailing : .leading)
                .textSelection(.enabled)

            if !message.isUser { Spacer() }
        }
    }
}

#Preview {
    ContentView()
}
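// Deployment notes (assumptions about a typical Xcode setup, not part of the
// original source): speech recognition and microphone access require the
// NSSpeechRecognitionUsageDescription and NSMicrophoneUsageDescription keys in
// Info.plist. A sandboxed macOS app additionally needs the
// com.apple.security.device.audio-input entitlement for the microphone and
// com.apple.security.network.client to reach the local Ollama server at
// http://127.0.0.1:11434.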