linting and handle resources

pull/5100/head
Stéphane Tetsing 3 months ago
parent a2ef615e00
commit fd8832f5bb
3 changed files:

  1. apps/remixdesktop/src/lib/InferenceServerManager.ts (40 lines changed)
  2. apps/remixdesktop/src/lib/databatcher.ts (2 lines changed)
  3. libs/remix-ai-core/src/helpers/inferenceServerReleases.ts (1 line changed)

@@ -84,6 +84,9 @@ export class InferenceManager implements ICompletions {
     if (this.inferenceProcess === null) await this._startServer()

+    // check if resources are met before initializing the models
+    this._handleResources(true)
+
     console.log('Initializing model request', model.modelType)
     switch (model.modelType) {
     case ModelType.CODE_COMPLETION_INSERTION || ModelType.CODE_COMPLETION:{
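
Note that the added `this._handleResources(true)` call is not awaited, so its warnings surface asynchronously and cannot gate initialization. A minimal self-contained sketch of the two patterns; `checkResources` and `initModel` are hypothetical names, not this class's API:

    // Hypothetical illustration: fire-and-forget vs awaited resource checks.
    async function checkResources(): Promise<boolean> {
      // stand-in for the GET /sys probe in the diff; always reports "enough" here
      return true
    }

    async function initModel(): Promise<void> {
      // fire-and-forget, as in the commit: init proceeds regardless of the outcome
      void checkResources()

      // awaited variant: the check can veto initialization
      if (!await checkResources()) throw new Error('insufficient resources')

      console.log('initializing model...')
    }

    initModel().catch(console.error)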
@@ -125,6 +128,8 @@ export class InferenceManager implements ICompletions {
   }

   async _processStatus() {
+    // check if the server is running
     const options = { headers: { 'Content-Type': 'application/json', } }
     const state = await axios.get(this.inferenceURL+"/state", options)
@@ -147,6 +152,35 @@ export class InferenceManager implements ICompletions {
       // console.log('general is running', state.data?.general)
     }
+    // this._handleResources()
+  }
+
+  async _handleResources(logger:boolean=false) {
+    // check resource usage
+    const options = { headers: { 'Content-Type': 'application/json', } }
+    const res = await axios.get(this.inferenceURL+"/sys", options)
+    if (res.data?.status) {
+      const max_memory = res.data.memory.total
+      const used_memory = res.data.memory.used
+      const memory_usage = res.data.memory.percent * 100
+      const gpu_available = res.data.gpus
+
+      for (const model of this.selectedModels) {
+        if (model.modelReqs.minSysMemory > max_memory) {
+          if (logger) console.warn('Insufficient memory for the model')
+        }
+        if (model.modelReqs.minSysMemory > used_memory) {
+          if (logger) console.warn('Insufficient memory for the model')
+        }
+        if (model.modelReqs.GPURequired) {
+          if (gpu_available.length < 1) {
+            if (logger) console.warn('GPU required for desktop inference but not available')
+          }
+        }
+      }
+    }
   }

   async _downloadModel(model:IModel): Promise<string> {
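
For reference, a standalone sketch of the check the new `_handleResources` performs. The /sys payload shape below is inferred from the fields the diff reads (status, memory.total/used/percent, gpus) and is an assumption. Note also that the committed code compares `minSysMemory` against *used* memory; the sketch compares against *free* memory (total minus used), which is presumably the intent:

    // Sketch only: payload shape and the free-memory comparison are assumptions.
    interface SysInfo {
      status: boolean
      memory: { total: number; used: number; percent: number }
      gpus: unknown[]
    }

    interface SelectedModel {
      modelReqs: { minSysMemory: number; GPURequired: boolean }
    }

    function warnOnScarceResources(sys: SysInfo, models: SelectedModel[], logger = false): void {
      if (!sys.status) return
      const freeMemory = sys.memory.total - sys.memory.used

      for (const model of models) {
        // warn (but do not abort) when a model's minimum memory exceeds what is free
        if (model.modelReqs.minSysMemory > freeMemory) {
          if (logger) console.warn('Insufficient memory for the model')
        }
        if (model.modelReqs.GPURequired && sys.gpus.length < 1) {
          if (logger) console.warn('GPU required for desktop inference but not available')
        }
      }
    }

    // example call site, mirroring the diff: warnOnScarceResources(res.data, this.selectedModels, true)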
@@ -449,7 +483,7 @@ export class InferenceManager implements ICompletions {
       console.log('model not ready yet')
       return
     }
-    if (GenerationParams.stream_result) {
+    if (params.stream_result) {
       return this._streamInferenceRequest('code_explaining', { code, context, ...params })
     } else {
       return this._makeInferenceRequest('code_explaining', { code, context, ...params }, AIRequestType.GENERAL)
@@ -461,7 +495,7 @@ export class InferenceManager implements ICompletions {
       console.log('model not ready yet')
       return ""
     }
-    if (GenerationParams.stream_result) {
+    if (params.stream_result) {
       return this._streamInferenceRequest('error_explaining', { prompt, ...params })
     } else {
       return this._makeInferenceRequest('error_explaining', { prompt, ...params }, AIRequestType.GENERAL)
@@ -481,7 +515,7 @@ export class InferenceManager implements ICompletions {
     }

     const prompt = buildSolgptPromt(userPrompt, modelOP)
-    if (GenerationParams.stream_result) {
+    if (params.stream_result) {
       return this._streamInferenceRequest('solidity_answer', { prompt, ...params })
     } else {
       return this._makeInferenceRequest('solidity_answer', { prompt, ...params }, AIRequestType.GENERAL)
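
The three hunks above fix the same bug: the streaming branch consulted `GenerationParams`, a shared defaults object, instead of the `params` argument the caller actually passed, so per-request streaming preferences were silently ignored. A self-contained illustration of the pattern; the function and request names are hypothetical:

    // Hypothetical names throughout; only the defaults-vs-argument pattern is from the diff.
    interface GenParams { stream_result: boolean }

    const GenerationDefaults: GenParams = { stream_result: false }

    async function streamRequest(prompt: string): Promise<string> { return `stream:${prompt}` }
    async function plainRequest(prompt: string): Promise<string> { return `plain:${prompt}` }

    async function answer(prompt: string, params: GenParams = GenerationDefaults): Promise<string> {
      // before the fix: GenerationDefaults.stream_result — always the module default
      // after the fix: consult the caller-supplied params
      return params.stream_result ? streamRequest(prompt) : plainRequest(prompt)
    }

    // a caller opting into streaming now actually gets the streaming path
    answer('explain this revert', { stream_result: true }).then(console.log)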

@@ -1,4 +1,4 @@
-import {EventEmitter} from 'events';
+import { EventEmitter } from 'events';
 import { StringDecoder } from 'string_decoder';
 // Max duration to batch session data before sending it to the renderer process.
 const BATCH_DURATION_MS = 16;
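
The databatcher change itself is lint-only (spaces inside the import braces), but the constant beside it caps how long session data is buffered before being flushed to the renderer. The class body is not part of this diff, so the batcher below is an illustration of the pattern, not the real implementation:

    import { EventEmitter } from 'events';

    const BATCH_DURATION_MS = 16; // as in the diff: max batching delay before a flush

    // Illustrative batcher; the real DataBatcher implementation is not shown in this commit.
    class SketchBatcher extends EventEmitter {
      private chunks: string[] = [];
      private timer: NodeJS.Timeout | null = null;

      write(data: string): void {
        this.chunks.push(data);
        // arm a single flush timer on the first chunk of each batch
        if (this.timer === null) {
          this.timer = setTimeout(() => this.flush(), BATCH_DURATION_MS);
        }
      }

      private flush(): void {
        this.emit('flush', this.chunks.join(''));
        this.chunks = [];
        this.timer = null;
      }
    }

    // usage: batch rapid writes into one message to the renderer
    // const b = new SketchBatcher();
    // b.on('flush', (payload) => console.log('send to renderer:', payload));
    // b.write('chunk-1'); b.write('chunk-2'); // flushed together after ~16 ms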

@@ -44,7 +44,6 @@ export async function downloadLatestReleaseExecutable(platform: string, outputDi
   console.log(`Downloading executables for ${platform}..., ${executables} `);

   for (const executable of executables) {
-    // sort out the correct executable
     const filePath = path.join(outputDir, executable.name);
     console.log(`Downloading ${executable.name}...`);
     await downloadFile(executable.browser_download_url, filePath);
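
For context, a hedged usage sketch of the function this hunk touches; the import path, platform argument, and output directory are assumptions, since the diff only shows the loop body and the truncated signature:

    import os from 'os';
    import path from 'path';
    // module path is an assumption based on the file touched by this hunk
    import { downloadLatestReleaseExecutable } from './inferenceServerReleases';

    async function fetchInferenceServer(): Promise<void> {
      // platform string is illustrative; the diff does not show the accepted values
      const outputDir = path.join(os.tmpdir(), 'remix-inference');
      await downloadLatestReleaseExecutable(process.platform, outputDir);
    }

    fetchInferenceServer().catch(console.error);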
