llama and transformerjs backend working

pull/5098/head
Stéphane Tetsing 5 months ago
parent 2b3ef31d8d
commit 1e28611c0a
  1. 57   apps/remixdesktop/src/lib/completionTransformer.ts
  2. 166  apps/remixdesktop/src/lib/llamaInferencer.ts
  3. 117  apps/remixdesktop/src/plugins/remixAIDektop.ts
  4. 820  apps/remixdesktop/yarn.lock
  5. 2    libs/remix-ui/remix-ai/src/lib/components/Default.tsx

@@ -29,7 +29,11 @@ class InlineCompletionTransformer {
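// note: the indirect `Function('return import(...)')()` call below is presumably
// used to keep TypeScript/webpack from rewriting the dynamic `import()` into a
// CJS `require`, which would break loading the ESM-only @xenova/transformers
// package from the Electron main process.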
const TransformersApi = Function('return import("@xenova/transformers")')();
const { pipeline, env } = await TransformersApi;
InlineCompletionTransformer.model = InlineCompletionTransformer.defaultModels.find(model => model.name === 'DeepSeekTransformer')
if (InlineCompletionTransformer.model.modelReqs.backend !== 'transformerjs') {
console.log('model not supported')
return
}
console.log('loading model', InlineCompletionTransformer.model)
InlineCompletionTransformer.instance = pipeline(InlineCompletionTransformer.task, InlineCompletionTransformer.model.modelName, { progress_callback, quantized: true});
}
@@ -76,6 +80,7 @@ class DownloadManager {
break;
case 'complete':
this.events.emit(e.status, e)
if (this.responses[e.id]) {
if (this.current === e.id) {
this.responses[e.id](null, e)
@@ -97,13 +102,10 @@ export class InlineCompletionServiceTransformer{
dMng = new DownloadManager()
isReady = false
event = new EventEmitter()
selectedModel: any
constructor(defaultModels) {
InlineCompletionTransformer.defaultModels = defaultModels
InlineCompletionTransformer.model = defaultModels.find(model => model.name === 'DeepSeekTransformer')
InlineCompletionTransformer.task = InlineCompletionTransformer.model.task
InlineCompletionTransformer.getInstance(this.dMng.onMessageReceived);
constructor(model: any) {
this.selectedModel = model
this.dMng.events.on('progress', (data) => {
// log progress percentage
@@ -116,28 +118,61 @@ export class InlineCompletionServiceTransformer{
}
})
this.dMng.events.on('done', (data) => {
console.log('download complete')
this.isReady = true
})
this.dMng.events.on('ready', (data) => {
console.log('model ready')
this.isReady = true
})
this.dMng.events.on('complete', (data) => {
})
}
async init(envPath?: string) {
InlineCompletionTransformer.model = this.selectedModel
InlineCompletionTransformer.task = InlineCompletionTransformer.model.task
// create inference instance
await InlineCompletionTransformer.getInstance(this.dMng.onMessageReceived);
if (envPath) {
this.setTransformerEnvPath(envPath)
}
}
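// usage sketch (illustrative; assumes a model entry shaped like the
// DefaultModels() items used elsewhere, i.e. carrying `task` and `modelName`):
//   const completer = new InlineCompletionServiceTransformer(model)
//   await completer.init()
//   const out = await completer.code_completion('pragma solidity ^0.8.0;\n')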
setTransformerEnvPath(path: string) {
if (InlineCompletionTransformer.instance === null) {
console.log('model not ready yet')
return
}
if (path === '') {
console.log('path is empty')
return
}
// TODO: verify this is the correct way to point transformers.js at a cache path
console.log('setting transformers env path to', path)
InlineCompletionTransformer.instance.env.set('TRANSFORMERS_CACHE', path)
}
async code_completion(context: any, params=completionParams): Promise<any> {
if (!this.isReady) {
console.log('model not ready yet')
return
}
// as of now no prompt required
this.event.emit('onInference')
const instance = await InlineCompletionTransformer.getInstance()
const result = await instance(context, completionParams)
const result = await instance(context, params)
this.event.emit('onInferenceDone')
return result
}
async code_insertion(msg_pfx: string, msg_sfx: string, params=insertionParams): Promise<any> {
console.log('in code_insertion', this)
if (!this.isReady) {
console.log('model not ready')
console.log('model not ready yet')
return
}

@@ -0,0 +1,166 @@
import path from 'path';
import fs from 'fs';
import axios from "axios";
import { EventEmitter } from 'events';
import { LlamaModel, LlamaContext, LlamaChatSession, LlamaModelOptions } from "node-llama-cpp";
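// note: the static import above appears to duplicate the dynamic
// Function('return import(...)') load in getInstance() below; it is presumably
// retained for its TypeScript types, since node-llama-cpp is ESM-only and a
// plain require() from the CJS Electron main process would fail.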
class LLamaBackend {
static instance: any
static model: any
static modelPath: string
static async getInstance() {
if (this.instance === null || this.instance === undefined) {
const LlamaApi = Function('return import("node-llama-cpp")')();
const { LlamaModel, LlamaContext, LlamaChatSession, LlamaModelOptions } = await LlamaApi;
const getModelOptions = () => {
const options = {
modelPath: this.modelPath ? this.modelPath : null,
threads: 1,
temperature: 0.6,
topK: 40,
topP: 0.92,
logitsAll: false,
vocabOnly: false,
useMmap: false,
useMlock: false,
embedding: false,
};
return options;
}
console.log('loading model with options', getModelOptions())
const m = new LlamaModel(getModelOptions());
console.log("system infos\n", LlamaModel.systemInfo)
console.log("model infos\n", m.modelInfo)
const context = new LlamaContext({model: m});
const session = new LlamaChatSession({context});
this.instance = session
return this.instance
}
return this.instance
}
}
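// usage sketch for the singleton backend (assumes the node-llama-cpp v2
// LlamaChatSession API imported above; the model path is hypothetical):
//   LLamaBackend.modelPath = '/path/to/deepseek-coder.gguf'
//   const session = await LLamaBackend.getInstance()
//   const reply = await session.prompt('write a hello-world contract in solidity')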
export class LLamaInferencer {
plugin: any
isReady: boolean = false
selectedModel: any
modelPath: string
event: EventEmitter
inferencer: any
constructor(props, model) {
this.plugin = props
this.selectedModel = model
this.event = new EventEmitter()
}
async init(envPath?: string) {
try {
await this._downloadModel(this.selectedModel)
if (this.modelPath === undefined) {
console.log('Model not downloaded or not found')
return
}
console.log('Model downloaded at', this.modelPath)
LLamaBackend.model = this.selectedModel
LLamaBackend.modelPath = this.modelPath
this.inferencer = await LLamaBackend.getInstance()
await this.inferencer.init()
this.isReady = this.inferencer.initialized
} catch (error) {
console.log('Error initializing the model', error)
}
}
async _downloadModel(model): Promise<void> {
console.log('Downloading the model', model)
console.log('Downloading model', model.downloadUrl)
const wdir = await this.plugin.call('fs' as any, 'getWorkingDir');
console.log('working dir is', wdir)
const outputLocationDir = await this.plugin.call('fs' as any, 'selectFolder', wdir);
console.log('output location dir is', outputLocationDir)
if (outputLocationDir === undefined) {
console.log('No output location selected');
return;
}
const outputLocationPath = path.join(outputLocationDir, model.modelName);
console.log('output location path is', outputLocationPath)
if (fs.existsSync(outputLocationPath)) {
this.modelPath = outputLocationPath
console.log('Model already exists in the output location', outputLocationPath);
return;
}
// Make a HEAD request to get the file size
const { headers } = await axios.head(model.downloadUrl);
const totalSize = parseInt(headers['content-length'], 10);
// Create a write stream to save the file
const writer = fs.createWriteStream(outputLocationPath);
// Start the file download
const response = await axios({
method: 'get',
url: model.downloadUrl,
responseType: 'stream'
});
let downloadedSize = 0;
response.data.on('data', (chunk: Buffer) => {
downloadedSize += chunk.length;
const progress = (Number((downloadedSize / totalSize) * 100).toFixed(2));
console.log(`Downloaded ${progress}%`);
this.event.emit('download', progress);
});
response.data.pipe(writer);
return new Promise((resolve, reject) => {
writer.on('finish', () => {
// only signal readiness once the file has been fully written to disk
this.modelPath = outputLocationPath
this.event.emit('ready')
console.log('LLama download complete');
resolve();
});
writer.on('error', reject);
});
}
async code_completion(context: any, params): Promise<any> {
if (!this.isReady) {
console.log('model not ready yet')
return
}
// as of now no prompt required
this.event.emit('onInference')
const result = await this.inferencer.promptWithMeta(context)
this.event.emit('onInferenceDone')
return result
}
async code_insertion(msg_pfx: string, msg_sfx: string, params): Promise<any> {
if (!this.isReady) {
console.log('model not ready yet')
return
}
this.event.emit('onInference')
// const prompt = getInsertionPrompt(InlineCompletionTransformer.model, msg_pfx, msg_sfx)
// const instance = await InlineCompletionTransformer.getInstance()
// const result = instance(prompt, insertionParams)
// this.event.emit('onInferenceDone')
// return result
}
}
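// end-to-end sketch (hypothetical `pluginClient` exposing the 'fs' plugin calls
// used in _downloadModel above):
//   const llama = new LLamaInferencer(pluginClient, model)
//   llama.event.on('download', (pct) => console.log(`downloaded ${pct}%`))
//   await llama.init()
//   const completion = await llama.code_completion('pragma solidity ^0.8.0;\n', {})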

@@ -8,6 +8,7 @@ import path from 'path';
import {ipcMain} from 'electron';
import {InlineCompletionServiceTransformer} from '../lib/completionTransformer'
import { LLamaInferencer } from '../lib/llamaInferencer';
//import {LlamaModel, LlamaContext, LlamaChatSession, LlamaModelOptions} from "node-llama-cpp";
@@ -43,13 +44,13 @@ const clientProfile: Profile = {
description: 'RemixAI provides AI services to Remix IDE Desktop.',
kind: '',
documentation: 'https://remix-ide.readthedocs.io/en/latest/remixai.html',
methods: ['downloadModel', 'getInferenceModel', 'loadTransformerModel', 'code_completion'],
methods: ['initializeModelBackend', 'code_completion'],
}
class RemixAIDesktopPluginClient extends ElectronBasePluginClient {
SelectedModelPath: any
selectedModel: any
inlineCompleter: any
multitaskModel: LLamaInferencer | InlineCompletionServiceTransformer = null
completionModel: LLamaInferencer | InlineCompletionServiceTransformer = null
constructor (webContentsId: number, profile: Profile){
console.log("loading the remix plugin client ........................")
@@ -63,85 +64,60 @@ class RemixAIDesktopPluginClient extends ElectronBasePluginClient {
console.log("loaded the remix plugin client application side")
})
}
async listAvailableModels(){
}
async enable (){
console.log('Remix AI desktop plugin enabled')
this.emit('enabled')
}
async downloadModel(model): Promise<void> {
console.log('Downloading the model', model)
console.log('Downloading model', model.downloadUrl)
const wdir = await this.call('fs' as any, 'getWorkingDir');
console.log('working dir is', wdir)
const outputLocationDir = await this.call('fs' as any, 'selectFolder', wdir);
console.log('output location dir is', outputLocationDir)
if (outputLocationDir === undefined) {
console.log('No output location selected');
return;
}
const outputLocationPath = path.join(outputLocationDir, model.modelName);
console.log('output location path is', outputLocationPath)
if (fs.existsSync(outputLocationPath)) {
console.log('Model already exists in the output location', outputLocationPath);
this.SelectedModelPath = outputLocationPath;
this.selectedModel = model;
return;
async initializeModelBackend(multitaskModel: any, completionModel?: any){
console.log("Initializing backend with model ", multitaskModel, completionModel)
switch (multitaskModel.modelReqs.backend) {
case 'llamacpp':
this.multitaskModel = new LLamaInferencer(this, multitaskModel)
break;
case 'transformerjs':
this.multitaskModel = new InlineCompletionServiceTransformer(multitaskModel)
break;
default:
console.log("Backend not supported")
break;
}
// Make a HEAD request to get the file size
const { headers } = await axios.head(model.downloadUrl);
const totalSize = parseInt(headers['content-length'], 10);
// Create a write stream to save the file
const writer = fs.createWriteStream(outputLocationPath);
// Start the file download
const response = await axios({
method: 'get',
url: model.downloadUrl,
responseType: 'stream'
});
let downloadedSize = 0;
response.data.on('data', (chunk: Buffer) => {
downloadedSize += chunk.length;
const progress = (downloadedSize / totalSize) * 100;
console.log(`Downloaded ${progress}%`);
this.emit('download_progress', progress);
});
response.data.pipe(writer);
this.SelectedModelPath = outputLocationPath;
this.selectedModel = model;
console.log('Download complete');
return new Promise((resolve, reject) => {
writer.on('finish', resolve);
writer.on('error', reject);
});
if (completionModel && completionModel.modelType === 'CODE_COMPLETION'){
switch (completionModel.modelReqs.backend) {
case 'llamacpp':
this.completionModel = new LLamaInferencer(this, completionModel)
break;
case 'transformerjs':
this.completionModel = new InlineCompletionServiceTransformer(completionModel)
break;
default:
console.log("Backend not supported")
break;
}
}
}
// init the models
if (this.multitaskModel){
await this.multitaskModel.init()
}
async loadTransformerModel(defaultModels) {
this.inlineCompleter = await new InlineCompletionServiceTransformer(defaultModels);
if (this.inlineCompleter.ready) {
console.log("Completer ready");
if (this.completionModel){
await this.completionModel.init()
}
console.log("Loaded transformer")
}
code_completion(context: any) {
console.log("Code completion called")
console.log("Context is ", this.inlineCompleter)
return this.inlineCompleter.code_completion(context);
if (this.completionModel){
return this.completionModel.code_completion(context, {max_new_tokens: 100})
}
// use general purpose model
return this.multitaskModel.code_completion(context, {max_new_tokens: 100})
}
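// renderer-side call sketch, matching the `methods` list in clientProfile above
// (`pluginName` stands in for whatever name this profile is registered under):
//   await plugin.call(pluginName, 'initializeModelBackend', DefaultModels()[0])
//   const res = await plugin.call(pluginName, 'code_completion', 'pragma solidity ^0.8.0;\n')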
// async _loadLocalModel(): Promise<LlamaChatSession> {
@@ -182,5 +158,10 @@ class RemixAIDesktopPluginClient extends ElectronBasePluginClient {
// return this._loadLocalModel();
// }
changemodel(newModel: any){
/// dereference the current static inference object
/// set new one
}
}

File diff suppressed because it is too large (apps/remixdesktop/yarn.lock)

@@ -57,7 +57,7 @@ export const Default = (props) => {
// if (!completer.ready) {
// await completer.init();
// }
await props.plugin.call(pluginName, 'loadTransformerModel', DefaultModels());
await props.plugin.call(pluginName, 'initializeModelBackend', DefaultModels()[0]);
// // const code = completer.code_completion("pragma solidity ^0.8.0;\n")
console.log("Got transformer model completion ");
