Files
2025-11-30 08:39:54 +08:00

165 lines
6.8 KiB
TypeScript

/**
* Data extraction utilities for company analysis
*/
import type { CompanyData, SearchResult } from './company-analyzer';
import { REGEX_PATTERNS } from './constants';
/**
* Extracts basic company information from search results
* Uses regex patterns to identify and extract company website, founded date, and headquarters
* @param {SearchResult} searchResult - The search result containing title and snippet to analyze
* @param {CompanyData} companyData - The company data object to populate with extracted information
* @returns {Promise<void>} Promise that resolves when basic information extraction is complete
*/
export async function extractBasicInfo(searchResult: SearchResult, companyData: CompanyData): Promise<void> {
const text = `${searchResult.title} ${searchResult.snippet}`;
// Extract website
const websiteMatch = text.match(REGEX_PATTERNS.WEBSITE);
if (websiteMatch && !companyData..website) {
companyData..website = websiteMatch[0];
}
// Extract founded date
const foundedMatch = text.match(REGEX_PATTERNS.FOUNDED_DATE);
if (foundedMatch && !companyData..founded_date) {
companyData..founded_date = foundedMatch[1] || foundedMatch[2] || foundedMatch[3];
}
// Extract headquarters
const hqMatch = text.match(REGEX_PATTERNS.HEADQUARTERS);
if (hqMatch && !companyData..headquarters) {
companyData..headquarters = (hqMatch[1] || hqMatch[2] || '').replace(/\.$/, '').trim();
}
companyData.sources.push({
url: searchResult.url,
title: searchResult.title,
access_date: new Date().toISOString(),
reliability: 'high',
});
}
/**
* Extracts leadership information from search results
* Uses regex patterns to identify CEO and key executive information
* @param {SearchResult} searchResult - The search result containing title and snippet to analyze
* @param {CompanyData} companyData - The company data object to populate with leadership information
* @returns {Promise<void>} Promise that resolves when leadership information extraction is complete
*/
export async function extractLeadershipInfo(searchResult: SearchResult, companyData: CompanyData): Promise<void> {
const text = `${searchResult.title} ${searchResult.snippet}`;
// Extract CEO
const ceoMatch = text.match(REGEX_PATTERNS.CEO);
if (ceoMatch && !companyData.leadership.ceo) {
companyData.leadership.ceo = (ceoMatch[1] || ceoMatch[2] || '').replace(/\.$/, '').trim();
}
// Extract key executives
const executiveMatches = text.match(REGEX_PATTERNS.EXECUTIVES);
if (executiveMatches) {
for (const match of executiveMatches) {
const positionMatch = match.match(/(president|cto|cfo|coo|chief[^,]+?)[\s:]+([^.]+?)\./i);
if (positionMatch) {
companyData.leadership.key_executives.push({
name: positionMatch[2].trim(),
position: positionMatch[1].trim(),
experience: 'To be researched',
});
}
}
}
}
/**
* Extracts financial information from search results
* Uses regex patterns to identify revenue, market cap, and other financial metrics
* @param {SearchResult} searchResult - The search result containing title and snippet to analyze
* @param {CompanyData} companyData - The company data object to populate with financial information
* @returns {Promise<void>} Promise that resolves when financial information extraction is complete
*/
export async function extractFinancialInfo(searchResult: SearchResult, companyData: CompanyData): Promise<void> {
const text = `${searchResult.title} ${searchResult.snippet}`;
// Extract revenue
const revenueMatch = text.match(REGEX_PATTERNS.REVENUE);
if (revenueMatch && !companyData.financial.revenue) {
const amount = revenueMatch[1] || revenueMatch[2];
const multiplier = getFinancialMultiplier(text);
companyData.financial.revenue = `$${amount} ${multiplier}`;
}
// Extract market cap
const marketCapMatch = text.match(REGEX_PATTERNS.MARKET_CAP);
if (marketCapMatch && !companyData.financial.market_cap) {
const amount = marketCapMatch[1] || marketCapMatch[2];
const multiplier = getFinancialMultiplier(text);
companyData.financial.market_cap = `$${amount} ${multiplier}`;
}
}
/**
* Extracts market position information from search results
* Uses regex patterns to identify competitors, market share, and competitive advantages
* @param {SearchResult} searchResult - The search result containing title and snippet to analyze
* @param {CompanyData} companyData - The company data object to populate with market position information
* @returns {Promise<void>} Promise that resolves when market position information extraction is complete
*/
export async function extractMarketPositionInfo(
searchResult: SearchResult,
companyData: CompanyData
): Promise<void> {
const text = `${searchResult.title} ${searchResult.snippet}`;
// Extract competitors
const competitorMatches = text.match(REGEX_PATTERNS.COMPETITORS);
if (competitorMatches) {
const competitors = (competitorMatches[1] || competitorMatches[2])
.split(/[,;]/)
.map(c => c.trim());
companyData.market_position.competitors.push(...competitors);
}
// Extract market share
const marketShareMatch = text.match(REGEX_PATTERNS.MARKET_SHARE);
if (marketShareMatch && !companyData.market_position.market_share) {
companyData.market_position.market_share = `${marketShareMatch[1] || marketShareMatch[2]}%`;
}
}
/**
* Extracts culture and employment information from search results
* Uses regex patterns to identify employee satisfaction ratings and work environment details
* @param {SearchResult} searchResult - The search result containing title and snippet to analyze
* @param {CompanyData} companyData - The company data object to populate with culture and employment information
* @returns {Promise<void>} Promise that resolves when culture information extraction is complete
*/
export async function extractCultureInfo(searchResult: SearchResult, companyData: CompanyData): Promise<void> {
const text = `${searchResult.title} ${searchResult.snippet}`;
// Extract employee satisfaction
const satisfactionMatch = text.match(REGEX_PATTERNS.SATISFACTION);
if (satisfactionMatch && !companyData.culture_employment.employee_satisfaction) {
const rating = satisfactionMatch[1] || satisfactionMatch[2] || satisfactionMatch[3];
companyData.culture_employment.employee_satisfaction = `${rating}/5`;
}
}
/**
* Determines financial multiplier based on text content
* @param {string} text - Text to search for multiplier indicators
* @returns {string} Appropriate financial multiplier string
*/
function getFinancialMultiplier(text: string): string {
if (text.includes('B')) {
return 'Billion';
}
if (text.includes('M')) {
return 'Million';
}
return 'Thousand';
}